#! /bin/sh

#================================================================
# estwolels
# List the path and the URL of cache files of wwwoffle
#================================================================


# set variables
# Pin the locale to C and export it so that child processes see it too.
LANG=C
LC_ALL=C
export LANG LC_ALL

# Name printed in the usage text and in error messages.
progname="estwolels"
# Default spool directory; a first command line argument replaces it.
spooldir=/var/spool/wwwoffle
# Upper bound for the second field of a wwwoffle-ls line (presumably the
# size in bytes); entries above it are left out of the listing.
sizemax="1048576"

# "|"-separated alternation of URL suffixes to leave out of the listing,
# assembled one group at a time.
denysufs="css|js|dtd|rdf|rss|swf|class|md5"
for sufgroup in \
  "png|gif|jpg|jpeg|jpe|bmp|tif|tiff" \
  "pnm|pbm|pgm|ppm|xbm|xpm|ps|eps|au|svg|dvi|ico" \
  "mid|midi|kar|mp3|mp2|au|snd|wav|aif|aiff" \
  "mpg|mpeg|mpe|qt|mov|avi" \
  "pdf|rtf|rtx|doc|xls|ppt|xdw|csv|tsv" \
  "gz|zip|bz2|lzh|lha|tar|bin|cpio|shar|jar|war"
do
  denysufs="$denysufs|$sufgroup"
done
unset sufgroup

# show help message
# When the first argument is exactly "--help", write the usage text to
# standard output and finish with status 0; any other argument falls through.
case "$1" in
  --help)
    printf '%s\n' \
      'List the path and the URL of cache files of wwwoffle.' \
      '' \
      'Usage:' \
      "  $progname [spooldir]" \
      ''
    exit 0
    ;;
esac


# check the spool directory
# A non-empty first argument takes the place of the configured spool directory.
spooldir="${1:-$spooldir}"

# The "http" subdirectory must exist as a directory and be both readable and
# searchable; otherwise complain on standard error and stop with status 1.
if [ ! -d "$spooldir/http" ] || [ ! -r "$spooldir/http" ] || [ ! -x "$spooldir/http" ]
then
  printf '%s: cannot scan %s\n' "$progname" "$spooldir/http" 1>&2
  exit 1
fi


# list the path and the URL of cache files
#
# list_cache_files
#   Runs wwwoffle-ls once for every directory found under "$spooldir/http"
#   (the directory name is used as the host part of "http://<name>") and
#   turns its output into one line per cache entry:
#     <spooldir>/http/<host>/<file><TAB>http://<host>/<rest of URL>
#   Globals read: spooldir, denysufs, sizemax
#   Output: the listing on standard output
list_cache_files() {
  # Let the shell expand the directory entries instead of parsing `ls`, so
  # names reach wwwoffle-ls untouched (no backslash or blank processing by
  # `read`).
  for domaindir in "$spooldir/http"/*
  do
    # An unmatched pattern stays literal; skip it and anything else that is
    # not a directory.
    [ -d "$domaindir" ] || continue
    wwwoffle-ls "http://${domaindir##*/}"
  done |
  # Keep only the lines that carry a URL.
  grep -F " http://" |
  # Drop lines ending in one of the denied suffixes, ignoring case.
  # (grep -F / grep -E replace the obsolescent fgrep / egrep.)
  grep -E -i -v "\.($denysufs)$" |
  awk -v sdir="$spooldir/http" -v sizemax="$sizemax" '
{
  # The second field is compared against the bounds: skip entries below 1
  # or above sizemax.
  if($2 < 1 || $2 > sizemax) next
  # The cache file name is everything before the first space.
  file = $0
  sub(/ .*/, "", file)
  # The URL without its scheme is everything after the last " http://".
  url = $0
  sub(/.* http:\/\//, "", url)
  # The host is the URL up to its first slash.
  domain = url
  sub(/\/.*/, "", domain)
  printf("%s/%s/%s\thttp://%s\n", sdir, domain, file, url)
}
'
}
list_cache_files


# exit normally
# The status is always 0 here; the exit status of the listing pipeline above
# is not consulted.
exit 0



# END OF FILE
