diff options
author | Silvio Rhatto <rhatto@riseup.net> | 2013-08-25 21:52:43 -0300 |
---|---|---|
committer | Silvio Rhatto <rhatto@riseup.net> | 2013-08-25 21:52:43 -0300 |
commit | 2f3b2d3684068987691242da4dbda2f09828a56c (patch) | |
tree | daed8f0d6d16a99e2d8a500606cc5b53484601c7 | |
parent | 5ce98951033bc358510026e0cec15463ba59c16e (diff) | |
download | httruta-2f3b2d3684068987691242da4dbda2f09828a56c.tar.gz httruta-2f3b2d3684068987691242da4dbda2f09828a56c.tar.bz2 |
Usage and cleaner httrack options
-rw-r--r-- | README.mdwn | 8 | ||||
-rw-r--r-- | config | 5 | ||||
-rw-r--r-- | lib/httracker/functions | 45 |
3 files changed, 32 insertions, 26 deletions
diff --git a/README.mdwn b/README.mdwn
index e9c9d72..3309f60 100644
--- a/README.mdwn
+++ b/README.mdwn
@@ -4,8 +4,16 @@ Feed Crawler
 Download all links from a feed using httrack. This is the engine behind the
 "Cache" feature used by https://links.sarava.org Semantic Scuttle instance.
 
+Usage
+-----
+
+Place this script somewhere and setup a cronjob like this:
+
+`*/5 * * * * /var/sites/arquivo/httracker/httracker &> /dev/null`
+
 TODO
 ----
 
 - Include all sites already donwloaded by scuttler.
 - Support for other fetchers like youtube-dl.
+- Lockfile support.
diff --git a/config b/config
--- a/config
+++ b/config
@@ -4,9 +4,8 @@ FEED="https://links.sarava.org/rss?sort=date_desc&count=100"
 TMP="/var/sites/arquivo/tmp/httracker"
 URLS="$TMP/urls-httracker.txt"
 URLS_SCUTTLER="$TMP/urls-scuttler.txt"
-LEVEL="1"
-EXT_LEVEL="1"
 FILESIZE=""
 USER="arquivo"
 GROUP="arquivo"
-DEPTH="1"
+DEPTH="2"
+EXT_DEPTH="1"
diff --git a/lib/httracker/functions b/lib/httracker/functions
index a5144c9..33152b1 100644
--- a/lib/httracker/functions
+++ b/lib/httracker/functions
@@ -26,19 +26,17 @@ function httracker_get {
   fi
 
   # Get each URL
-  httrack \
-    --mirror \
-    --continue \
-    --depth=${DEPTH} \
-    --near \
-    --purge-old=0 \
-    --index \
-    --cookies=1 \
-    --path ${target} \
-    -r${LEVEL} ${OPTS} ${url}
-    #-e%${EXT_LEVEL} \
-    #-m$FILESIZE \
-    #--verbose
+  httrack \
+    --mirror \
+    --continue \
+    --depth=${DEPTH} \
+    --ext-depth ${EXT_DEPTH} \
+    --near \
+    --purge-old=0 \
+    --index \
+    --cookies=1 \
+    --path ${target} \
+    ${OPTS} ${url}
 
   if [ "$?" == "0" ]; then
     # Mark as downloaded
@@ -64,16 +62,17 @@ function httracker_get_incremental {
   fi
 
   # Grabs URLs from the network
-  httrack --verbose \
-    --mirror \
-    --continue \
-    --user links \
-    --depth=${DEPTH} \
-    --near \
-    --purge-old=0 \
-    --index \
-    --cookies=1 \
-    --list ${URLS} \
+  httrack \
+    --mirror \
+    --continue \
+    --depth=${DEPTH} \
+    --ext-depth ${EXT_DEPTH} \
+    --near \
+    --purge-old=0 \
+    --index \
+    --cookies=1 \
+    --user links \
+    --list ${URLS} \
     --path ${target} ${OPTS}
 }
 