diff options
| author | Silvio Rhatto <rhatto@riseup.net> | 2013-08-25 21:52:43 -0300 |
|---|---|---|
| committer | Silvio Rhatto <rhatto@riseup.net> | 2013-08-25 21:52:43 -0300 | 
| commit | 2f3b2d3684068987691242da4dbda2f09828a56c (patch) | |
| tree | daed8f0d6d16a99e2d8a500606cc5b53484601c7 | |
| parent | 5ce98951033bc358510026e0cec15463ba59c16e (diff) | |
| download | httruta-2f3b2d3684068987691242da4dbda2f09828a56c.tar.gz httruta-2f3b2d3684068987691242da4dbda2f09828a56c.tar.bz2 | |
Usage and cleaner httrack options
| -rw-r--r-- | README.mdwn | 8 |
| -rw-r--r-- | config | 5 |
| -rw-r--r-- | lib/httracker/functions | 45 |
3 files changed, 32 insertions, 26 deletions
| diff --git a/README.mdwn b/README.mdwn index e9c9d72..3309f60 100644 --- a/README.mdwn +++ b/README.mdwn @@ -4,8 +4,16 @@ Feed Crawler  Download all links from a feed using httrack. This is the engine behind the  "Cache" feature used by https://links.sarava.org Semantic Scuttle instance. +Usage +----- + +Place this script somewhere and setup a cronjob like this: + +`*/5 * * * * /var/sites/arquivo/httracker/httracker &> /dev/null` +  TODO  ----  - Include all sites already donwloaded by scuttler.  - Support for other fetchers like youtube-dl. +- Lockfile support. @@ -4,9 +4,8 @@ FEED="https://links.sarava.org/rss?sort=date_desc&count=100"  TMP="/var/sites/arquivo/tmp/httracker"  URLS="$TMP/urls-httracker.txt"  URLS_SCUTTLER="$TMP/urls-scuttler.txt" -LEVEL="1" -EXT_LEVEL="1"  FILESIZE=""  USER="arquivo"  GROUP="arquivo" -DEPTH="1" +DEPTH="2" +EXT_DEPTH="1" diff --git a/lib/httracker/functions b/lib/httracker/functions index a5144c9..33152b1 100644 --- a/lib/httracker/functions +++ b/lib/httracker/functions @@ -26,19 +26,17 @@ function httracker_get {    fi    # Get each URL -  httrack               \ -    --mirror            \ -    --continue          \ -    --depth=${DEPTH}    \ -    --near              \ -    --purge-old=0       \ -    --index             \ -    --cookies=1         \ -    --path ${target}    \ -    -r${LEVEL} ${OPTS} ${url} -    #-e%${EXT_LEVEL}    \ -    #-m$FILESIZE        \ -    #--verbose +  httrack                    \ +    --mirror                 \ +    --continue               \ +    --depth=${DEPTH}         \ +    --ext-depth ${EXT_DEPTH} \ +    --near                   \ +    --purge-old=0            \ +    --index                  \ +    --cookies=1              \ +    --path ${target}         \ +    ${OPTS} ${url}    if [ "$?" 
== "0" ]; then      # Mark as downloaded @@ -64,16 +62,17 @@ function httracker_get_incremental {    fi    # Grabs URLs from the network -  httrack --verbose           \ -          --mirror            \ -          --continue          \ -          --user links        \ -          --depth=${DEPTH}    \ -          --near              \ -          --purge-old=0       \ -          --index             \ -          --cookies=1         \ -          --list ${URLS}      \ +  httrack                          \ +          --mirror                 \ +          --continue               \ +          --depth=${DEPTH}         \ +          --ext-depth ${EXT_DEPTH} \ +          --near                   \ +          --purge-old=0            \ +          --index                  \ +          --cookies=1              \ +          --user links             \ +          --list ${URLS}           \            --path ${target} ${OPTS}  } | 
