#!/bin/bash
#
# Httrack feed downloader
#
# Downloads the RSS feed at $FEED, extracts the URLs of its <link> elements
# and mirrors each of them with httrack into $MIRRORS/<sha1 of the URL>.
# A mirror that was fetched successfully is flagged with an empty "ok" file
# and is skipped on later runs.
#
# Provenance: commit ae92ad0d1eee7082c7e7af12a035a8059e332a26
# ("Adding httracker") by Silvio Rhatto, Sun, 25 Aug 2013 16:32:09 -0300.
#

# Make the feed pipeline fail when curl fails, not only when the last
# grep does.
set -o pipefail

# Configuration
MIRRORS="/var/cache/arquivo/conteudo/links.sarava.org/assets"
FEED="https://links.sarava.org/rss?sort=date_desc&count=50"
TMP="/tmp/httracker"
URLS="$TMP/urls.txt"
LEVEL="1"
EXT_LEVEL="1"
FILESIZE=""
USER="arquivo"
GROUP="arquivo"

#######################################
# Mirror a single URL with httrack.
# Globals:   MIRRORS, USER, GROUP, LEVEL, EXT_LEVEL, FILESIZE (read)
# Arguments: $1 - URL to mirror
# Outputs:   httrack's own output; an error message on stderr on failure
# Returns:   0 if the mirror already existed or was fetched, non-zero
#            otherwise
#######################################
httracker_get() {
  local url="$1"
  local hash target
  local -a options

  # Hash the URL followed by a newline, i.e. the same bytes `echo` fed to
  # sha1sum before, so mirrors from earlier runs keep their directory names.
  hash="$(printf '%s\n' "$url" | sha1sum | cut -d ' ' -f 1)"
  target="$MIRRORS/$hash"

  # We already got this one
  if [[ -f "$target/ok" ]]; then
    return 0
  fi

  mkdir -p -- "$target" || return

  # Options live in an array so that optional ones can be left out without
  # commenting out a line in the middle of a backslash-continued command
  # (a backslash inside a comment does not continue the line, which used to
  # cut the URL off the httrack invocation).
  # NOTE(review): httrack documents the external depth option as -%eN;
  # the -e%N spelling is kept as found -- confirm it has the intended effect.
  options=(
    --verbose
    --user "$USER"
    --depth=1
    --purge-old=0
    --index
    --cookies=1
    --path "$target"
    "-r$LEVEL"
    "-e%$EXT_LEVEL"
  )

  # Only pass the file size limit when one is configured.
  if [[ -n "$FILESIZE" ]]; then
    options+=("-m$FILESIZE")
  fi

  # Get the URL; the outcome is httrack's exit status.
  if httrack "${options[@]}" "$url"; then
    # Mark as downloaded
    touch -- "$target/ok"
    chown -R -- "$USER:$GROUP" "$target/"
  else
    echo "Error fetching $url." >&2
    # ${target:?} aborts instead of expanding to an empty path.
    rm -rf -- "${target:?}"
    return 1
  fi
}

#######################################
# Download the feed and mirror every URL found in it.
# Globals:   MIRRORS, TMP, FEED, URLS (read)
# Arguments: none
# Outputs:   error messages on stderr
# Returns:   exits 1 when the folders cannot be created or the feed cannot
#            be downloaded
#######################################
main() {
  local link

  # Create folders (-p: parents are created and existing folders are fine)
  if ! mkdir -p -- "$MIRRORS" "$TMP"; then
    echo "Error creating $MIRRORS or $TMP, aborting." >&2
    exit 1
  fi

  # Get URL
  # Thanks http://stackoverflow.com/questions/443991/how-to-parse-rss-feeds-xml-in-a-shell-script
  # NOTE(review): the first pattern was found as '[^<]*', which matches every
  # text fragment of the feed; it is restricted to <link> elements here as in
  # the recipe referenced above -- confirm against the feed.
  if ! curl --fail "$FEED" \
    | grep -o '<link>[^<]*' \
    | grep -o '[^>]*$' > "$URLS"; then
    echo "Error downloading feed $FEED, aborting." >&2
    exit 1
  fi

  # Iterate over all URLs, one per line. The list is read from descriptor 3
  # so that httrack keeps the script's standard input.
  while IFS= read -r link <&3 || [[ -n "$link" ]]; do
    [[ -n "$link" ]] || continue
    # A failed fetch has already been reported; carry on with the next URL.
    httracker_get "$link"
  done 3< "$URLS"
}

main "$@"