#!/bin/bash
#
# Httrack feed downloader.
#
# Fetches an RSS feed of links and mirrors each linked URL locally with
# httrack.  Every URL is stored under $MIRRORS/<sha1-of-url>; an "ok"
# marker file records a successful download so the URL is skipped on
# subsequent runs.  Failed downloads are removed so they get retried.

# A pipeline fails if any stage fails (needed so a curl failure is
# detected even though grep is the last stage of the feed pipeline).
set -o pipefail

# Configuration
MIRRORS="/var/cache/sites/arquivo/conteudo/links.sarava.org/assets"
FEED="https://links.sarava.org/rss?sort=date_desc&count=100"
TMP="/tmp/httracker"
URLS="$TMP/urls.txt"
LEVEL="1"        # httrack mirror depth (-r)
EXT_LEVEL="1"    # currently unused: external-link depth (-e%)
FILESIZE=""      # currently unused: max file size (-m)
USER="arquivo"
GROUP="arquivo"

# Mirror a single URL into $MIRRORS/<sha1(url)>.
# Arguments: $1 - URL to mirror.
# Side effects: creates/chowns the target directory; on success touches
# "$target/ok", on failure removes the whole target so a later run retries.
function httracker_get {
  local url="$1"
  local hash target
  hash="$(printf '%s\n' "$url" | sha1sum | cut -d ' ' -f 1)"
  target="$MIRRORS/$hash"

  mkdir -p "$target"
  # ':' is the portable owner/group separator ('.' is deprecated).
  chown -R "$USER:$GROUP" "$target/"

  # We already got this one
  if [ -f "$target/ok" ]; then
    return
  fi

  # Get each URL
  httrack \
    --user "$USER" \
    --depth=1 \
    --purge-old=0 \
    --index \
    --cookies=1 \
    --path "$target" \
    -r"$LEVEL" "$url"
    #-e%${EXT_LEVEL} \
    #-m$FILESIZE \
    #--verbose

  if [ "$?" == "0" ]; then
    # Mark as downloaded
    touch "$target/ok"
  else
    echo "Error fetching $url." >&2
    rm -rf "$target"
  fi
}

# Create folders
mkdir -p "$MIRRORS" "$TMP"

# Get URLs.
# Thanks http://stackoverflow.com/questions/443991/how-to-parse-rss-feeds-xml-in-a-shell-script
# First grep captures "<link>TEXT" runs, second strips everything up to
# the final '>', leaving just the link text.  (The original pattern had
# lost the '<link>' tag and matched everything; restored per the cited
# technique.)  With pipefail set, a curl failure is caught here too.
if ! curl -s "$FEED" | grep -o '<link>[^<]*' | grep -o '[^>]*$' > "$URLS"; then
  echo "Error downloading feed $FEED, aborting." >&2
  exit 1
fi

# Iterate over all URLs, one per line, without word-splitting or globbing.
while IFS= read -r link; do
  [ -n "$link" ] || continue
  httracker_get "$link"
done < "$URLS"

# Cleanup
rm -rf "$TMP"