aboutsummaryrefslogtreecommitdiff
path: root/httracker
diff options
context:
space:
mode:
author Silvio Rhatto <rhatto@riseup.net> 2013-08-25 16:32:09 -0300
committer Silvio Rhatto <rhatto@riseup.net> 2013-08-25 16:32:09 -0300
commit ae92ad0d1eee7082c7e7af12a035a8059e332a26 (patch)
tree 744facfc1b7433dc232e988cbfa7853ac405e5ed /httracker
parent 7a029cd60ca04b64d76d5b0d87a608c0fdd93106 (diff)
download httruta-ae92ad0d1eee7082c7e7af12a035a8059e332a26.tar.gz
httruta-ae92ad0d1eee7082c7e7af12a035a8059e332a26.tar.bz2
Adding httracker
Diffstat (limited to 'httracker')
-rwxr-xr-x httracker 67
1 files changed, 67 insertions, 0 deletions
diff --git a/httracker b/httracker
new file mode 100755
index 0000000..95701b1
--- /dev/null
+++ b/httracker
@@ -0,0 +1,67 @@
+#!/bin/bash
+#
+# Httrack feed downloader
+#
+
+# Configuration
+MIRRORS="/var/cache/arquivo/conteudo/links.sarava.org/assets"
+FEED="https://links.sarava.org/rss?sort=date_desc&count=50"
+TMP="/tmp/httracker"
+URLS="$TMP/urls.txt"
+LEVEL="1"
+EXT_LEVEL="1"
+FILESIZE=""
+USER="arquivo"
+GROUP="arquivo"
+
+function httracker_get {
+ local url="$1"
+ local hash="`echo $1 | sha1sum | cut -d ' ' -f 1`"
+ local target="$MIRRORS/$hash"
+
+ mkdir -p $target
+
+ # We already got this one
+ if [ -f "$target/ok" ]; then
+ return
+ fi
+
+ # Get each URL
+ httrack --verbose \
+ --user $USER \
+ --depth=1 \
+ --purge-old=0 \
+ --index \
+ --cookies=1 \
+ --path ${target} \
+ -r$LEVEL \
+ -e%$EXT_LEVEL \
+ #-m$FILESIZE \
+ "$url"
+
+ if [ "$1" == "0" ]; then
+ # Mark as downloaded
+ touch $target/ok
+ chown -R $USER.$GROUP $target/
+ else
+ echo "Error fetching $url."
+ rm -rf $target
+ fi
+}
+
+# Create folders; -p keeps re-runs quiet when they already exist
+mkdir -p "$MIRRORS" "$TMP" || exit 1
+
+# Get URL: keep the text of each <link> element of the feed, one per line.
+# Thanks http://stackoverflow.com/questions/443991/how-to-parse-rss-feeds-xml-in-a-shell-script
+# pipefail: without it "$?" only reports the last grep, never a curl failure.
+set -o pipefail
+if ! curl --fail "$FEED" | grep -o '<link>[^<]*' | grep -o '[^>]*$' > "$URLS"; then
+  echo "Error downloading feed $FEED, aborting." >&2
+  exit 1
+fi
+
+# Iterate over all URLs; fd 3 keeps the list away from httrack's stdin
+while read -r link <&3; do
+  httracker_get "$link"
+done 3< "$URLS"