about summary refs log tree commit diff
path: root/scuttler
diff options
context:
space:
mode:
author Silvio Rhatto <rhatto@riseup.net> 2013-08-25 16:16:56 -0300
committer Silvio Rhatto <rhatto@riseup.net> 2013-08-25 16:16:56 -0300
commit 7a029cd60ca04b64d76d5b0d87a608c0fdd93106 (patch)
tree f7c96ea1553492b74640cadc92555380923f6b67 /scuttler
parent 399d75317c9adf4c22fdbb3b6bf4be579ce1b9f2 (diff)
download httruta-7a029cd60ca04b64d76d5b0d87a608c0fdd93106.tar.gz
httruta-7a029cd60ca04b64d76d5b0d87a608c0fdd93106.tar.bz2
Initial httracker code
Diffstat (limited to 'scuttler')
-rwxr-xr-x scuttler 51
1 files changed, 51 insertions, 0 deletions
diff --git a/scuttler b/scuttler
new file mode 100755
index 0000000..be0cabc
--- /dev/null
+++ b/scuttler
@@ -0,0 +1,51 @@
+#!/bin/bash
+#
+# Download all links from a Semantic Scuttle instance.
+#
+
+# Installation layout: BASEDIR holds both the SemanticScuttle tree and the mirrors.
+BASEDIR=/var/sites/links
+# Last path component of the first entry directly under BASEDIR whose name
+# matches "SemanticScuttle-*" (case-insensitive).
+SCUTTLEDIR=$(basename -- "$(find "${BASEDIR}" -maxdepth 1 -iname "SemanticScuttle-*" | head -n 1)")
+# Stop early rather than continue with a config path that has an empty component.
+[[ -n "${SCUTTLEDIR}" ]] || { printf '%s\n' "scuttler: no SemanticScuttle-* entry found in ${BASEDIR}" >&2; exit 1; }
+CONFIGFILE=${BASEDIR}/${SCUTTLEDIR}/data/config.php
+MIRRORDIR=${BASEDIR}/mirrors
+TMPDIR=/tmp
+
+# Extract the first single-quoted value from each line of CONFIGFILE that
+# matches a pattern.
+# Globals:   CONFIGFILE (read)
+# Arguments: $1 - grep pattern selecting the setting (e.g. dbuser)
+# Outputs:   one line per matching line of CONFIGFILE, on stdout; a matching
+#            line without any single quote is printed unchanged
+getconf() {
+  # The first sed expression drops everything up to and including the first
+  # single quote; the second drops from the next single quote to end of line.
+  grep -- "${1}" "${CONFIGFILE}" | sed -e "s/[^']*'//" -e "s/'.*\$//"
+}
+
+# Database connection settings, each looked up by name through getconf.
+dbuser=$(getconf dbuser)
+dbpass=$(getconf dbpass)
+dbname=$(getconf dbname)
+dbhost=$(getconf dbhost)
+
+# Run one SQL statement through the mysql client.
+# Globals:   dbuser, dbpass, dbname, dbhost (read)
+# Arguments: $1 - SQL text handed to --execute
+# Outputs:   whatever mysql prints for the statement, in --batch mode and
+#            without column names, on stdout
+sqlquery() {
+  # Every value is quoted so that credentials containing spaces or glob
+  # characters reach mysql as a single argument each.
+  # NOTE(review): the password is passed as a command-line argument; consider
+  # an option file instead.
+  mysql --skip-column-names --batch \
+    --user="${dbuser}" \
+    --password="${dbpass}" \
+    --database="${dbname}" \
+    --host="${dbhost}" \
+    --execute="${1}"
+}
+
+# grabs URLs from db
+# Abort if the temporary file cannot be created instead of redirecting to an empty name.
+tmpfile=$(mktemp -p "${TMPDIR}") || exit 1
+# Remove the URL list when the script exits so runs do not leave files behind in TMPDIR.
+trap 'rm -f -- "${tmpfile}"' EXIT
+# Hand the file to the links user and keep it unreadable for everyone else.
+chown links:links "${tmpfile}"
+chmod 600 "${tmpfile}"
+sqlquery "select bAddress from sc_bookmarks;" > "${tmpfile}"
+
+# creates target dir
+# A single date call supplies every field, so year and month cannot disagree
+# when the script runs across a month boundary.
+IFS=- read -r year month day <<< "$(date +%Y-%m-%d)"
+# Mirrors are grouped per month; day is set but is not part of the path.
+TARGETDIR=${MIRRORDIR}/${year}/${month}
+sudo -u links mkdir -p -- "${TARGETDIR}"
+
+# grabs URLs from the network
+httrack --verbose \
+ --user links \
+ --depth=1 \
+ --purge-old=0 \
+ --index \
+ --cookies=1 \
+ --list ${tmpfile} \
+ --path ${TARGETDIR} \