diff options
author | Silvio Rhatto <rhatto@riseup.net> | 2013-08-25 16:16:56 -0300 |
---|---|---|
committer | Silvio Rhatto <rhatto@riseup.net> | 2013-08-25 16:16:56 -0300 |
commit | 7a029cd60ca04b64d76d5b0d87a608c0fdd93106 (patch) | |
tree | f7c96ea1553492b74640cadc92555380923f6b67 /scuttler | |
parent | 399d75317c9adf4c22fdbb3b6bf4be579ce1b9f2 (diff) | |
download | httruta-7a029cd60ca04b64d76d5b0d87a608c0fdd93106.tar.gz httruta-7a029cd60ca04b64d76d5b0d87a608c0fdd93106.tar.bz2 |
Initial httracker code
Diffstat (limited to 'scuttler')
-rwxr-xr-x | scuttler | 51 |
1 files changed, 51 insertions, 0 deletions
#!/bin/bash
#
# Download all links from a Semantic Scuttle instance.
#
# Reads the database credentials from the SemanticScuttle data/config.php
# found under BASEDIR, dumps every bookmark address (sc_bookmarks.bAddress)
# into a temporary list file and mirrors those URLs with httrack into
# MIRRORDIR/<year>/<month>.
#
# Requires: mysql, httrack, sudo and a "links" system user/group.
# NOTE(review): the chown / sudo -u calls suggest this is meant to be run
# as root -- confirm against the deployment.
#

BASEDIR=/var/sites/links
MIRRORDIR=${BASEDIR}/mirrors
TMPDIR=/tmp

# Prints its arguments on stderr and aborts the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Locate the (first) SemanticScuttle install directory below BASEDIR.
# Fail early when none exists instead of continuing with an empty path.
scuttle_path=$(find "${BASEDIR}" -maxdepth 1 -iname 'SemanticScuttle-*' | head -n 1)
[[ -n "${scuttle_path}" ]] || die "no SemanticScuttle-* directory found in ${BASEDIR}"
SCUTTLEDIR=$(basename "${scuttle_path}")
CONFIGFILE=${BASEDIR}/${SCUTTLEDIR}/data/config.php
[[ -r "${CONFIGFILE}" ]] || die "cannot read ${CONFIGFILE}"

# Prints the value of a single-quoted setting from CONFIGFILE.
# Arguments: $1 - pattern identifying the setting line (e.g. dbuser)
# Outputs:   for each matching line, the text between the first pair of
#            single quotes
getconf() {
  local key=$1

  # First expression drops everything up to and including the opening
  # quote, the second drops the closing quote and the rest of the line.
  grep -e "${key}" "${CONFIGFILE}" | sed -e "s/[^']*'//" -e "s/'.*\$//"
}

dbuser=$(getconf dbuser)
dbpass=$(getconf dbpass)
dbname=$(getconf dbname)
dbhost=$(getconf dbhost)

# Runs one SQL statement against the configured database.
# Arguments: $1 - SQL statement to execute
# Outputs:   result rows in batch format, without column names
# NOTE(review): the password is passed on the command line and is therefore
# visible in the process list; consider a --defaults-extra-file instead.
sqlquery() {
  mysql --skip-column-names --batch \
    --user="${dbuser}" \
    --password="${dbpass}" \
    --database="${dbname}" \
    --host="${dbhost}" \
    --execute="${1}"
}

# grabs URLs from db
tmpfile=$(mktemp -p "${TMPDIR}") || die "mktemp failed"
# The list is only needed for the duration of this run.
trap 'rm -f -- "${tmpfile}"' EXIT
# httrack runs as "links" (see --user below), so that user must own the list.
chown links:links "${tmpfile}"
chmod 600 "${tmpfile}"
sqlquery "select bAddress from sc_bookmarks;" > "${tmpfile}" \
  || die "could not read bookmarks from database"

# creates target dir
# A single date call keeps year and month consistent across a month rollover.
TARGETDIR=${MIRRORDIR}/$(date +%Y/%m)
sudo -u links mkdir -p "${TARGETDIR}" || die "cannot create ${TARGETDIR}"

# grabs URLs from the network
httrack --verbose \
  --user links \
  --depth=1 \
  --purge-old=0 \
  --index \
  --cookies=1 \
  --list "${tmpfile}" \
  --path "${TARGETDIR}"