#!/bin/bash
#
# Download all links from a Semantic Scuttle instance.
#
# Origin: commit 7a029cd60ca04b64d76d5b0d87a608c0fdd93106
#         "Initial httracker code", Silvio Rhatto,
#         Sun, 25 Aug 2013 16:16:56 -0300 (file: scuttler, mode 100755).
#
# What it does:
#   1. Locates the first SemanticScuttle-* directory under BASEDIR and reads
#      the database credentials from its data/config.php.
#   2. Dumps every bAddress from the sc_bookmarks table into a temp file.
#   3. Runs httrack on that list, mirroring into MIRRORDIR/<year>/<month>.
#
# NOTE(review): the script chowns a file to the "links" user and calls
# "sudo -u links", so it presumably has to run as root - confirm.

# Abort on use of an unset variable instead of silently expanding to "".
set -u

BASEDIR=/var/sites/links
MIRRORDIR=${BASEDIR}/mirrors
TMPDIR=/tmp

# Path of the temporary URL list; set once mktemp has succeeded.
tmpfile=

# Print a message to stderr and exit with status 1.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

# Remove the temporary URL list, if one was created. Runs on every exit path.
cleanup() {
  if [[ -n "${tmpfile}" ]]; then
    rm -f -- "${tmpfile}"
  fi
}
trap cleanup EXIT

# Locate the SemanticScuttle installation (first match, as before).
scuttle_path=$(find "${BASEDIR}" -maxdepth 1 -iname 'SemanticScuttle-*' | head -n 1)
[[ -n "${scuttle_path}" ]] || die "no SemanticScuttle-* directory found in ${BASEDIR}"
SCUTTLEDIR=${scuttle_path##*/}
CONFIGFILE=${BASEDIR}/${SCUTTLEDIR}/data/config.php
[[ -r "${CONFIGFILE}" ]] || die "cannot read ${CONFIGFILE}"

# Print the single-quoted value assigned to a PHP variable in CONFIGFILE.
#
# Arguments: $1 - variable name without the leading "$" (e.g. dbuser)
# Outputs:   the text between the first pair of single quotes on the
#            assignment line; nothing if the variable is not assigned.
#
# Only lines of the form "$name = ..." are considered, so comments that merely
# mention the name are ignored; if the variable is assigned more than once the
# last assignment is used.
getconf() {
  local key=${1:?getconf: missing variable name}

  grep -E -- '^[[:space:]]*\$'"${key}"'[[:space:]]*=' "${CONFIGFILE}" \
    | tail -n 1 \
    | sed -e "s/[^']*'//" -e "s/'.*\$//"
}

dbuser=$(getconf dbuser)
dbpass=$(getconf dbpass)
dbname=$(getconf dbname)
dbhost=$(getconf dbhost)

[[ -n "${dbuser}" ]] || die "dbuser not found in ${CONFIGFILE}"
[[ -n "${dbname}" ]] || die "dbname not found in ${CONFIGFILE}"

# Run one SQL statement against the Scuttle database.
#
# Arguments: $1 - SQL statement
# Outputs:   rows in batch format without column names, on stdout
# Returns:   exit status of the mysql client
#
# NOTE(review): the password is passed on the command line and is therefore
# visible in the process list while mysql runs; consider an option file
# (--defaults-extra-file) instead.
sqlquery() {
  mysql --skip-column-names --batch \
    --user="${dbuser}" \
    --password="${dbpass}" \
    --database="${dbname}" \
    --host="${dbhost}" \
    --execute="${1:?sqlquery: missing SQL statement}"
}

# grabs URLs from db
tmpfile=$(mktemp -p "${TMPDIR}") || die "cannot create temporary file in ${TMPDIR}"
# httrack is told to run as "links" below, so that user must own the list.
chown links:links "${tmpfile}"
chmod 600 "${tmpfile}"
sqlquery "select bAddress from sc_bookmarks;" > "${tmpfile}" \
  || die "could not read bookmarks from database ${dbname}"

# creates target dir (a single date call, so year and month always agree)
TARGETDIR=${MIRRORDIR}/$(date +%Y/%m)
sudo -u links mkdir -p "${TARGETDIR}" || die "cannot create ${TARGETDIR}"

# grabs URLs from the network
httrack --verbose \
  --user links \
  --depth=1 \
  --purge-old=0 \
  --index \
  --cookies=1 \
  --list "${tmpfile}" \
  --path "${TARGETDIR}"