#!/bin/bash
#
# Download all links from a Semantic Scuttle instance.
#
# Steps:
#   1. locate the SemanticScuttle-* directory under BASEDIR and read the
#      database settings from its data/config.php;
#   2. dump every bAddress value of the sc_bookmarks table into a temporary
#      list file;
#   3. hand that list to httrack, with MIRRORDIR/<year>/<month> as its path.
#
# NOTE(review): the chown, `sudo -u links` and `httrack --user links` calls
# suggest the script is meant to be started as root -- confirm.

set -euo pipefail

BASEDIR=/var/sites/links
MIRRORDIR=${BASEDIR}/mirrors
TMPDIR=/tmp

# Temporary files registered here are removed when the script exits.
tmpfiles=()

#######################################
# Print a message on stderr and abort the script.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Remove every temporary file registered in tmpfiles.
# Globals: tmpfiles (read)
#######################################
cleanup() {
  # The length guard avoids expanding an empty array under `set -u`.
  (( ${#tmpfiles[@]} == 0 )) || rm -f -- "${tmpfiles[@]}"
}
trap cleanup EXIT

# -quit stops at the first match, like the former `| head -n 1`, but without
# a pipeline and without calling basename on an empty result.
scuttle_path=$(find "${BASEDIR}" -maxdepth 1 -iname 'SemanticScuttle-*' \
  -print -quit)
[[ -n "${scuttle_path}" ]] \
  || die "no SemanticScuttle-* directory found in ${BASEDIR}"
SCUTTLEDIR=${scuttle_path##*/}
CONFIGFILE=${BASEDIR}/${SCUTTLEDIR}/data/config.php
[[ -r "${CONFIGFILE}" ]] || die "cannot read ${CONFIGFILE}"

#######################################
# Print the single-quoted value assigned to a variable in CONFIGFILE.
# (The name shadows the system getconf utility inside this script only.)
# Globals:   CONFIGFILE (read)
# Arguments: $1 - variable name without the leading '$', e.g. dbuser
# Outputs:   the text between the first pair of single quotes on the first
#            line of the form `$<name> = ...`
# Returns:   non-zero when no such line exists
#######################################
getconf() {
  local key=$1
  # Anchoring on the assignment keeps comments that merely mention the key
  # from matching; -m 1 guarantees a single-line result.
  grep -m 1 -E -- "^[[:space:]]*\\\$${key}[[:space:]]*=" "${CONFIGFILE}" \
    | sed -e "s/[^']*'//" -e "s/'.*\$//"
}

dbuser=$(getconf dbuser) || die "dbuser not found in ${CONFIGFILE}"
dbpass=$(getconf dbpass) || die "dbpass not found in ${CONFIGFILE}"
dbname=$(getconf dbname) || die "dbname not found in ${CONFIGFILE}"
dbhost=$(getconf dbhost) || die "dbhost not found in ${CONFIGFILE}"

# The password goes into a private option file instead of the mysql command
# line, where it would be visible to other users through `ps`.
mycnf=$(mktemp -p "${TMPDIR}") || die "mktemp failed"
tmpfiles+=("${mycnf}")
escaped_pass=${dbpass//\\/\\\\}
escaped_pass=${escaped_pass//\"/\\\"}
printf '[client]\npassword="%s"\n' "${escaped_pass}" > "${mycnf}"

#######################################
# Run one SQL statement against the Scuttle database.
# Globals:   mycnf, dbuser, dbname, dbhost (read)
# Arguments: $1 - SQL statement
# Outputs:   result rows in batch format, without column names
# Returns:   exit status of mysql
#######################################
sqlquery() {
  # --defaults-extra-file has to be the first option on the command line.
  mysql --defaults-extra-file="${mycnf}" \
    --skip-column-names --batch \
    --user="${dbuser}" \
    --database="${dbname}" \
    --host="${dbhost}" \
    --execute="${1}"
}

# grabs URLs from db
tmpfile=$(mktemp -p "${TMPDIR}") || die "mktemp failed"
tmpfiles+=("${tmpfile}")
chown links:links "${tmpfile}"
chmod 600 "${tmpfile}"
sqlquery "select bAddress from sc_bookmarks;" > "${tmpfile}" \
  || die "could not read bookmarks from database ${dbname}"

# creates target dir
# A single date call keeps year and month consistent across a month boundary.
TARGETDIR=${MIRRORDIR}/$(date +%Y/%m)
sudo -u links mkdir -p -- "${TARGETDIR}"

# grabs URLs from the network
httrack --verbose \
  --user links \
  --depth=1 \
  --purge-old=0 \
  --index \
  --cookies=1 \
  --list "${tmpfile}" \
  --path "${TARGETDIR}"