aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Silvio Rhatto <rhatto@riseup.net> 2013-08-25 16:08:20 -0300
committer Silvio Rhatto <rhatto@riseup.net> 2013-08-25 16:08:20 -0300
commit 399d75317c9adf4c22fdbb3b6bf4be579ce1b9f2 (patch)
tree 4871bbae283c4fdda68cf5f08eae725f5e692a30
download httruta-399d75317c9adf4c22fdbb3b6bf4be579ce1b9f2.tar.gz
httruta-399d75317c9adf4c22fdbb3b6bf4be579ce1b9f2.tar.bz2
Initial import
-rw-r--r-- README.mdwn 7
-rw-r--r-- scuttle.sh 48
2 files changed, 55 insertions, 0 deletions
diff --git a/README.mdwn b/README.mdwn
new file mode 100644
index 0000000..b5aff85
--- /dev/null
+++ b/README.mdwn
@@ -0,0 +1,7 @@
+Feed Crawler
+============
+
+Download all links from a feed using httrack.
+
+    curl $URL > urls.txt
+    httrack -r$LEVEL -e%$EXT_LEVEL -m$FILESIZE -Y -%L urls.txt
diff --git a/scuttle.sh b/scuttle.sh
new file mode 100644
index 0000000..d41405f
--- /dev/null
+++ b/scuttle.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+BASEDIR=/var/sites/links
+SCUTTLEDIR=`basename $( find ${BASEDIR} -maxdepth 1 -iname "SemanticScuttle-*" | head -n 1 )`
+CONFIGFILE=${BASEDIR}/${SCUTTLEDIR}/data/config.php
+MIRRORDIR=${BASEDIR}/mirrors
+TMPDIR=/tmp
+
+getconf() {
+ grep ${1} ${CONFIGFILE} | sed -e s/\[^\'\]\*\'// -e s/\'\.\*\$//
+}
+
+# database connection settings, looked up in the configuration file
+dbhost=$(getconf dbhost)
+dbname=$(getconf dbname)
+dbuser=$(getconf dbuser)
+dbpass=$(getconf dbpass)
+
+sqlquery() {
+ mysql --skip-column-names --batch \
+ --user=${dbuser} \
+ --password=${dbpass} \
+ --database=${dbname} \
+ --host=${dbhost} \
+ --execute="${1}"
+}
+
+# grabs URLs from db
+tmpfile=`mktemp -p ${TMPDIR}`
+chown links.links ${tmpfile}
+chmod 600 ${tmpfile}
+sqlquery "select bAddress from sc_bookmarks;" > ${tmpfile}
+
+# creates target dir
+year=`date +%Y`
+month=`date +%m`
+%day=`date +%d`
+TARGETDIR=${MIRRORDIR}/${year}/${month}
+sudo -u links mkdir -p ${TARGETDIR}
+
+# grabs URLs from the network
+httrack --verbose \
+ --user links \
+ --depth=1 \
+ --purge-old=0 \
+ --index \
+ --cookies=1 \
+ --list ${tmpfile} \
+ --path ${TARGETDIR} \