blob: be0cabceef3f03f9c07935100ac1f0ee21633cbd (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
|
#!/bin/bash
#
# Download all links from a Semantic Scuttle instance.
#
# Root of the links site; the SemanticScuttle install and the mirrors live
# below it.
BASEDIR=/var/sites/links
# First entry directly under BASEDIR whose name matches SemanticScuttle-*
# (case-insensitive). Empty when nothing matches.
scuttle_path=$(find "${BASEDIR}" -maxdepth 1 -iname "SemanticScuttle-*" | head -n 1)
# Keep only the last path component. Parameter expansion is used instead of
# basename so that an empty or whitespace-containing result cannot break the
# call.
SCUTTLEDIR=${scuttle_path##*/}
if [[ -z "${SCUTTLEDIR}" ]]; then
  printf 'warning: no SemanticScuttle-* entry found under %s\n' "${BASEDIR}" >&2
fi
# Configuration file the database settings are read from.
CONFIGFILE=${BASEDIR}/${SCUTTLEDIR}/data/config.php
# Parent directory of the per-month mirror directories.
MIRRORDIR=${BASEDIR}/mirrors
# Directory in which the temporary URL list is created.
TMPDIR=/tmp
#######################################
# Print the single-quoted value found on config lines matching a pattern.
# For every line of CONFIGFILE that matches, everything up to and including
# the first single quote is removed, then everything from the next single
# quote to the end of the line is removed.
# Globals:   CONFIGFILE (read)
# Arguments: $1 - grep pattern selecting the setting, e.g. dbuser
# Outputs:   one extracted value per matching line, on stdout
# Returns:   exit status of sed (last stage of the pipeline)
#######################################
getconf() {
  # Both expansions are quoted so a pattern or path containing whitespace or
  # glob characters reaches grep as one argument; -- keeps a pattern that
  # starts with '-' from being parsed as an option.
  grep -- "${1}" "${CONFIGFILE}" \
    | sed -e "s/[^']*'//" -e "s/'.*\$//"
}
# Database connection settings, each extracted from the config file by
# getconf.
dbuser=$(getconf dbuser)
dbpass=$(getconf dbpass)
dbname=$(getconf dbname)
dbhost=$(getconf dbhost)
#######################################
# Run one SQL statement through the mysql command-line client.
# Globals:   dbuser, dbpass, dbname, dbhost (read)
# Arguments: $1 - SQL statement to execute
# Outputs:   whatever mysql prints in batch mode without column names
# Returns:   exit status of mysql
#######################################
sqlquery() {
  # Every expansion is quoted so that credentials containing whitespace or
  # glob characters are passed to mysql as a single, unmodified argument.
  # NOTE(review): the password is passed on the command line and is therefore
  # visible in the process list; consider an option file instead.
  mysql --skip-column-names --batch \
    --user="${dbuser}" \
    --password="${dbpass}" \
    --database="${dbname}" \
    --host="${dbhost}" \
    --execute="${1}"
}
# Dump every bookmark address from the database into a private temporary
# file; the file is later handed to httrack through --list.
tmpfile=$(mktemp -p "${TMPDIR}") || {
  printf 'error: cannot create a temporary file in %s\n' "${TMPDIR}" >&2
  exit 1
}
# Remove the URL list on every exit path instead of leaving it behind.
trap 'rm -f -- "${tmpfile}"' EXIT
# Hand the file to the links user and restrict it to owner read/write.
# user:group is the standard separator; the user.group form is deprecated.
chown links:links "${tmpfile}"
chmod 600 "${tmpfile}"
sqlquery "select bAddress from sc_bookmarks;" > "${tmpfile}"
# Create the per-month mirror directory ${MIRRORDIR}/<year>/<month>.
# A single date call feeds all three fields so they cannot disagree when the
# script runs across a date change. The previous "%day=..." line was not a
# valid assignment and was executed as a command; day is now really set.
IFS=- read -r year month day <<< "$(date +%Y-%m-%d)"
TARGETDIR=${MIRRORDIR}/${year}/${month}
# The directory is created as the links user.
sudo -u links mkdir -p "${TARGETDIR}"
# Fetch the URLs listed in the temporary file from the network with httrack,
# storing the result under TARGETDIR.
# The list and path arguments are quoted so they stay single arguments, and
# the final option no longer ends in a dangling line continuation.
httrack --verbose \
  --user links \
  --depth=1 \
  --purge-old=0 \
  --index \
  --cookies=1 \
  --list "${tmpfile}" \
  --path "${TARGETDIR}"
|