blob: 08852069723a985358472926deecf9fed49e2375 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
#!/bin/bash
#
# Misc httracker functions.
#
# Build the httrack option string shared by the download functions.
#
# Globals read:    DEPTH, EXT_DEPTH, target (target is normally a variable of
#                  the calling function, visible here through dynamic scoping)
# Globals written: OPTS - a single space-separated string; callers expand it
#                  unquoted so that it splits into individual arguments
httracker_opts() {
  # Traversal settings.
  OPTS=" --mirror --continue"
  OPTS+=" --depth=${DEPTH}"
  OPTS+=" --ext-depth ${EXT_DEPTH}"
  # Fixed switches.
  OPTS+=" --near --purge-old=0 --index --cookies=1"
  # Output location.
  OPTS+=" --path ${target}"
}
# Download one URL in mirror mode.
#
# The mirror lives in "$MIRRORS/<hash>", where <hash> is the SHA-1 of the URL
# followed by a newline. A directory that already contains an "ok" marker file
# is considered done and is skipped.
#
# Globals read:    MIRRORS, USER, GROUP
# Globals written: OPTS (set by httracker_opts, extended here when root)
# Arguments:       $1 - URL to fetch
# Outputs:         "Error fetching <url>." on stdout when httrack fails
# Returns:         0 when the URL was already mirrored or was fetched
#                  successfully; after a failed fetch, the status of the
#                  cleanup `rm`
function httracker_get {
  local url="$1"
  local hash target me

  # printf '%s\n' reproduces the bytes the former `echo $1` emitted for an
  # ordinary URL (URL + newline), so existing mirror directories keep their
  # names, while no longer word-splitting or glob-expanding the URL.
  hash="$(printf '%s\n' "$url" | sha1sum | cut -d ' ' -f 1)"
  target="$MIRRORS/$hash"

  # Make sure that target exists
  mkdir -p -- "$target"

  # We already got this one
  if [ -f "$target/ok" ]; then
    return
  fi

  # Basic options (reads $target from this function's scope)
  httracker_opts

  me="$(whoami)"
  if [ "$me" == "root" ]; then
    # Additional options
    OPTS="$OPTS --user $USER"

    # Fix permissions before httrack runs with --user
    if [ "$me" != "$USER" ]; then
      chown -R -- "$USER:$GROUP" "$target/"
    fi
  fi

  # OPTS is intentionally unquoted: it is a flat string that must split into
  # separate arguments. The URL is quoted so that characters such as '?' and
  # '*' reach httrack untouched.
  if httrack ${OPTS} "$url"; then
    # Mark as downloaded
    touch -- "$target/ok"
  else
    echo "Error fetching $url."
    # ${target:?} aborts instead of expanding to an empty path.
    rm -rf -- "${target:?}"
  fi
}
# Download every URL listed in $URLS in one httrack run, storing the result
# in a per-month directory "$MIRRORDIR/<year>/<month>".
#
# Globals read:    MIRRORDIR, USER, URLS
# Globals written: OPTS (set by httracker_opts, extended here when root),
#                  year, month, day, target
# Returns:         exit status of httrack
function httracker_get_incremental {
  # NOTE(review): year, month, day and target were never declared local and
  # day is not used in this function. They are kept as globals here in case
  # code outside this function reads them - confirm before making them local.
  year="$(date +%Y)"
  month="$(date +%m)"
  day="$(date +%d)"
  target="${MIRRORDIR}/${year}/${month}"

  # Create target dir as the unprivileged user
  sudo -u "${USER}" mkdir -p -- "${target}"

  # Basic options
  httracker_opts

  # Additional options. Append to the basic options instead of replacing
  # them: the previous assignment discarded everything httracker_opts had
  # just built (including --path) whenever the script ran as root.
  if [ "$(whoami)" == "root" ]; then
    OPTS="$OPTS --user $USER"
  fi

  # Grab URLs from the network. OPTS is intentionally unquoted so that it
  # splits into separate arguments; the list file path is quoted.
  httrack ${OPTS} --list "${URLS}"
}
# Get SemanticScuttle parameter.
#
# Prints, for every line of $CONFIGFILE matching the pattern, the text found
# between the first single quote and the next one. A matching line without
# single quotes is printed unchanged.
#
# Globals read: CONFIGFILE - path of the configuration file
# Arguments:    $1 - grep (basic regex) pattern selecting the parameter line
# Outputs:      the extracted value(s) on stdout, one per matching line
function httracker_scuttle_config() {
  # -e / -- keep a pattern or path starting with '-' from being read as an
  # option; quoting keeps patterns and paths with whitespace intact.
  grep -e "${1}" -- "${CONFIGFILE}" \
    | sed -e "s/[^']*'//" -e "s/'.*\$//"
}
# Query a mysql database.
#
# Runs one statement through the mysql client in batch mode without column
# headers.
#
# Globals read: DBUSER, DBPASS, DBNAME, DBHOST
# Arguments:    $1 - SQL statement to execute
# Outputs:      whatever the mysql client prints
# Returns:      exit status of the mysql client
function httracker_sqlquery {
  # Every value is quoted so that credentials or names containing spaces or
  # glob characters are passed as a single argument.
  # NOTE(review): the password is passed on the command line and is therefore
  # visible in the process list; consider a client option file instead.
  mysql --skip-column-names --batch \
    --user="${DBUSER}" \
    --password="${DBPASS}" \
    --database="${DBNAME}" \
    --host="${DBHOST}" \
    --execute="${1}"
}
# Iterate over all URLs.
#
# Calls httracker_get once for every whitespace-separated entry in the file
# named by $URLS (entries may be separated by spaces, tabs or newlines).
#
# Globals read: URLS - path of the URL list file
# Returns:      status of the last httracker_get call, or 0 when the list is
#               empty or unreadable
function httracker_iterate {
  local -a links=()
  local link

  # Slurp the whole file into an array. Unlike an unquoted command
  # substitution this performs no pathname expansion, so URLs containing
  # '?' or '*' stay intact, and quotes or backslashes are taken literally.
  # `read -d ''` reports failure on reaching end of file even though the
  # array has been filled, hence the `|| true`.
  IFS=$' \t\n' read -r -d '' -a links <"$URLS" || true

  for link in "${links[@]}"; do
    httracker_get "$link"
  done
}
# Create basic folders.
#
# Creates the $MIRRORS and $TMP directories (including parents) and, when
# running as root, hands $TMP over to $USER:$GROUP.
#
# Globals read: MIRRORS, TMP, USER, GROUP
function httracker_setup_folders {
  # Quoted so that paths containing whitespace create the intended directory.
  mkdir -p -- "$MIRRORS" "$TMP"

  if [ "$(whoami)" == "root" ]; then
    # Use the ':' separator; the older '.' form is ambiguous for account
    # names that themselves contain a dot.
    chown -R -- "$USER:$GROUP" "$TMP"
  fi
}
|