aboutsummaryrefslogtreecommitdiff
path: root/lib/httracker/functions
blob: 08852069723a985358472926deecf9fed49e2375 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/bin/bash
#
# Misc httracker functions.
#

# Set common httrack options.
#
# Takes no arguments; everything is exchanged through variables:
#   OPTS      (written) - replaced by one flat, whitespace-separated option
#                         string. The callers in this file expand it unquoted
#                         so that it splits into individual httrack arguments.
#   DEPTH     (read)    - value given to --depth.
#   EXT_DEPTH (read)    - value given to --ext-depth.
#   target    (read)    - value given to --path. It must be set before this
#                         function is called; because of bash dynamic scoping
#                         a caller's `local target` is visible here as well.
#
# Because OPTS is a plain string, a value containing whitespace turns into
# several words once OPTS is expanded unquoted.
function httracker_opts {
  OPTS=" --mirror                 \
         --continue               \
         --depth=${DEPTH}         \
         --ext-depth ${EXT_DEPTH} \
         --near                   \
         --purge-old=0            \
         --index                  \
         --cookies=1              \
         --path ${target}"
}

# Download a single URL, mirror mode.
#
# The mirror is stored in "$MIRRORS/<sha1 of the URL>". A successful download
# is flagged with an empty "ok" file inside that folder, so later calls for
# the same URL return immediately without running httrack again.
#
# Globals:
#   MIRRORS     (read)    - base folder holding one sub-folder per URL.
#   USER, GROUP (read)    - account passed to httrack via --user and made
#                           owner of the mirror folder when running as root.
#   OPTS        (written) - rebuilt through httracker_opts.
# Arguments:
#   $1 - URL to fetch.
# Outputs:
#   "Error fetching <url>." on stdout when httrack fails.
# Returns:
#   1 when no hash could be computed for the URL. A failed download is only
#   reported through the message above; the exit status is then the one of
#   the clean-up `rm`.
function httracker_get {
  local url="$1"
  local hash target me

  # Hash the URL the way `echo` prints it (trailing newline included) so the
  # folder names match mirrors created earlier, but quote it so that "?", "*"
  # or "[" inside the URL can never be expanded as a file name pattern.
  hash="$(printf '%s\n' "$url" | sha1sum | cut -d ' ' -f 1)"

  # With an empty hash the target would be the whole $MIRRORS folder, which
  # the failure branch below removes recursively.
  if [ -z "$hash" ]; then
    echo "Could not compute a hash for $url." >&2
    return 1
  fi

  target="$MIRRORS/$hash"

  # Make sure that target exists
  mkdir -p -- "$target"

  # We already got this one
  if [ -f "$target/ok" ]; then
    return
  fi

  # Basic options (httracker_opts reads $target from this scope)
  httracker_opts

  me="$(whoami)"
  if [ "$me" == "root" ]; then
    # Additional options: let httrack drop privileges
    OPTS="$OPTS --user $USER"

    # Fix permissions before the download starts, since httrack will write
    # into the folder as $USER.
    if [ "$me" != "$USER" ]; then
      chown -R "${USER}:${GROUP}" "$target/"
    fi
  fi

  # OPTS is deliberately left unquoted: it is a flat option string that has
  # to be split into words. The URL is quoted so it reaches httrack verbatim.
  if httrack ${OPTS} "$url"; then
    # Mark as downloaded
    touch -- "$target/ok"
  else
    echo "Error fetching $url."
    rm -rf -- "$target"
  fi
}

# Download URLs, incremental mode.
#
# Mirrors every URL listed in the file $URLS into a folder named after the
# current year and month: "$MIRRORDIR/<YYYY>/<MM>".
#
# Globals:
#   MIRRORDIR (read)   - base folder of the incremental mirrors.
#   URLS      (read)   - file handed to httrack through --list.
#   USER      (read)   - account that creates the target folder (via sudo)
#                        and that httrack runs as when started by root.
#   year, month, day, target (written) - left as globals, as before;
#                        httracker_opts reads $target for --path.
#   OPTS      (written) - rebuilt through httracker_opts.
# Returns:
#   The exit status of httrack.
function httracker_get_incremental {
  # Create target dir. The date is taken from a single `date` call so year
  # and month cannot disagree when the run straddles midnight.
  read -r year month day <<< "$(date '+%Y %m %d')"
  target="${MIRRORDIR}/${year}/${month}"
  sudo -u "${USER}" mkdir -p "${target}"

  # Basic options
  httracker_opts

  # Additional options: append to the basic options instead of replacing
  # them, otherwise --mirror, --path etc. are lost when running as root.
  if [ "$(whoami)" == "root" ]; then
    OPTS="$OPTS --user $USER"
  fi

  # Grab URLs from the network. OPTS stays unquoted on purpose: it is a flat
  # option string that has to be split into words.
  httrack ${OPTS} --list "${URLS}"
}

# Get SemanticScuttle parameter.
#
# Prints, for every line of $CONFIGFILE matching the pattern, the text found
# between the first single quote on that line and the next one (lines without
# a single quote are printed unchanged).
# NOTE(review): presumably meant for lines such as `$name = 'value';` --
# confirm against the actual config file.
#
# Globals:
#   CONFIGFILE (read) - file to search.
# Arguments:
#   $1 - grep pattern (basic regular expression) selecting the line(s).
function httracker_scuttle_config() {
  # Pattern and file name are quoted, and the pattern is passed with -e, so
  # whitespace, glob characters or a leading "-" cannot change the command.
  grep -e "${1}" -- "${CONFIGFILE}" \
    | sed -e "s/[^']*'//" -e "s/'.*\$//"
}

# Query a mysql database.
#
# Runs one statement through the mysql command line client in batch mode,
# without column names in the output.
#
# Globals:
#   DBUSER, DBPASS, DBNAME, DBHOST (read) - connection settings.
# Arguments:
#   $1 - SQL statement to execute.
# Outputs:
#   Whatever mysql prints for the statement.
# Returns:
#   The exit status of mysql.
function httracker_sqlquery {
  # Every value is quoted so credentials containing spaces or glob characters
  # arrive as a single argument.
  # NOTE(review): the password is passed on the command line and may be
  # visible to other local users in the process list.
  mysql --skip-column-names --batch \
        --user="${DBUSER}"          \
        --password="${DBPASS}"      \
        --database="${DBNAME}"      \
        --host="${DBHOST}"          \
        --execute="${1}"
}

# Iterate over all URLs.
#
# Reads the file $URLS, treats every whitespace-separated word in it as one
# URL and calls httracker_get for each of them, in file order.
#
# Globals:
#   URLS (read) - file containing the URLs.
function httracker_iterate {
  local -a links=() words=()
  local link

  # Collect the URLs first so that nothing run by httracker_get can consume
  # the list from stdin. `read -r -a` splits on whitespace like the previous
  # word-splitting loop did, but never glob-expands "?" or "*" and does not
  # choke on quotes or backslashes inside a URL. The `||` part keeps a last
  # line that has no trailing newline.
  while read -r -a words || [ "${#words[@]}" -gt 0 ]; do
    links+=("${words[@]}")
  done < "$URLS"

  for link in "${links[@]}"; do
    httracker_get "$link"
  done
}

# Create basic folders.
#
# Creates $MIRRORS and $TMP (including missing parents). When executed by
# root, $TMP is additionally handed over to $USER:$GROUP recursively.
#
# Globals:
#   MIRRORS, TMP (read) - folders to create.
#   USER, GROUP  (read) - new owner of $TMP when running as root.
function httracker_setup_folders {
  # Quoted so that paths containing spaces stay single arguments.
  mkdir -p "$MIRRORS" "$TMP"

  if [ "$(whoami)" == "root" ]; then
    # ":" is the standard owner/group separator; "." is a deprecated form
    # that is ambiguous for user names containing a dot.
    chown -R "${USER}:${GROUP}" "$TMP"
  fi
}