aboutsummaryrefslogtreecommitdiff
path: root/lib/httracker/functions
blob: fa52ed4be842f22c4f434f7c41d08647952960d1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/bin/bash
#
# Misc httracker functions.
#

#######################################
# Mirror a single URL with httrack into its own directory.
#
# The directory is $MIRRORS/<sha1>, where <sha1> is the SHA-1 of the URL
# followed by a newline. An empty "ok" file inside it marks a finished
# download; a URL whose marker already exists is skipped.
#
# Globals:
#   MIRRORS      (read) base directory holding the per-URL mirrors
#   LEVEL        (read) value appended to httrack's -r option
#   USER, GROUP  (read) ownership applied to the mirror when run as root
# Arguments:
#   $1 - URL to fetch
# Outputs:
#   "Error fetching <url>." on stdout when httrack exits non-zero.
# Returns:
#   0 when the URL was already mirrored or when the fetch failed (the
#   failure is only reported through the message above); otherwise the
#   status of the final ownership step.
#######################################
function httracker_get {
  local url="$1"
  local hash
  local target
  local me

  # printf emits the same "URL + newline" bytes that echo did, so directory
  # names of existing mirrors stay valid, but the URL is no longer subject
  # to word splitting or globbing before it is hashed.
  hash="$(printf '%s\n' "$url" | sha1sum | cut -d ' ' -f 1)"
  target="$MIRRORS/$hash"

  mkdir -p -- "$target"

  # We already got this one
  if [ -f "$target/ok" ]; then
    return
  fi

  # Get each URL
  # Options that were tried and are currently disabled:
  #   --user $USER   -e%${EXT_LEVEL}   -m$FILESIZE   --verbose
  if httrack               \
       --depth=1           \
       --purge-old=0       \
       --index             \
       --cookies=1         \
       --path "${target}"  \
       "-r${LEVEL}" "${url}"; then
    # Mark as downloaded
    touch -- "$target/ok"
  else
    echo "Error fetching $url."
    # ${target:?} refuses to run rm with an empty path.
    rm -rf -- "${target:?}"
    # The mirror directory is gone, so there is nothing left to chown.
    return 0
  fi

  # When started by root on behalf of another account, hand the mirror over
  # to that account.
  me="$(whoami)"
  if [ "$me" != "$USER" ] && [ "$me" == "root" ]; then
    chown -R "$USER:$GROUP" "$target/"
  fi
}

#######################################
# Mirror every URL listed in $URLS into a directory named after the current
# year and month: $MIRRORDIR/<year>/<month>.
#
# The directory is created as user "links" via sudo and httrack is told to
# run as that same user.
#
# Globals:
#   MIRRORDIR (read)    base directory of the dated mirrors
#   URLS      (read)    file passed to httrack's --list option
#   year, month, day, target (written) - left global on purpose, as before,
#     since code outside this function may read them (NOTE(review): confirm
#     whether anything does; if not they should become local).
#######################################
function httracker_get_incremental {
  local today

  # Creates target dir
  # Ask for the date once so year and month cannot come from two different
  # instants (e.g. around midnight on the last day of a month).
  today="$(date +%Y-%m-%d)"
  year="${today%%-*}"
  month="${today#*-}"
  month="${month%-*}"
  day="${today##*-}"
  target="${MIRRORDIR}/${year}/${month}"
  sudo -u links mkdir -p "${target}"

  # Grabs URLs from the network
  httrack --verbose           \
          --user links        \
          --depth=1           \
          --purge-old=0       \
          --index             \
          --cookies=1         \
          --list "${URLS}"    \
          --path "${target}"

}

#######################################
# Print the single-quoted value found on each line of $CONFIGFILE that
# matches a pattern.
#
# For every matching line, everything up to and including the first single
# quote is dropped, then everything from the next single quote to the end
# of the line, e.g.  $dbuser = 'scuttle';  ->  scuttle
#
# Globals:
#   CONFIGFILE (read) - file to search
# Arguments:
#   $1 - grep pattern selecting the line(s)
# Outputs:
#   One extracted value per matching line, on stdout.
#######################################
function httracker_scuttle_config() {
  # Quoted so that a path with spaces works and an empty pattern cannot turn
  # the file name into the pattern (which would leave grep reading stdin).
  grep -- "${1}" "${CONFIGFILE}" | sed -e "s/[^']*'//" -e "s/'.*\$//"
}

#######################################
# Run one SQL statement through the mysql command-line client and print the
# result in batch format without a column-name header.
#
# Globals:
#   dbuser, dbpass, dbname, dbhost (read) - connection settings
# Arguments:
#   $1 - SQL statement to execute
# Outputs:
#   Whatever mysql prints for the statement.
# Returns:
#   Exit status of mysql.
#######################################
function httracker_sqlquery {
  # Every option is quoted so that credentials containing spaces or glob
  # characters reach mysql as a single argument.
  # NOTE(review): the password is passed on the command line and is
  # therefore visible in the process list; consider an option file instead.
  mysql --skip-column-names --batch \
        --user="${dbuser}"          \
        --password="${dbpass}"      \
        --database="${dbname}"      \
        --host="${dbhost}"          \
        --execute="${1}"
}

#######################################
# Call httracker_get once for every whitespace-separated entry in the file
# named by $URLS.
#
# Entries are taken literally: quotes, backslashes and glob characters
# (?, *, [) in a URL are passed through unchanged. The previous
# `cat | xargs` form aborted on a URL containing an apostrophe and let the
# shell glob-expand the entries.
#
# Globals:
#   URLS (read) - path of the file listing the URLs
#######################################
function httracker_iterate {
  local -a links=()
  local link

  # -d '' makes read consume the whole file; -a splits it on whitespace
  # without any pathname expansion. read reports failure on reaching end of
  # file even though the array is filled, hence the `|| true`.
  read -r -d '' -a links < "$URLS" || true

  for link in "${links[@]}"; do
    httracker_get "$link"
  done
}