#!/bin/bash
#
# Misc httracker functions.
#
# This file only defines functions. They read the following variables, none
# of which is defined here (they must be provided by the caller):
#   DEPTH, EXT_DEPTH, MIRRORS, MIRRORDIR, URLS, TMP, USER, GROUP,
#   CONFIGFILE, DBUSER, DBPASS, DBNAME, DBHOST
#

# Set common httrack options.
#
# Globals:
#   DEPTH, EXT_DEPTH - read
#   target           - read; taken from the caller's scope (the callers in
#                      this file declare it before calling this function)
#   OPTS             - written; a single whitespace-separated string that is
#                      later expanded unquoted, so none of the values may
#                      contain whitespace
function httracker_opts {
  OPTS="--mirror \
    --continue \
    --depth=${DEPTH} \
    --ext-depth ${EXT_DEPTH} \
    --near \
    --purge-old=0 \
    --index \
    --cookies=1 \
    --path ${target}"
}

# Download a URL, mirror mode.
#
# The mirror is stored in $MIRRORS/<sha1 of the URL plus a trailing newline>.
# A file named "ok" inside that folder marks a finished download; when it is
# present the function returns without calling httrack again.
#
# Globals:   MIRRORS, USER, GROUP (read), OPTS (written)
# Arguments: $1 - URL to fetch
# Outputs:   an error message on stderr when httrack fails
function httracker_get {
  # Options
  local url="$1"
  local hash
  # The trailing newline is kept on purpose: it is part of the hashed data in
  # folder names created by earlier runs.
  hash="$(printf '%s\n' "$1" | sha1sum | cut -d ' ' -f 1)"
  local target="$MIRRORS/$hash"

  # Make sure that target exists
  mkdir -p "$target"

  # We already got this one
  if [[ -f "$target/ok" ]]; then
    return
  fi

  # Basic options
  httracker_opts

  # Additional options
  if [[ "$(whoami)" == "root" ]]; then
    OPTS="$OPTS --user $USER"
  fi

  # Fix permissions
  if [[ "$(whoami)" != "$USER" && "$(whoami)" == "root" ]]; then
    chown -R "$USER:$GROUP" "$target/"
  fi

  # Get the URL. OPTS is deliberately unquoted: it is a whitespace-separated
  # option string that has to be split into words.
  # shellcheck disable=SC2086
  if httrack ${OPTS} "${url}"; then
    # Mark as downloaded
    touch "$target/ok"
  else
    echo "Error fetching $url." >&2
    # ${target:?} aborts instead of expanding to an empty path.
    rm -rf -- "${target:?}"
  fi
}

# Download URLs, incremental mode.
#
# Creates $MIRRORDIR/<year>/<month> (as $USER, through sudo) and runs httrack
# with the file named by $URLS passed to --list.
#
# Globals: MIRRORDIR, USER, URLS (read), OPTS (written)
function httracker_get_incremental {
  # Create target dir
  local year month target
  year="$(date +%Y)"
  month="$(date +%m)"
  target="${MIRRORDIR}/${year}/${month}"
  sudo -u "${USER}" mkdir -p "${target}"

  # Basic options
  httracker_opts

  # Additional options: append, so the basic options set above are kept.
  if [[ "$(whoami)" == "root" ]]; then
    OPTS="$OPTS --user $USER"
  fi

  # Grab URLs from the network. OPTS is deliberately unquoted (see
  # httracker_opts).
  # shellcheck disable=SC2086
  httrack ${OPTS} --list "${URLS}"
}

# Get SemanticScuttle parameter.
#
# Prints, for every line of $CONFIGFILE matching $1, the text between the
# first single quote and the next one.
#
# Globals:   CONFIGFILE (read)
# Arguments: $1 - grep pattern selecting the configuration line(s)
function httracker_scuttle_config() {
  grep -- "${1}" "${CONFIGFILE}" | sed -e "s/[^']*'//" -e "s/'.*\$//"
}

# Query a mysql database.
#
# NOTE(review): the password is passed on the command line, where other local
# users may be able to see it in the process list; consider an option file.
#
# Globals:   DBUSER, DBPASS, DBNAME, DBHOST (read)
# Arguments: $1 - SQL statement to execute
# Outputs:   whatever mysql prints in batch mode, without column names
function httracker_sqlquery {
  mysql --skip-column-names --batch \
    --user="${DBUSER}" \
    --password="${DBPASS}" \
    --database="${DBNAME}" \
    --host="${DBHOST}" \
    --execute="${1}"
}

# Iterate over all URLs.
#
# Calls httracker_get once for every item xargs finds in the file $URLS.
# Items are read line by line instead of through an unquoted command
# substitution, so characters such as '?' and '*' in a URL are never treated
# as filename patterns.
#
# Globals: URLS (read)
function httracker_iterate {
  local link

  # The list is read from descriptor 3 so that commands run by httracker_get
  # cannot consume it through stdin.
  while IFS= read -r link <&3; do
    # Some xargs implementations print one empty line for empty input.
    [[ -n "$link" ]] || continue
    httracker_get "$link"
  done 3< <(xargs -n 1 < "$URLS")
}

# Create basic folders.
#
# Globals: MIRRORS, TMP, USER, GROUP (read)
function httracker_setup_folders {
  mkdir -p "$MIRRORS" "$TMP"

  if [[ "$(whoami)" == "root" ]]; then
    chown -R "$USER:$GROUP" "$TMP"
  fi
}