diff options
| -rw-r--r-- | config | 2 | ||||
| -rw-r--r-- | lib/httracker/functions | 26 | ||||
| -rwxr-xr-x | scuttler | 10 | 
3 files changed, 22 insertions, 16 deletions
@@ -9,3 +9,5 @@ USER="arquivo"
 GROUP="arquivo"
 DEPTH="2"
 EXT_DEPTH="1"
+SCUTTLE_USER="links"
+SCUTTLE_GROUP="links"
diff --git a/lib/httracker/functions b/lib/httracker/functions
index 1a4fa7f..0885206 100644
--- a/lib/httracker/functions
+++ b/lib/httracker/functions
@@ -3,6 +3,7 @@
 # Misc httracker functions.
 #
 
+# Set common httrack options
 function httracker_opts {
   OPTS=" --mirror                 \
          --continue               \
@@ -15,6 +16,7 @@ function httracker_opts {
          --path ${target}"
 }
 
+# Download URLs, mirror mode
 function httracker_get {
   # Options
   local url="$1"
@@ -54,13 +56,14 @@ function httracker_get {
   fi
 }
 
+# Download URLs, incremental mode
 function httracker_get_incremental {
-  # Creates target dir
+  # Create target dir
   year=`date +%Y`
   month=`date +%m`
   day=`date +%d`
   target=${MIRRORDIR}/${year}/${month}
-  sudo -u links mkdir -p ${target}
+  sudo -u ${USER} mkdir -p ${target}
 
   # Basic options
   httracker_opts
@@ -70,32 +73,33 @@ function httracker_get_incremental {
     OPTS="--user $USER"
   fi
 
-  # Grabs URLs from the network
-  httrack ${OPTS}                  \
-          --user links             \
-          --list ${URLS}           \
-          --path ${target}
+  # Grab URLs from the network
+  httrack ${OPTS} --list ${URLS}
 }
 
+# Get SemanticScuttle parameter
 function httracker_scuttle_config() {
   grep ${1} ${CONFIGFILE} | sed -e s/\[^\'\]\*\'// -e s/\'\.\*\$//
 }
 
+# Query a mysql database
 function httracker_sqlquery {
   mysql --skip-column-names --batch \
-        --user=${dbuser}          \
-        --password=${dbpass}      \
-        --database=${dbname}      \
-        --host=${dbhost}          \
+        --user=${DBUSER}          \
+        --password=${DBPASS}      \
+        --database=${DBNAME}      \
+        --host=${DBHOST}          \
         --execute="${1}"
 }
 
+# Iterate over all URLs
 function httracker_iterate {
   for link in `cat $URLS | xargs`; do
     httracker_get "$link"
   done
 }
 
+# Create basic folders
 function httracker_setup_folders {
   mkdir -p $MIRRORS $TMP
@@ -16,17 +16,17 @@ httracker_setup_folders
 BASEDIR=/var/sites/links
 SCUTTLEDIR=`basename $( find ${BASEDIR} -maxdepth 1 -iname "SemanticScuttle-*" | head -n 1 )`
 CONFIGFILE=${BASEDIR}/${SCUTTLEDIR}/data/config.php
-dbuser=`httracker_scuttle_config dbuser`
-dbpass=`httracker_scuttle_config dbpass`
-dbname=`httracker_scuttle_config dbname`
-dbhost=`httracker_scuttle_config dbhost`
+DBUSER=`httracker_scuttle_config dbuser`
+DBPASS=`httracker_scuttle_config dbpass`
+DBNAME=`httracker_scuttle_config dbname`
+DBHOST=`httracker_scuttle_config dbhost`
 
 # So we can run both applications in parallel
 URLS="$URLS_SCUTTLER"
 
 # Get URLs from database
 touch ${URLS}
-chown links.links ${URLS}
+chown ${SCUTTLE_USER}.${SCUTTLE_GROUP} ${URLS}
 chmod 600 ${URLS}
 httracker_sqlquery "select bAddress from sc_bookmarks;" > ${URLS}
