about summary refs log tree commit diff
path: root/lib/httracker
diff options
context:
space:
mode:
author Silvio Rhatto <rhatto@riseup.net> 2013-08-27 13:35:26 -0300
committer Silvio Rhatto <rhatto@riseup.net> 2013-08-27 13:35:26 -0300
commit d56b89890db4726994374a375f97f60b36313d92 (patch)
tree fc9f930d84a0e3b5bbdafdf8d9400b78057a4c83 /lib/httracker
parent f7e6767cd679ae6c40dd40f5d20889df92ae8ca1 (diff)
download httruta-d56b89890db4726994374a375f97f60b36313d92.tar.gz
httruta-d56b89890db4726994374a375f97f60b36313d92.tar.bz2
TODO: Looks like some RSS convert links to entities twice
Diffstat (limited to 'lib/httracker')
-rw-r--r-- lib/httracker/functions 2
-rwxr-xr-x lib/httracker/html.sed 2
2 files changed, 3 insertions, 1 deletions
diff --git a/lib/httracker/functions b/lib/httracker/functions
index f2dde7f..929d917 100644
--- a/lib/httracker/functions
+++ b/lib/httracker/functions
@@ -101,7 +101,7 @@ function httracker_sqlquery {
function httracker_iterate {
for link in `cat $URLS | xargs`; do
# Fix entities
- link="`echo $link | $BASE/lib/httracker/html.sed`"
+ link="`echo $link | sed -f $BASE/lib/httracker/html.sed`"
httracker_get "$link"
done
}
diff --git a/lib/httracker/html.sed b/lib/httracker/html.sed
index d12f602..859bc25 100755
--- a/lib/httracker/html.sed
+++ b/lib/httracker/html.sed
@@ -481,6 +481,8 @@ s/&#60;/</g
s/&lt;/</g
s/&#62;/>/g
s/&gt;/>/g
+# TODO: Looks like some RSS convert links to entities twice
+s/&amp;amp/\&/g
s/&#38;/\&/g
s/&amp;/\&/g
# http://www.w3schools.com/tags/ref_entities.asp