Code: Select all
# newsnow.tcl
# by doggo #omgwtfnzbs@EFNET
#########################
package require http
bind time -|- "*" newsnowuk:news
set infochan "#yourchannel"
set baseurl "http://www.newsnow.co.uk"
set dupes "logs/newsnow_dupes.txt"
proc newsnowuk:news {min hour day month year} {
global infochan baseurl dupes
if {![file exists $dupes]} {
set file [open $dupes "w"]
close $file
}
set url "http://www.newsnow.co.uk/h/Sport/Football"
::http::config -useragent "Mozilla/5.0 (X11; U; Linux i686; ru-RU; rv:1.8.1) Gecko/2006101023 Firefox/2.0"
set data [::http::geturl $url -timeout 5000]
set xx [::http::data $data]
::http::cleanup $xx
regsub -all {\n} $xx {} xx
regexp -nocase {<div class="tsl">NEW in the last (.*?)</div>(.*?)<br.*} $xx match x1 x2 x3
if {![info exists x2]} {return}
regsub -all {</div>} $x2 {|} x2
foreach xxx [split $x2 "|"] {
if {[string length $xxx]} {
regexp -nocase {<a href="(.*?)".*onclick=".*">(.*?)</a><span class=".*" ut=".*"><b>(.*?)</b>} $xxx match x4 x5 x6
set file [open $dupes r]
set data [read $file]
close $file
set isdupe 0
foreach yy [split $data \n] {
if {[string length $yy]} {
set now [clock seconds]
set cache [lindex $yy 0]
set article [lindex $yy 1]
incr now -$cache
if { [string match -nocase "*$x4*" "$yy"] == 1 } {
set isdupe 1
}
if {$now>600} {
set rmv [newsnowuk:cache $cache $dupes]
putlog "removing news article $article from cache"
}
}
}
catch {unset data}
if {$isdupe != 1} {
set x5 [newsnowuk:htmlcodes $x5]
regsub -all {\s+} $x5 " " x5
regsub -all {\–} $x5 "-" x5
regsub -all {\’|\‘} $x5 "\'" x5
set open_text [open $dupes "a"]
puts $open_text "[clock seconds] $x4"
close $open_text
puthelp "privmsg $infochan :$x5 - $baseurl$x4"
}
}
}
}
proc newsnowuk:cache {id fname} {
set data ""
set input [open $fname r]
while {![eof $input]} {
set curline [gets $input];set curline [split $curline]
if {$curline != ""} {
set data [linsert $data end $curline]
}
}
catch {close $input}
set mark -1;set match ""
foreach line $data {
incr mark
if {[lindex $line 0] == $id} {
set match $mark
break
}
}
if {$match == ""} {return}
set newdata [lreplace $data $mark $mark]
set output [open $fname w]
foreach newline $newdata {
if {$newline != ""} {
puts $output $newline
}
}
flush $output
catch {close $output}
}
#borrowed from a imdb.tcl
proc newsnowuk:htmlcodes {content} {
if {$content == ""} {
return "n/a";
}
if {![string match *&* $content]} {
return $content;
}
set escapes {
\x20 " \x22 & \x26 ' \x27 – \x2D
< \x3C > \x3E ˜ \x7E € \x80 ¡ \xA1
¢ \xA2 £ \xA3 ¤ \xA4 ¥ \xA5 ¦ \xA6
§ \xA7 ¨ \xA8 © \xA9 ª \xAA « \xAB
¬ \xAC \xAD ® \xAE &hibar; \xAF ° \xB0
± \xB1 ² \xB2 ³ \xB3 ´ \xB4 µ \xB5
¶ \xB6 · \xB7 ¸ \xB8 ¹ \xB9 º \xBA
» \xBB ¼ \xBC ½ \xBD ¾ \xBE ¿ \xBF
À \xC0 Á \xC1 Â \xC2 Ã \xC3 Ä \xC4
Å \xC5 Æ \xC6 Ç \xC7 È \xC8 É \xC9
Ê \xCA Ë \xCB Ì \xCC Í \xCD Î \xCE
Ï \xCF Ð \xD0 Ñ \xD1 Ò \xD2 Ó \xD3
Ô \xD4 Õ \xD5 Ö \xD6 × \xD7 Ø \xD8
Ù \xD9 Ú \xDA Û \xDB Ü \xDC Ý \xDD
Þ \xDE ß \xDF à \xE0 á \xE1 â \xE2
ã \xE3 ä \xE4 å \xE5 æ \xE6 ç \xE7
è \xE8 é \xE9 ê \xEA ë \xEB ì \xEC
í \xED î \xEE ï \xEF ð \xF0 ñ \xF1
ò \xF2 ó \xF3 ô \xF4 õ \xF5 ö \xF6
÷ \xF7 ø \xF8 ù \xF9 ú \xFA û \xFB
ü \xFC ý \xFD þ \xFE ÿ \xFF
};
set content [string map $escapes $content];
set content [string map [list "\]" "\\\]" "\[" "\\\[" "\$" "\\\$" "\\" "\\\\"] $content];
regsub -all -- {&#([[:digit:]]{1,5});} $content {[format %c [string trimleft "\1" "0"]]} content;
regsub -all -- {&#x([[:xdigit:]]{1,4});} $content {[format %c [scan "\1" %x]]} content;
regsub -all -- {&#?[[:alnum:]]{2,7};} $content "?" content;
return [subst $content];
}
putlog "newsnow.tcl loaded!"