Code: Select all
# Remove the link part: everything from "href=http:" through "id__" and the
# six digits that follow it (this only works while the id is always exactly
# six digits long). The pattern begins with a literal space, so the link must
# be preceded by one. The result is stored in newline.
regsub { (href=http:).*(id__)(\d){6}} $line {} newline
# Remove the three-digit number and the apostrophe that follows it.
# This step reads $newline rather than $line: reading $line again would
# overwrite newline and silently discard the result of the first step.
regsub { (\d){3}\'} $newline {} newline
Code: Select all
# Register my:bla as the handler for the DCC command "bla".
bind dcc * bla my:bla

# my:bla --
#   Reads tv.txt, strips a fixed set of HTML fragments from it, and logs every
#   non-blank line found between the two listing markers via putlog.
#
# Arguments:
#   idx, hand, arg - supplied by the dcc bind; none of them is used here.
#   NOTE(review): Eggdrop's documentation lists the dcc bind arguments as
#   handle, idx, text, so the first two names look swapped - confirm before
#   relying on them.
#
# Results:
#   None. Output goes to the log; problems with the file or the markers are
#   reported there as well instead of producing a bogus range.
proc my:bla {idx hand arg} {
    if {[catch {open "tv.txt" r} chan]} {
        putlog "bla: cannot open tv.txt: $chan"
        return
    }
    # Always close the channel, even when read fails.
    set readFailed [catch {read $chan} page]
    close $chan
    if {$readFailed} {
        putlog "bla: cannot read tv.txt: $page"
        return
    }
    putlog "searching..."

    # Strip the markup on the raw text and split into lines afterwards.
    # Running regsub on the string form of an already split list and then
    # re-parsing it as a list is fragile when lines contain quotes or braces.
    regsub -all -- {<td|colspan="3"|bgcolor="#EFEFEF"><div align="center"><strong>|</strong></div></td>} $page "" page
    regsub -all -- {<table width="440"|cellpadding="0"|cellspacing="0"|align="center"|border=0>|<tr>|</tr>} $page "" page
    regsub -all -- {valign="top"|width="40"|><font size=1>|</font></td>|width="20"|>-</td>|<B>|</B>} $page "" page
    # The original pattern had an empty alternative after "<a"; it could only
    # ever match the empty string, so it has been dropped.
    regsub -all -- {<a|bgColor='#fafafa'|bgColor='#A6A6FF'|style='text-decoration:none} $page "" page
    regsub -all -- {'</font> <img|src='http://tv.kappa.ro/images/detalii.gif'|border=0 alt='detalii'></a>|</font> <img|'} $page "" page
    set content [split $page \n]

    # Locate the two marker lines. lsearch returns -1 when nothing matches,
    # which would otherwise turn into a meaningless lrange window.
    set first [lsearch $content "<!-- programul canalului ales -->"]
    set last [lsearch $content "*<!-- template 2000 jos-->*"]
    if {$first < 0 || $last < 0} {
        putlog "bla: listing markers not found in tv.txt"
        return
    }
    # The listing starts two lines after the first marker and ends two lines
    # before the second one.
    set tosave [lrange $content [expr {$first + 2}] [expr {$last - 2}]]

    set i 0
    foreach line $tosave {
        # Blank check done with string commands: llength on arbitrary text
        # raises an error when the text is not a well-formed Tcl list.
        if {[string length [string trim $line]] == 0} {
            continue
        }
        putlog $line
        incr i
    }
    putlog "finished... $i line[expr {$i==1?"":"s"}].."
}
Code: Select all
# Log every non-blank entry of tosave after removing the link prefix and the
# trailing number, counting the logged lines in i (i must already be set).
foreach line $tosave {
    # Skip blank lines. The check uses string commands because llength on
    # arbitrary text raises an error when the text is not a well-formed list
    # (for example when it contains an unbalanced quote or brace).
    if {[string length [string trim $line]] == 0} {
        continue
    }
    # Drop everything from "href=http:" through "id__" and its six digits.
    regsub {(href=http:).*(id__)(\d){6}} $line {} line
    # Drop a run of two or more digits at the very end of the line.
    regsub {(\d){2,}$} $line {} line
    putlog $line
    incr i
}
putlog "finished... $i line[expr {$i==1?"":"s"}].."
% set line "href=http://tv.kappa.ro/loc__detalii/id__469556 Roseanne (SUA serial de comedie, 1988) 30"
href=http://tv.kappa.ro/loc__detalii/id__469556 Roseanne (SUA serial de comedie, 1988) 30
% regsub {(href=http:).*(id__)(\d){6}} $line {} line ;puts $line
Roseanne (SUA serial de comedie, 1988) 30
% regsub {(\d){2,}$} $line {} line ;puts $line
Roseanne (SUA serial de comedie, 1988)
%
You didn't say anything about the [01:23]. caesar wrote: And from a line like "href=http://tv.kappa.ro/loc__detalii/id__469556 Roseanne (SUA serial de comedie, 1988) 30" I want to remove the URL and its random number and the last number of the line.
Code: Select all
# Register my:bla as the handler for the DCC command "bla".
bind dcc * bla my:bla

# my:bla --
#   Reads tv.txt, cuts out the text between the two listing markers, removes
#   all HTML tags from it, and logs every remaining non-blank line via putlog.
#
# Arguments:
#   idx, hand, arg - supplied by the dcc bind; none of them is used here.
#
# Results:
#   None. Output goes to the log; a missing file or marker is reported there.
proc my:bla {idx hand arg} {
    if {[catch {open "tv.txt" r} chan]} {
        putlog "bla: cannot open tv.txt: $chan"
        return
    }
    # Always close the channel, even when read fails.
    set readFailed [catch {read $chan} content]
    close $chan
    if {$readFailed} {
        putlog "bla: cannot read tv.txt: $content"
        return
    }
    putlog "searching..."

    set from [string first "<!-- programul canalului ales -->" $content]
    if {$from < 0} {
        putlog "bla: start marker not found in tv.txt"
        return
    }
    set to [string first "<!-- template 2000 jos-->" $content $from]
    if {$to < 0} {
        putlog "bla: end marker not found in tv.txt"
        return
    }
    # string range is inclusive at both ends, so stop one character before
    # the end marker. Ending at its first character would leave a lone "<"
    # that the tag-stripping pattern cannot remove and that would be logged
    # as an extra line.
    set tosave [string range $content $from [expr {$to - 1}]]

    # Turn the closing/line-break tags into spaces so neighbouring words do
    # not run together, then drop every remaining tag.
    regsub -all -- {</b>|<br>|</font>} $tosave { } tosave
    regsub -all -- {<[^>]*>} $tosave {} tosave
    # NOTE(review): the original statement here replaced a space with a
    # space, which does nothing. It looks like a non-breaking-space entity
    # that was mangled in transit, so both the entity and the literal
    # character are mapped to a plain space - confirm against the real page.
    regsub -all -- {&nbsp;|\u00a0} $tosave { } tosave

    set i 0
    foreach line [split $tosave \n] {
        set line [string trim $line]
        # Skip blank lines and lines whose first word is a lone "-".
        # String commands are used because llength/lindex raise an error on
        # text that is not a well-formed Tcl list.
        if {[string length $line] == 0 || [regexp {^-(\s|$)} $line]} {
            continue
        }
        # Remove the first run of two or more digits followed by an apostrophe.
        regsub {(\d){2,}[']} $line {} line
        putlog $line
        incr i
    }
    putlog "finished... $i line[expr {$i==1?"":"s"}].."
}
Date: Marti, 30-12-2003
07:00 Dennis, pericol public (SUA desene animate, rel.)
08:00 Minute de milioane
etc.
Code: Select all
# Walk the cleaned text line by line and log each programme as
# "HH:MM title": a line consisting only of a time is remembered in nline and
# joined with the next non-time line. i counts the logged lines and must
# already be set.
foreach line [split $tosave \n] {
    set line [string trim $line]
    # Skip blank lines and lines whose first word is a lone "-".
    # String commands are used because llength/lindex raise an error on text
    # that is not a well-formed Tcl list.
    if {[string length $line] == 0 || [regexp {^-(\s|$)} $line]} {
        continue
    }
    # Remove the first run of two or more digits followed by an apostrophe.
    regsub {(\d){2,}[']} $line {} line
    if {[regexp {^(\d){2}:(\d){2}$} $line]} {
        # A bare time: hold it until the title line arrives.
        set nline $line
        continue
    }
    if {[info exists nline]} {
        putlog "$nline $line"
        unset nline
    } else {
        # No pending time for this line; log it as is instead of prefixing
        # it with a stray space.
        putlog $line
    }
    incr i
}