urlmagic 1.0 by rojo does not read YouTube URLs


Post by x0x »

Code: Select all

###############################################################################
# urlmagic 1.0 by rojo (EFnet #wootoff)                                       #
# Copyright 2011 Steve Church (rojo on EFnet). All rights reserved.           #
#                                                                             #
# Description:                                                                #
# Follows links posted in channel                                             #
# If content-type ~ text/* and <title>content exists</title> display title    #
# Otherwise, display content-type                                             #
# If url length > threshold, fetch and display tinyurl                        #
# If redirect, display final destination URL                                  #
# Record all this bullshit to your Twitter page                               #
# To disable the Twitter garbage, just set twitter(username) to ""            #
#                                                                             #
# If your eggdrop is not patched for UTF-8, consider doing so.  It makes web  #
# page titles containing unicode characters display as they should.  See      #
# http://eggwiki.org/Utf-8 for details.                                       #
#                                                                             #
# Please report bugs to rojo on EFnet.                                        #
#                                                                             #
# License                                                                     #
#                                                                             #
# Redistribution and use in source and binary forms, with or without          #
# modification, are permitted provided that the following conditions are met: #
#                                                                             #
#   1. Redistributions of source code must retain the above copyright notice, #
#      this list of conditions and the following disclaimer.                  #
#                                                                             #
#   2. Redistributions in binary form must reproduce the above copyright      #
#      notice, this list of conditions and the following disclaimer in the    #
#      documentation and/or other materials provided with the distribution.   #
#                                                                             #
# THIS SOFTWARE IS PROVIDED BY STEVE CHURCH "AS IS" AND ANY EXPRESS OR        #
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES   #
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN  #
# NO EVENT SHALL STEVE CHURCH OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,       #
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES          #
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR          #
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER  #
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT          #
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY   #
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH #
# DAMAGE.                                                                     #
###############################################################################

namespace eval urlmagic {

variable settings                     ; # leave this alone
variable twitter                      ; # leave this alone

set settings(max-length) 80           ; # URLs longer than this are converted to tinyurl
set settings(ignore-flags) bdkqr|dkqr ; # links posted by users with these flags are ignored
set settings(seconds-between) 10      ; # stop listening for this many seconds after processing an address
set settings(timeout) 10000           ; # wait this many milliseconds for a web server to respond
set settings(max-download) 1048576    ; # do not download pages larger than this many bytes
set settings(max-cookie-age) 2880     ; # if cookie shelf life > this many minutes, eat it sooner
set settings(udef-flag) urlmagic      ; # .chanset #channel +urlmagic
set twitter(username) user            ; # your Twitter username or registered email address
set twitter(password) ""              ; # your Twitter password

#########################
# end of user variables #
#########################

set scriptver 1.0
variable cookies
variable ns [namespace current]
variable skip_sqlite3 [catch {package require sqlite3}]

setudef flag $settings(udef-flag)

foreach lib {http htmlparse tls tdom} {
	if {[catch {package require $lib}]} {
		putlog "\00304urlmagic fail\003: Missing library \00308$lib\003.

urlmagic requires packages \00308http\00315\003, \00308htmlparse\00315\003, \00308tdom\00315\003, \00308tls\00315\003, and (optionally) \00308sqlite3\00315\003.  The http and htmlparse libraries are included in tcllib.
"
putlog "Use your distribution's package management system to install the dependencies as appropriate.

\002Debian / Ubuntu\002:
    \002\00309apt-get install tcllib tdom tcl-tls libsqlite3-tcl\003\002
\002Red Hat / SUSE / CentOS\002:
    \002\00309yum install tcllib tdom tcltls sqlite-tcl\003\002
\002Gentoo\002:
    \002\00309emerge -v tcllib tdom dev-tcltk/tls sqlite\003\002
\002FreeBSD\002:
    \002\00309pkg_add -r tcllib tdom tcltls sqlite3 sqlite3-tcl\003\002
"
		return false
	}
}

proc flood_prot {tf} {
	variable settings; variable ns

	if {$tf} {
		bind pubm - * ${ns}::find_urls
	} else {
		unbind pubm - * ${ns}::find_urls
		utimer $settings(seconds-between) [list ${ns}::flood_prot true]
	}
}

proc find_urls {nick uhost hand chan txt} {

	variable settings; variable twitter; variable skip_sqlite3; variable ns

	if {[matchattr $hand $settings(ignore-flags)] || ![channel get $chan $settings(udef-flag)]} { return }

	set rxp {(https?://|www\.|[a-z0-9\-]+\.[a-z]{2,4}/)\S+}

	if {[regexp -nocase $rxp $txt url] && [string length $url] > 7} {

		${ns}::flood_prot false

		if {![string match *://* $url]} { set url "http://$url" }

		# $details(url, content-length, tinyurl [where $url length > max], title, error [boolean])
		array set details [${ns}::get_title $url]

		set output [list PRIVMSG $chan ":<$nick>"]

		if {[info exists details(tinyurl)]} {
			set url $details(tinyurl)
			lappend output "$details(tinyurl) ->"
		} elseif {![string equal -nocase $url $details(url)]} {
			set url $details(url)
			lappend output "$details(url) ->"

		}

		lappend output "\002$details(title)\002"

		if {[info exists details(content-length)]} {
			lappend output "\($details(content-length)\)"
		}

		puthelp [join $output]

		if {[string length $twitter(username)] && [string length $twitter(password)] && !$details(error)} {

			set post "<$nick> $url -> $details(title)"

			if {$skip_sqlite3} {
				set hist 0
			} else {
				set hist [${ns}::query_history $url]
				if {!$hist} { ${ns}::record_history $url }
			}

			if {$hist} { return }

			# set post "<$nick> [${ns}::strip_codes $txt]"
			# ${ns}::tweet [string range $post 0 140]

			if {[catch {${ns}::tweet [string range $post 0 139]} err]} { putlog "Tweet fail.  $err" } { putlog "Tweet success." }
		}

	}
}

proc db {query} {

	sqlite3 urlmagic_db urlmagic.db

	urlmagic_db eval "CREATE TABLE IF NOT EXISTS urls (\
		id INTEGER PRIMARY KEY AUTOINCREMENT,\
		url TEXT NOT NULL)"

	set res {}

	urlmagic_db eval $query v {
		set row {}
		foreach col $v(*) { lappend row $v($col) }
		lappend res $row
	}

	urlmagic_db close

	return $res
}

proc query_history {url} {
	variable ns
	return [lindex [${ns}::db "SELECT COUNT(*) FROM urls WHERE url='[string map {' ''} $url]'"] 0]
}

proc record_history {url} {
	variable ns
	set url [string map {' ''} $url]
	${ns}::db "INSERT INTO urls (url) SELECT '$url' WHERE NOT EXISTS (SELECT 1 FROM urls WHERE url='$url')"
}

proc update_cookies {tok} {
	variable cookies; variable settings; variable ns
	upvar \#0 $tok state
	set domain [lindex [split $state(url) /] 2]
	if {![info exists cookies($domain)]} { set cookies($domain) [list] }
	foreach {name value} $state(meta) {

		if {[string equal -nocase $name "Set-Cookie"]} {

			if {[regexp -nocase {expires=([^;]+)} $value - expires]} {

				if {[catch {expr {([clock scan $expires -gmt 1] - [clock seconds]) / 60}} expires] || $expires < 1 } {
					set expires 15
				} elseif {$expires > $settings(max-cookie-age)} {
					set expires $settings(max-cookie-age)
				}
			} { set expires $settings(max-cookie-age) }

			set value [lindex [split $value \;] 0]
			set cookie_name [lindex [split $value =] 0]

			set expire_command [list ${ns}::expire_cookie $domain $cookie_name]

			if {[set pos [lsearch -glob $cookies($domain) ${cookie_name}=*]] > -1} {
				set cookies($domain) [lreplace $cookies($domain) $pos $pos $value]
				foreach t [timers] {
					if {[lindex $t 1] == $expire_command} { killtimer [lindex $t 2] }
				}
			} else {
				lappend cookies($domain) $value
			}

			timer $expires $expire_command
		}
	}
}

proc expire_cookie {domain cookie_name} {
	variable cookies
	if {![info exists cookies($domain)]} { return }
	if {[set pos [lsearch -glob $cookies($domain) ${cookie_name}=*]] > -1} {
		set cookies($domain) [lreplace $cookies($domain) $pos $pos]
	}
	if {![llength $cookies($domain)]} { unset cookies($domain) }
}

proc pct_encode_extended {what} {
	set enc [list { } +]
	for {set i 0} {$i < 256} {incr i} {
		if {$i > 32 && $i < 127} { continue }
		lappend enc [format %c $i] %[format %02x $i]
	}
	return [string map $enc $what]
}

proc relative {full partial} {
	if {[string match -nocase http* $partial]} { return $partial }
	set base [join [lrange [split $full /] 0 2] /]
	if {[string equal [string range $partial 0 0] /]} {
		return "${base}${partial}"
	} else {
		return "[join [lreplace [split $full /] end end] /]/$partial"
	}
}

# charsets for encoding conversion in proc fetch
# reference: http://www.w3.org/International/O-charset-lang.html
array set _charset {
	lv	iso8859-13	lt	iso8859-13	et	iso8859-15	eo	iso8859-3	mt	iso8859-3
	bg	iso8859-5	be	iso8859-5	uk	iso8859-5	mk	iso8859-5	ar	iso8859-6
	el	iso8859-7	iw	iso8859-8	tr	iso8859-9	sr	iso8859-5
	ru	koi8-r		ja	euc-jp		ko	euc-kr		cn	euc-cn
}
foreach cc {af sq eu ca da nl en fo fi fr gl de is ga it no pt gd es sv} {
	set _charset($cc) iso8859-1
}
foreach cc {hr cs hu pl ro sr sk sl} {
	set _charset($cc) iso8859-2
}
set _charset(en) utf-8; # assume utf-8 if charset not specified and lang="en"
variable _charset
proc fetch {url {post ""} {headers ""} {iterations 0} {validate 1}} {
	# follows redirects, sets cookies and allows post data
	# sets settings(content-length) if provided by server; 0 otherwise
	# sets settings(url) for redirection tracking
	# sets settings(content-type) so calling proc knows whether to parse data
	# returns data if content-type=text/html; returns content-type otherwise
	variable settings; variable cookies; variable _charset
	::http::register https 443 ::tls::socket
	
	if {[string length $post]} { set validate 0 }

	set agent "Mozilla/5.0 (compatible; TCL [info patchlevel] HTTP library) 20110501"
	set http [::http::config -useragent $agent]
	set url [pct_encode_extended $url]
	set settings(url) $url

	if {![string length $headers]} {
		set headers [list Referer $url]
		set domain [lindex [split $url /] 2]
		if {[info exists cookies($domain)] && [llength $cookies($domain)]} {
			lappend headers Cookie [join $cookies($domain) {; }]
		}
	}

	set command [list ::http::geturl $url]
	if {[string length $post]} { lappend command -query $post }
	if {[string length $headers]} { lappend command -headers $headers }
	lappend command -timeout $settings(timeout)
	if {$validate} { lappend command -validate 1 }

	if {[catch $command http]} {
		if {[catch {set data "Error [::http::ncode $http]: [::http::error $http]"}]} {
			set data "Error: Connection timed out."
		}
		::http::cleanup $http
		return $data
	} {
		update_cookies $http
		set data [::http::data $http]
	}
	
	upvar \#0 $http state
	array set raw_meta $state(meta)
	foreach {name val} [array get raw_meta] { set meta([string tolower $name]) $val }
	unset raw_meta

	::http::cleanup $http

	if {[info exists meta(location)]} {
		set meta(redirect) $meta(location)
	}

	if {[info exists meta(redirect)]} {

		set meta(redirect) [relative $url $meta(redirect)]

		if {[incr iterations] < 10} {
			return [fetch $meta(redirect) "" $headers $iterations $validate]
		} else {
			return "Error: too many redirections"
		}
	}

	if {[info exists meta(content-length)]} {
		set settings(content-length) $meta(content-length)
	} else {
		set settings(content-length) 0
	}

	if {[info exists meta(content-type)]} {
		set settings(content-type) [lindex [split $meta(content-type) ";"] 0]
	} elseif {[info exists meta(x-aspnet-version)]} {
		set settings(content-type) "text/html"
	} else {
		set settings(content-type) "unknown"
	}

	if {[string match -nocase $settings(content-type) "text/html"]\
	&& $settings(content-length) <= $settings(max-download)} {
		if {$validate} {
			return [fetch $url "" $headers [incr iterations] 0]
		} {
			# if xhtml and charset is specified, fix the charset.
			# otherwise, ignore charset= directive.
			# (I guess.  Compare the source of http://fathersday.yahoo.co.jp/
			# versus http://www.clevo.com.tw/tw/ for example.  The Yahoo! site
			# encoding does not need to be re-encoded, whereas the Clevo site's does.)
			if {[regexp -nocase {<html[^>]+xhtml} $data]} {
				regexp -nocase {\ycharset=\"?\'?([\w\-]+)} $data - charset
			}
			if {[info exists charset]} {
				set charset [string map {iso- iso} [string tolower $charset]]
				if {[lsearch [encoding names] $charset] < 0} { unset charset }
			}
			if {![info exists charset] && [regexp -nocase {\ylang=\"?\'?(\w{2})} $data - lang]} {
				set charset $_charset([string tolower $lang])
			}
			if {[info exists charset] && ![string equal -nocase [encoding system] $charset]} {
				set data [encoding convertfrom $charset $data]
			}
			return $data
		}
	} {
		return "Content type: $settings(content-type)"
	}
}

proc get_title {url} {
#	returns $ret(url, content-length, tinyurl [where $url length > max], title)
	variable settings; variable ns

	set data [string map [list \r "" \n ""] [fetch $url]]

	if {![string equal $url $settings(url)]} {
		set url $settings(url)
	}
	set ret(error) [string match Error* $data]
	set ret(url) $url
	set content_length $settings(content-length)
	set title ""
	if {[regexp -nocase {<title[^>]*>(.*?)</title>} $data - title]} {
		set title [string map {‪ "" ‬ "" ‏ ""} [string trim $title]]; # for YouTube
		regsub -all {\s+} $title { } title
		set ret(title) [::htmlparse::mapEscapes $title]
	} {
		set ret(title) $data
	}

	if {[string length $url] >= $settings(max-length)} {
		set ret(tinyurl) [tinyurl $url]
	}

	if {$content_length} {
		set ret(content-length) [${ns}::bytes_to_human $content_length]
	}

	return [array get ret]

}

proc bytes_to_human {bytes} {
	variable ns
	if {$bytes > 1073741824} {
		return "[${ns}::make_round $bytes 1073741824] GB"
	} elseif {$bytes > 1048576} {
		return "[${ns}::make_round $bytes 1048576] MB"
	} elseif {$bytes > 1024} {
		return "[${ns}::make_round $bytes 1024] KB"
	} else { return "$bytes B" }
}

proc make_round {num denom} {
	global tcl_precision
	set expr {1.1 + 2.2 eq 3.3}; while {![catch { incr tcl_precision }]} {}; while {![expr $expr]} { incr tcl_precision -1 }
	return [regsub {00000+[1-9]} [expr {round([expr {100.0 * $num / $denom}]) * 0.01}] ""]
}

proc strip_codes {what} {
	return [regsub -all -- {\002|\037|\026|\003(\d{1,2})?(,\d{1,2})?} $what ""]
}

proc tinyurl {url} {
	variable settings
	set data [split [fetch "http://tinyurl.com/create.php" [::http::formatQuery "url" $url]] \n]
	for {set i [expr {[llength $data] - 1}]} {$i >= 0} {incr i -1} {
		putlog [lindex $data $i]
		if {[regexp {href="http://tinyurl\.com/\w+"} [lindex $data $i] url]} {
			return [string map { {href=} "" \" "" } $url]
		}
	}
	return ""
}

proc logged_in {} {
	variable cookies
	if {![info exists cookies(mobile.twitter.com)]} { return 0 }
	set idx [lsearch -glob $cookies(mobile.twitter.com) oauth_token*]
	if {$idx < 0} { return 0 }
	set oauth_token [lindex $cookies(mobile.twitter.com) $idx]
	set token [lindex [split $oauth_token =] 1]
	if {[string length $token]} { return 1 } { return 0 }
}

proc twitter_login {{tries 0}} {
	variable settings; variable cookies; variable twitter

	set data [fetch "https://mobile.twitter.com/session/new"]

	set dom [dom parse -html $data]
	set root [$dom documentElement]
	set forms [$root selectNodes {//form}]
	set form [lindex $forms 0]
	set inputs [$form selectNodes {//input}]
	set url [$form getAttribute action]

	foreach input $inputs {
		catch { set post([$input getAttribute name]) [$input getAttribute value] }
	}

	$dom delete

	set post(username) $twitter(username)
	set post(password) $twitter(password)

	foreach {name value} [array get post] {
		lappend postdata [::http::formatQuery $name $value]
	}

	fetch $url [join $postdata "&"]
	
	if {[logged_in]} { return }

	if {[incr tries] < 3} { twitter_login $tries } { putlog "Twitter login failed.  Tried $tries times." }

}

proc tweet {what} {
	variable settings; variable cookies
	if {![logged_in]} { twitter_login }

	set data [fetch "https://mobile.twitter.com/"]

	if {[catch {
		set dom [dom parse -html $data]
		set root [$dom documentElement]
		set forms [$root selectNodes {//form[@id='new_tweet']}]
		set form [lindex $forms 0]
		set inputs [$form selectNodes {//form[@id='new_tweet']//input}]
		set url [$form getAttribute action]
		set textareas [$form selectNodes {//form[@id='new_tweet']//textarea}]
		set textarea [lindex $textareas 0]
	} err]} { putlog "Damn dom.  $err"; foreach l [split $data \n] { putlog $l } }

	foreach input $inputs {
		catch { set post([$input getAttribute name]) [$input getAttribute value] }
	}

	set post([$textarea getAttribute name]) $what

	$dom delete

	foreach {name value} [array get post] {
		lappend postdata [::http::formatQuery $name $value]
	}

	fetch $url [join $postdata "&"]
}

${ns}::flood_prot true

putlog "urlmagic.tcl $scriptver loaded."

}; # end namespace

The following error shows up on the partyline whenever someone posts a YouTube URL:

can't read "_charset(id)": no such element in array
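
From the error, my guess is that this lookup in proc fetch is the culprit:

Code: Select all

set charset $_charset([string tolower $lang])

The lang regexp apparently captures "id" from YouTube pages (presumably from a lang="id..." attribute), and the _charset array has no "id" entry, so the array read throws. A minimal, untested guard I came up with (my own patch, not rojo's) would be to check the array before reading it:

Code: Select all

# only fall back to the scraped language code if the _charset array
# actually has a mapping for it; otherwise leave charset unset so the
# page data is passed through without re-encoding
if {![info exists charset] && [regexp -nocase {\ylang=\"?\'?(\w{2})} $data - lang]} {
	set lang [string tolower $lang]
	if {[info exists _charset($lang)]} {
		set charset $_charset($lang)
	}
}

Adding an "id" entry to the _charset array would probably paper over this particular page too, but the guard seems safer against any other unknown code.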

Anyone have a clue?