2009-11-24, 05:25
I've written a plug-in for the Xbox that accesses the webcast content from the Dalai Lama's website (http://www.dalailama.com/page.128.htm). I used the template plug-in by Voinage, linked to from the tutorial here.
Unfortunately, the HTML href tags in the scraped web pages are very inconsistently formatted: each href tag contains various attributes in different orders. I worked around this by parsing multiple combinations of the href tags that are used in the 2008 and 2009 webcast pages. I have not worked on the 2006 and 2007 pages, so many of the webcasts for these earlier years are not picked up. I emailed the site webmaster on 25/11/09 to suggest that a standard format be used for the href tags, but I have not heard anything back. If the href tags are standardised, I will rework the plugin.
Unfortunately, the HTML href tags in the scraped web pages are very inconsistently formatted: each href tag contains various attributes in different orders. I worked around this by parsing multiple combinations of the href tags that are used in the 2008 and 2009 webcast pages. I have not worked on the 2006 and 2007 pages, so many of the webcasts for these earlier years are not picked up. I emailed the site webmaster on 25/11/09 to suggest that a standard format be used for the href tags, but I have not heard anything back. If the href tags are standardised, I will rework the plugin.
Code:
import re
import sys
import urllib
import urllib2

import xbmcgui
import xbmcplugin
#Dalai Lama XBMC Plug-in v0.02
def CATEGORIES():
    """Add one top-level folder per year of webcast teachings.

    Each folder is registered through addDir with mode 1, which the
    dispatch code at the bottom of the script routes to INDEX together
    with that year's index page URL.
    """
    logo = 'http://www.dalailama.com/images/logo.gif'
    # (folder label, index page for that year), listed newest first.
    yearly_pages = (
        ('Dalai Lama\'s Teachings 2009', 'http://www.dalailama.com/page.128.htm'),
        ('Dalai Lama\'s Teachings 2008', 'http://www.dalailama.com/page.262.htm'),
        ('Dalai Lama\'s Teachings 2007', 'http://www.dalailama.com/page.222.htm'),
        ('Dalai Lama\'s Teachings 2006', 'http://www.dalailama.com/page.223.htm'),
    )
    for label, page_url in yearly_pages:
        addDir(label, page_url, 1, logo)
def INDEX(url):
    """List every streaming webcast linked from a yearly index page.

    Downloads *url*, extracts the page ids of all "Streaming" anchors,
    then downloads each linked page to read its <title> and adds it as a
    mode-2 folder via addDir.

    The site formats its anchors inconsistently, so one regex is kept per
    known attribute ordering. Results are emitted pattern by pattern, in
    the order the patterns are listed below.

    A linked page that cannot be fetched is skipped rather than aborting
    the whole listing, and a page without a matching <title> is listed
    under its URL instead of raising IndexError.

    Raises:
        urllib2.URLError: if the index page itself cannot be fetched.
    """
    user_agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'
    logo = 'http://www.dalailama.com/images/logo.gif'

    def fetch(page_url):
        # Return the body of page_url; the response is closed even when
        # read() fails.
        req = urllib2.Request(page_url)
        req.add_header('User-Agent', user_agent)
        response = urllib2.urlopen(req)
        try:
            return response.read()
        finally:
            response.close()

    # One pattern per anchor layout seen on the 2008/2009 pages.
    streaming_patterns = (
        '<a target="_blank" href="/page.(.+?).htm" title="">Streaming</a>',
        '<a href="/page.(.+?).htm" title="" target="_blank">Streaming</a>',
        '<a title="" href="/page.(.+?).htm" target="_blank">Streaming</a>',
        '<a title="" href="/page.(.+?).htm" target="_blank"><img style="margin: 0px" title="" alt="" src="../uploads/gallery/webcasts/rp.gif" align="middle" border="0" height="20" width="20" />Streaming</a>',
        '<a href="/page.(.+?).htm" target="_blank">Streaming</a>',
        '<a title="" target="_blank" href="/page.(.+?).htm"><img title="" style="margin: 0px" alt="" src="../uploads/gallery/webcasts/rp.gif" width="20" align="middle" border="0" height="20" />Streaming</a>',
        '<a title="" target="_blank" href="http://dalailama.com/page.(.+?).htm"><img title="" style="margin: 0px" alt="" src="../uploads/gallery/webcasts/rp.gif" width="20" align="middle" border="0" height="20" />Streaming</a>',
        '<a target="_blank" href="/page.(.+?).htm">Streaming</a>',
        '<a title="" target="_blank" href="/page.(.+?).htm">Streaming</a>',
        '<a href="/page.(.+?).htm"><img title="" style="margin: 0px" alt="" src="../uploads/gallery/webcasts/rp.gif" width="20" align="middle" border="0" height="20" />Streaming</a>',
        '<a target="_blank" href="/page.(.+?).htm"><img title="" style="margin: 0px" alt="" src="../uploads/gallery/webcasts/rp.gif" width="20" align="middle" border="0" height="20" />Streaming</a>',
    )
    # Compiled once here instead of once per linked page.
    title_pattern = re.compile('<title>(.+?)</title>')

    index_html = fetch(url)
    for pattern in streaming_patterns:
        for page_id in re.compile(pattern).findall(index_html):
            videourl = 'http://www.dalailama.com/page.' + page_id + '.htm'
            try:
                page_html = fetch(videourl)
            except urllib2.URLError:
                # One dead link should not hide every other webcast.
                print('Skipping unreachable webcast page: ' + videourl)
                continue
            titles = title_pattern.findall(page_html)
            if titles:
                label = titles[0]
            else:
                # No single-line <title> found; fall back to the URL so
                # the entry is still reachable.
                label = videourl
            addDir(label, videourl, 2, logo)
def VIDEOLINKS(url,name):
    """Add a playable entry for each English video link on a webcast page.

    Downloads *url*, collects the href of every "Video - English" anchor
    and registers each one through addLink under the label *name*.
    """
    request = urllib2.Request(url)
    request.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')
    handle = urllib2.urlopen(request)
    page_html = handle.read()
    handle.close()

    # Only video anchors are collected. The original also carried a
    # commented-out "Audio - English" pattern of the same shape, which
    # remains disabled here.
    video_anchor = re.compile('<a href="(.+?)" title=""><strong>Video - English</strong></a>')
    for stream_url in video_anchor.findall(page_html):
        addLink(name, stream_url, 'http://www.dalailama.com/images/logo.gif')
def get_params():
    """Parse the plugin query string in sys.argv[2] into a dict.

    Every '?' is removed, one trailing '/' is dropped, and the remainder
    is split on '&'. Only fragments containing exactly one '=' become
    entries; anything else is ignored. Values are returned still
    URL-encoded, so the caller is responsible for unquoting them.

    Returns:
        dict mapping parameter names to raw values. The dict is empty
        when the query string is shorter than two characters (previously
        an empty list was returned in that case).
    """
    paramstring = sys.argv[2]
    param = {}
    if len(paramstring) < 2:
        return param

    cleanedparams = paramstring.replace('?', '')
    # The original attempted this strip on a variable that was never read
    # again, so it had no effect; apply it to the string actually parsed.
    if cleanedparams.endswith('/'):
        cleanedparams = cleanedparams[:-1]

    for pair in cleanedparams.split('&'):
        pieces = pair.split('=')
        if len(pieces) == 2:
            param[pieces[0]] = pieces[1]
    return param
def addLink(name,url,iconimage):
    """Add a non-folder entry pointing directly at *url*.

    Builds a ListItem labelled *name* with *iconimage* as its thumbnail
    and hands it to xbmcplugin.addDirectoryItem for the handle given in
    sys.argv[1]. Returns whatever addDirectoryItem returns.
    """
    item = xbmcgui.ListItem(name, iconImage="DefaultVideo.png", thumbnailImage=iconimage)
    item.setInfo(type="Video", infoLabels={"Title": name})
    return xbmcplugin.addDirectoryItem(handle=int(sys.argv[1]), url=url, listitem=item)
def addDir(name,url,mode,iconimage):
    """Add a folder entry that calls back into this plugin.

    The callback URL is sys.argv[0] followed by a query string carrying
    the URL-quoted *url*, the *mode* number and the URL-quoted *name*;
    get_params decodes it on the next invocation. Returns whatever
    xbmcplugin.addDirectoryItem returns.
    """
    callback = "".join([
        sys.argv[0],
        "?url=", urllib.quote_plus(url),
        "&mode=", str(mode),
        "&name=", urllib.quote_plus(name),
    ])
    item = xbmcgui.ListItem(name, iconImage="DefaultFolder.png", thumbnailImage=iconimage)
    item.setInfo(type="Video", infoLabels={"Title": name})
    return xbmcplugin.addDirectoryItem(handle=int(sys.argv[1]), url=callback, listitem=item, isFolder=True)
# Script entry point: decode the query parameters passed to this plugin
# invocation and dispatch to the matching listing function.
params = get_params()
url = None
name = None
mode = None

# A missing key leaves the default in place. TypeError is caught as well
# so that a non-dict params value is treated the same as "no parameters".
try:
    url = urllib.unquote_plus(params["url"])
except (KeyError, TypeError):
    pass
try:
    name = urllib.unquote_plus(params["name"])
except (KeyError, TypeError):
    pass
try:
    mode = int(params["mode"])
except (KeyError, TypeError, ValueError):
    # ValueError: mode was present but not an integer.
    pass

print("Mode: " + str(mode))
print("URL: " + str(url))
print("Name: " + str(name))

if mode is None or not url:
    # No usable mode/url: show the top-level year folders.
    print("")
    CATEGORIES()
elif mode == 1:
    print("" + url)
    INDEX(url)
elif mode == 2:
    print("" + url)
    VIDEOLINKS(url, name)

# Any other mode adds no entries; the directory is still closed so the
# listing is finalised.
xbmcplugin.endOfDirectory(int(sys.argv[1]))