2008-10-02, 03:46
Is there currently a way to access the IMDb scraping from within a plugin or script? I want to be able to pass IMDb URLs to XBMC from another source in the video plugin, so that the movie information can be viewed.
# Browser-like User-Agent sent with every request made by the helpers below.
_IMDB_USER_AGENT = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.8.1.14) Gecko/20080404 Firefox/2.0.0.14'


def imdb(url):
    """Look up a title on IMDb and scrape a few fields from its pages.

    ``url`` is URL-quoted and sent as the ``q`` parameter of the IMDb
    "find" page, so it is used as a search query.

    Returns a 5-tuple ``(genre, year, image, rating, plot)`` where each of
    the first four items is the first match of its pattern on the title
    page, and ``plot`` is either the scraped plot text or a
    "No plot found" placeholder string.

    Raises ``IndexError`` when the genre, year, image or rating pattern
    does not match the title page. Errors raised by ``urllib2`` while
    fetching are not caught here.
    """
    page = _imdb_fetch('http://www.imdb.com/find?s=all&q=' + urllib.quote(url))
    hits = re.compile('<b>Media from <a href="/title/(.+?)/">').findall(page)

    if len(hits) > 0:
        # The search page links to a title: fetch that title page.
        title_id = hits[0]
        page = _imdb_fetch('http://imdb.com/title/' + title_id)
        missing_text = 'No plot found on Imdb'
    else:
        # No "Media from" link: the search response itself is parsed as the
        # title page (presumably IMDb redirected straight to it -- confirm).
        ids = re.compile(r'<a class="tn15more inline" href="/title/(.+?)/plotsummary" onClick=".+?">.+?</a>').findall(page)
        title_id = None
        if len(ids) > 0:
            title_id = ids[0]
        missing_text = 'No Plot found on imdb'

    genre, year, image, rating = _imdb_title_fields(page)

    if title_id is None:
        # Without a title id there is no plot page to request. Fall back
        # explicitly instead of re-sending the previous request.
        plot = missing_text
    else:
        plot = _imdb_plot(title_id, missing_text)

    return genre, year, image, rating, plot


def _imdb_fetch(address):
    """Return the body of ``address``, requested with the browser User-Agent."""
    req = urllib2.Request(address)
    req.add_header('User-Agent', _IMDB_USER_AGENT)
    handle = urllib2.urlopen(req)
    try:
        return handle.read()
    finally:
        # Always release the connection, even if read() fails.
        handle.close()


def _imdb_title_fields(page):
    """Return ``(genre, year, image, rating)`` scraped from a title page.

    Raises ``IndexError`` if any of the four patterns has no match.
    """
    genre = re.compile(r'<h5>Genre:</h5>\n<a href=".+?">(.+?)</a>').findall(page)
    year = re.compile(r'<a href="/Sections/Years/.+?/">(.+?)</a>').findall(page)
    image = re.compile(r'<img border="0" alt=".+?" title=".+?" src="(.+?)" /></a>').findall(page)
    rating = re.compile(r'<div class="meta">\n<b>(.+?)</b>').findall(page)
    return genre[0], year[0], image[0], rating[0]


def _imdb_plot(title_id, missing_text):
    """Return the plot for ``title_id``, or a placeholder when none is found.

    The plot summary page is tried first; the synopsis page is requested
    only when the summary pattern has no match. ``missing_text`` is
    returned when neither page yields a match.
    """
    base = 'http://www.imdb.com/title/' + title_id

    page = _imdb_fetch(base + '/plotsummary')
    plot = re.compile('<p class="plotpar">\n(.+?)\n<i>\n').findall(page)

    if len(plot) < 1:
        # Newlines are stripped so the non-greedy match can span what were
        # several lines of the synopsis markup.
        page = _imdb_fetch(base + '/synopsis').replace('\n', '')
        plot = re.compile('<div id="swiki.2.1">(.+?)</div>').findall(page)
        if len(plot) < 1:
            return missing_text

    # A captured 'div' past the first character is treated as a bad match
    # (the pattern ran into markup), so the text is discarded.
    if plot[0].find('div') > 0:
        return 'No Plot found on Imdb'
    return plot[0]