2014-06-10, 03:41
This scraper does not work on OS X, but it works fine on Windows.
When GetDetails returns multi-line text (containing \r\n), the text is decoded incorrectly. Is anything wrong?
When GetDetails returns multi-line text (containing \r\n), the text is decoded incorrectly. Is anything wrong?
Code:
<!--
  GetDetails: assembles the <details> result for one movie.

  Buffer usage visible in this function:
    $$1  input scanned for JSON-style fields ("title":"...", "genres":[...], ...);
         presumably the fetched Douban API response (TODO confirm against the caller).
    $$2  used as the subject id: emitted as <id> and inserted into the follow-up URLs.
    $$5  accumulator; every inner RegExp appends to it with dest="5+".
    $$8  scratch buffer holding one JSON array at a time (genres, countries, ...).
    $$3  final result: the content of $$5 wrapped in <details>.

  All markup inside output="..." attributes is entity-escaped (&lt; &gt; &quot;)
  so that this file stays well-formed XML; the XML parser hands the unescaped
  text to the scraper engine.
-->
<GetDetails dest="3">
  <RegExp input="$$5" output="&lt;details&gt;\1&lt;/details&gt;" dest="3">

    <!-- First write to buffer 5 (no "+"): starts the accumulator with the id. -->
    <RegExp input="$$2" output="&lt;id&gt;\1&lt;/id&gt;" dest="5">
      <expression/>
    </RegExp>

    <!-- Simple scalar fields. -->
    <RegExp input="$$1" output="&lt;title&gt;\1&lt;/title&gt;" dest="5+">
      <expression>"title":"(.+?)"</expression>
    </RegExp>
    <!-- Emits originaltitle and also a second title element carrying the same value. -->
    <RegExp input="$$1" output="&lt;originaltitle&gt;\1&lt;/originaltitle&gt;&lt;title&gt;\1&lt;/title&gt;" dest="5+">
      <expression>"original_title":"(.+?)"</expression>
    </RegExp>
    <RegExp input="$$1" output="&lt;year&gt;\1&lt;/year&gt;" dest="5+">
      <expression>"year":"(.+?)"</expression>
    </RegExp>
    <RegExp input="$$1" output="&lt;votes&gt;\1&lt;/votes&gt;" dest="5+">
      <expression>"ratings_count":(\d+)</expression>
    </RegExp>
    <RegExp input="$$1" output="&lt;rating&gt;\1&lt;/rating&gt;" dest="5+">
      <expression>"average":([\d\.]+)</expression>
    </RegExp>

    <!--
      Array fields are handled in two steps: copy the array body into scratch
      buffer 8, then repeat over its items. clear="yes" empties buffer 8 when the
      array is not found, so the second step cannot re-emit items left over from
      the previous array under the wrong tag.
    -->
    <RegExp input="$$1" output="\1" dest="8">
      <expression clear="yes">"genres":\[(.+?)\]</expression>
    </RegExp>
    <RegExp input="$$8" output="&lt;genre&gt;\1&lt;/genre&gt;" dest="5+">
      <expression repeat="yes">"(.+?)"</expression>
    </RegExp>

    <RegExp input="$$1" output="\1" dest="8">
      <expression clear="yes">"countries":\[(.+?)\]</expression>
    </RegExp>
    <RegExp input="$$8" output="&lt;country&gt;\1&lt;/country&gt;" dest="5+">
      <expression repeat="yes">"(.+?)"</expression>
    </RegExp>

    <!-- The capture stops at the first double quote that follows the summary's opening quote. -->
    <RegExp input="$$1" output="&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
      <expression>"summary":"(.+?)"</expression>
    </RegExp>

    <RegExp input="$$1" output="\1" dest="8">
      <expression clear="yes">"directors":\[(.+?)\]</expression>
    </RegExp>
    <RegExp input="$$8" output="&lt;director&gt;\1&lt;/director&gt;" dest="5+">
      <expression repeat="yes">"name":"(.+?)"</expression>
    </RegExp>

    <RegExp input="$$1" output="\1" dest="8">
      <expression clear="yes">"writers":\[(.+?)\]</expression>
    </RegExp>
    <RegExp input="$$8" output="&lt;credits&gt;\1&lt;/credits&gt;" dest="5+">
      <expression repeat="yes">"name":"(.+?)"</expression>
    </RegExp>

    <!--
      Cast: the thumbnail URL appears in the input with JSON-escaped slashes (\/),
      so host (\1) and image number (\2) are captured separately and the URL is
      rebuilt with plain slashes; \3 is the actor name.
    -->
    <RegExp input="$$1" output="\1" dest="8">
      <expression clear="yes">"casts":\[(.+?)\]</expression>
    </RegExp>
    <RegExp input="$$8" output="&lt;actor&gt;&lt;thumb&gt;http://\1/img/celebrity/large/\2.jpg&lt;/thumb&gt;&lt;name&gt;\3&lt;/name&gt;&lt;/actor&gt;" dest="5+">
      <expression repeat="yes">"large":"http:\\/\\/(.+?)\\/img\\/celebrity\\/large\\/(\d+).jpg".+?"name":"(.+?)"</expression>
    </RegExp>

    <!-- Follow-up requests: each url element names the scraper function that handles the fetched page. -->
    <RegExp input="$$2" output="&lt;url cache=&quot;\1-poster.html&quot; function=&quot;GetPoster&quot;&gt;http://movie.douban.com/subject/\1/photos?type=R&lt;/url&gt;" dest="5+">
      <expression/>
    </RegExp>
    <!-- Guarded by the tmdbfanart condition: presumably skipped when that setting is enabled (TODO confirm). -->
    <RegExp conditional="!tmdbfanart" input="$$2" output="&lt;url cache=&quot;\1-fanart.html&quot; function=&quot;GetFanart&quot;&gt;http://movie.douban.com/subject/\1/photos?type=S&lt;/url&gt;" dest="5+">
      <expression/>
    </RegExp>
    <RegExp input="$$2" output="&lt;url function=&quot;GetDetailsByIMDBId&quot;&gt;http://movie.douban.com/subject/\1&lt;/url&gt;" dest="5+">
      <expression/>
    </RegExp>

    <!-- noclean="1" on the wrapper: buffer 5 already contains generated markup that must pass through as-is. -->
    <expression noclean="1"/>
  </RegExp>
</GetDetails>