OS X multi-line regex?
#1
This scraper does not work on OS X, but it works fine on Windows.
When GetDetails returns multi-line text (containing \r\n), GetDetails decodes it incorrectly. Is anything wrong with the code below?
Code:
    <GetDetails dest="3">
        <RegExp input="$$5" output="&lt;details&gt;\1&lt;/details&gt;" dest="3">
            <RegExp input="$$2" output="&lt;id&gt;\1&lt;/id&gt;" dest="5">
                <expression/>
            </RegExp>
            <RegExp input="$$1" output="&lt;title&gt;\1&lt;/title&gt;" dest="5+">
                <expression>&quot;title&quot;:&quot;(.+?)&quot;</expression>
            </RegExp>
            <RegExp input="$$1" output="&lt;originaltitle&gt;\1&lt;/originaltitle&gt;&lt;title&gt;\1&lt;/title&gt;" dest="5+">
                <expression>&quot;original_title&quot;:&quot;(.+?)&quot;</expression>
            </RegExp>
            <RegExp input="$$1" output="&lt;year&gt;\1&lt;/year&gt;" dest="5+">
                <expression>&quot;year&quot;:&quot;(.+?)&quot;</expression>
            </RegExp>
            <RegExp input="$$1" output="&lt;votes&gt;\1&lt;/votes&gt;" dest="5+">
                <expression>&quot;ratings_count&quot;:(\d+)</expression>
            </RegExp>
            <RegExp input="$$1" output="&lt;rating&gt;\1&lt;/rating&gt;" dest="5+">
                <expression>&quot;average&quot;:([\d\.]+)</expression>
            </RegExp>
            <RegExp input="$$1" output="\1" dest="8">
                <expression>&quot;genres&quot;:\[(.+?)\]</expression>
            </RegExp>
            <RegExp input="$$8" output="&lt;genre&gt;\1&lt;/genre&gt;" dest="5+">
                <expression repeat="yes">&quot;(.+?)&quot;</expression>
            </RegExp>
            <RegExp input="$$1" output="\1" dest="8">
                <expression>&quot;countries&quot;:\[(.+?)\]</expression>
            </RegExp>
            <RegExp input="$$8" output="&lt;country&gt;\1&lt;/country&gt;" dest="5+">
                <expression repeat="yes">&quot;(.+?)&quot;</expression>
            </RegExp>
            <RegExp input="$$1" output="&lt;plot&gt;\1&lt;/plot&gt;" dest="5+">
                <expression>&quot;summary&quot;:&quot;(.+?)&quot;</expression>
            </RegExp>
            <RegExp input="$$1" output="\1" dest="8">
                <expression clear="yes">&quot;directors&quot;:\[(.+?)\]</expression>
            </RegExp>
            <RegExp input="$$8" output="&lt;director&gt;\1&lt;/director&gt;" dest="5+">
                <expression repeat="yes">&quot;name&quot;:&quot;(.+?)&quot;</expression>
            </RegExp>
            <RegExp input="$$1" output="\1" dest="8">
                <expression clear="yes">&quot;writers&quot;:\[(.+?)\]</expression>
            </RegExp>
            <RegExp input="$$8" output="&lt;credits&gt;\1&lt;/credits&gt;" dest="5+">
                <expression repeat="yes">&quot;name&quot;:&quot;(.+?)&quot;</expression>
            </RegExp>
            <RegExp input="$$1" output="\1" dest="8">
                <expression clear="yes">&quot;casts&quot;:\[(.+?)\]</expression>
            </RegExp>
            <RegExp input="$$8" output="&lt;actor&gt;&lt;thumb&gt;http://\1/img/celebrity/large/\2.jpg&lt;/thumb&gt;&lt;name&gt;\3&lt;/name&gt;&lt;/actor&gt;" dest="5+">
                <expression repeat="yes">&quot;large&quot;:&quot;http:\\/\\/(.+?)\\/img\\/celebrity\\/large\\/(\d+).jpg&quot;.+?&quot;name&quot;:&quot;(.+?)&quot;</expression>
            </RegExp>
            <RegExp input="$$2" output="&lt;url cache=&quot;\1-poster.html&quot; function=&quot;GetPoster&quot;&gt;http://movie.douban.com/subject/\1/photos?type=R&lt;/url&gt;" dest="5+">
                <expression/>
            </RegExp>
            <RegExp conditional="!tmdbfanart" input="$$2" output="&lt;url cache=&quot;\1-fanart.html&quot; function=&quot;GetFanart&quot;&gt;http://movie.douban.com/subject/\1/photos?type=S&lt;/url&gt;" dest="5+">
                <expression/>
            </RegExp>
            <RegExp input="$$2" output="&lt;url function=&quot;GetDetailsByIMDBId&quot;&gt;http://movie.douban.com/subject/\1&lt;/url&gt;" dest="5+">
                <expression/>
            </RegExp>
            <expression noclean="1"/>
        </RegExp>
    </GetDetails>
Reply

Logout Mark Read Team Forum Stats Members Help
multi-line regex?0