fix bing videos engine

This commit is contained in:
Noémi Ványi 2018-02-10 19:44:07 +01:00
parent ccc6955f0c
commit c645915171
1 changed file with 5 additions and 17 deletions

View File

@@ -69,22 +69,11 @@ def response(resp):
dom = html.fromstring(resp.text) dom = html.fromstring(resp.text)
for result in dom.xpath('//div[@class="dg_u"]'): for result in dom.xpath('//div[@class="dg_u"]'):
url = result.xpath('./div[@class="mc_vtvc"]/a/@href')[0]
# try to extract the url url = 'https://bing.com' + url
url_container = result.xpath('.//div[@class="sa_wrapper"]/@data-eventpayload') title = extract_text(result.xpath('./div/a/div/div[@class="mc_vtvc_title"]/@title'))
if len(url_container) > 0: content = extract_text(result.xpath('./div/a/div/div/div/div/text()'))
url = loads(url_container[0])['purl'] thumbnail = result.xpath('./div/a/div/div/img/@src')[0]
else:
url = result.xpath('./a/@href')[0]
# discard results that do not return an external url
# very recent results sometimes don't return the video's url
if url.startswith('/videos/search?'):
continue
title = extract_text(result.xpath('./a//div[@class="tl"]'))
content = extract_text(result.xpath('.//div[@class="pubInfo"]'))
thumbnail = result.xpath('.//div[@class="vthumb"]/img/@src')[0]
results.append({'url': url, results.append({'url': url,
'title': title, 'title': title,
@@ -92,7 +81,6 @@ def response(resp):
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'template': 'videos.html'}) 'template': 'videos.html'})
# first page ignores requested number of results
if len(results) >= number_of_results: if len(results) >= number_of_results:
break break