Merge pull request #830 from davidar/se

Semantic Scholar
This commit is contained in:
Adam Tauber 2017-01-18 23:50:11 +01:00 committed by GitHub
commit 57149661e4
2 changed files with 15 additions and 4 deletions

View File

@@ -31,8 +31,6 @@ if xpath_results is a string element, then it's already done
def extract_text(xpath_results): def extract_text(xpath_results):
if type(xpath_results) == list: if type(xpath_results) == list:
# it's list of result : concat everything using recursive call # it's list of result : concat everything using recursive call
if not xpath_results:
raise Exception('Empty url resultset')
result = '' result = ''
for e in xpath_results: for e in xpath_results:
result = result + extract_text(e) result = result + extract_text(e)
@@ -48,6 +46,8 @@ def extract_text(xpath_results):
def extract_url(xpath_results, search_url): def extract_url(xpath_results, search_url):
if xpath_results == []:
raise Exception('Empty url resultset')
url = extract_text(xpath_results) url = extract_text(xpath_results)
if url.startswith('//'): if url.startswith('//'):
@@ -103,8 +103,8 @@ def response(resp):
if results_xpath: if results_xpath:
for result in dom.xpath(results_xpath): for result in dom.xpath(results_xpath):
url = extract_url(result.xpath(url_xpath), search_url) url = extract_url(result.xpath(url_xpath), search_url)
title = extract_text(result.xpath(title_xpath)[0]) title = extract_text(result.xpath(title_xpath))
content = extract_text(result.xpath(content_xpath)[0]) content = extract_text(result.xpath(content_xpath))
results.append({'url': url, 'title': title, 'content': content}) results.append({'url': url, 'title': title, 'content': content})
else: else:
for url, title, content in zip( for url, title, content in zip(

View File

@@ -462,6 +462,17 @@ engines:
# - ... # - ...
# disabled : True # disabled : True
- name : semantic scholar
engine : xpath
paging : True
search_url : https://www.semanticscholar.org/search?q={query}&sort=relevance&page={pageno}&ae=false
results_xpath : //article
url_xpath : .//div[@class="search-result-title"]/a/@href
title_xpath : .//div[@class="search-result-title"]/a
content_xpath : .//div[@class="search-result-abstract"]
shortcut : se
categories : science
- name : spotify - name : spotify
engine : spotify engine : spotify
shortcut : stf shortcut : stf