Merge branch 'master' into wolframalpha

This commit is contained in:
a01200356 2016-01-03 16:00:05 -06:00
commit e5d51a0e98
1 changed file with 29 additions and 3 deletions

View File

@ -10,17 +10,19 @@
@parse url, title, content, publishedDate, embedded @parse url, title, content, publishedDate, embedded
""" """
import re
from StringIO import StringIO
from json import loads from json import loads
from lxml import etree
from urllib import urlencode, quote_plus from urllib import urlencode, quote_plus
from dateutil import parser from dateutil import parser
from searx import logger
from searx.poolrequests import get as http_get
# engine dependent config # engine dependent config
categories = ['music'] categories = ['music']
paging = True paging = True
# api-key
guest_client_id = 'b45b1aa10f1ac2941910a7f0d10f8e28'
# search-url # search-url
url = 'https://api.soundcloud.com/' url = 'https://api.soundcloud.com/'
search_url = url + 'search?{query}'\ search_url = url + 'search?{query}'\
@ -35,6 +37,30 @@ embedded_url = '<iframe width="100%" height="166" ' +\
'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>' 'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'
def get_client_id():
    """Scrape a guest client_id from the public SoundCloud web page.

    Downloads https://soundcloud.com, selects every <script> element whose
    ``src`` attribute matches the ``(.*app.*js)`` pattern, then downloads
    those scripts one by one and looks for a ``client_id:"..."`` assignment
    inside them.

    Returns the first captured client_id. When the page or every script
    request is unsuccessful, or no script contains the pattern, a warning
    is logged and an empty string is returned.
    """
    homepage = http_get("https://soundcloud.com")

    if homepage.ok:
        # EXSLT namespace that makes re:match() available inside the XPath
        exslt_ns = {"re": "http://exslt.org/regular-expressions"}
        document = etree.parse(StringIO(homepage.content), etree.HTMLParser())
        scripts = document.xpath("//script[re:match(@src, '(.*app.*js)')]", namespaces=exslt_ns)

        # gather all candidate script urls before requesting any of them
        bundle_urls = []
        for script in scripts:
            if script is not None:
                bundle_urls.append(script.get('src'))

        for bundle_url in bundle_urls:
            # fetch the script and look for the embedded client id
            bundle = http_get(bundle_url)
            if not bundle.ok:
                continue

            found = re.search(r'client_id:"([^"]*)"', bundle.content, re.M | re.I)
            if found is None or not len(found.groups()):
                continue

            return found.groups()[0]

    logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
    return ""
# api-key
# Resolved by calling get_client_id() when this statement runs; holds an
# empty string when no client_id could be extracted.
guest_client_id = get_client_id()
# do search-request # do search-request
def request(query, params): def request(query, params):
offset = (params['pageno'] - 1) * 20 offset = (params['pageno'] - 1) * 20