From 4eeb5fb76bde960fe56e4a59668fc62358cc5033 Mon Sep 17 00:00:00 2001 From: Kirill Isakov Date: Sat, 16 Apr 2016 16:22:31 +0600 Subject: [PATCH 1/2] [fix] incorrect URLs in Reddit results - closes #538 --- searx/engines/reddit.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/searx/engines/reddit.py b/searx/engines/reddit.py index 9729898..3ca7e44 100644 --- a/searx/engines/reddit.py +++ b/searx/engines/reddit.py @@ -13,7 +13,7 @@ import json from cgi import escape from urllib import urlencode -from urlparse import urlparse +from urlparse import urlparse, urljoin from datetime import datetime # engine dependent config @@ -21,7 +21,8 @@ categories = ['general', 'images', 'news', 'social media'] page_size = 25 # search-url -search_url = 'https://www.reddit.com/search.json?{query}' +base_url = 'https://www.reddit.com/' +search_url = base_url + 'search.json?{query}' # do search-request @@ -52,7 +53,7 @@ def response(resp): # extract post information params = { - 'url': data['url'], + 'url': urljoin(base_url, data['permalink']), 'title': data['title'] } @@ -61,6 +62,7 @@ def response(resp): url_info = urlparse(thumbnail) # netloc & path if url_info[1] != '' and url_info[2] != '': + params['img_src'] = data['url'] params['thumbnail_src'] = thumbnail params['template'] = 'images.html' img_results.append(params) From d98024c26eb4f3e45643a2d2d8124a0616f863e1 Mon Sep 17 00:00:00 2001 From: Kirill Isakov Date: Sat, 16 Apr 2016 16:45:49 +0600 Subject: [PATCH 2/2] Update Reddit unit test -- check for img_src --- tests/unit/engines/test_reddit.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/unit/engines/test_reddit.py b/tests/unit/engines/test_reddit.py index 51589e3..9c94f4e 100644 --- a/tests/unit/engines/test_reddit.py +++ b/tests/unit/engines/test_reddit.py @@ -25,7 +25,8 @@ class TestRedditEngine(SearxTestCase): "data": { "children": [{ "data": { - "url": "http://google.com/", + "url": "http://google2.com/", + "permalink": "http://google.com/", "title": "Title number one", "selftext": "Sample", "created_utc": 1401219957.0, @@ -33,7 +34,8 @@ class TestRedditEngine(SearxTestCase): } }, { "data": { - "url": "https://reddit.com/", + "url": "https://reddit2.com/", + "permalink": "https://reddit.com/", "title": "Title number two", "selftext": "Dominus vobiscum", "created_utc": 1438792533.0, @@ -55,6 +57,7 @@ class TestRedditEngine(SearxTestCase): self.assertEqual(r['url'], 'http://google.com/') self.assertEqual(r['title'], 'Title number one') self.assertEqual(r['template'], 'images.html') + self.assertEqual(r['img_src'], 'http://google2.com/') self.assertEqual(r['thumbnail_src'], 'http://image.com/picture.jpg') # testing second result (self-post) @@ -65,3 +68,4 @@ class TestRedditEngine(SearxTestCase): created = datetime.fromtimestamp(1438792533.0) self.assertEqual(r['publishedDate'], created) self.assertTrue('thumbnail_src' not in r) + self.assertTrue('img_src' not in r)