A bit of cleanup of the code

- regexes in an array
- regexes applied only to the query string (the part after '?') of the URL
This commit is contained in:
Cqoicebordel 2015-06-15 20:34:02 +02:00
parent 617495cca8
commit e93f5314d7

View file

@ -18,10 +18,9 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >.
from flask.ext.babel import gettext from flask.ext.babel import gettext
import re import re
re1 = re.compile(r'utm_[^&]+&?') regexes = {re.compile(r'utm_[^&]+&?'),
re2 = re.compile(r'(wkey|wemail)[^&]+&?') re.compile(r'(wkey|wemail)[^&]+&?'),
re3 = re.compile(r'&$') re.compile(r'&$')}
re4 = re.compile(r'^\?$')
name = gettext('Tracker URL remover') name = gettext('Tracker URL remover')
description = gettext('Remove trackers arguments from the returned URL') description = gettext('Remove trackers arguments from the returned URL')
@ -29,12 +28,17 @@ default_on = True
def on_result(request, ctx): def on_result(request, ctx):
url = ctx['result']['url'] splited_url = ctx['result']['url'].split('?')
if len(splited_url) is not 2:
return True
for reg in regexes:
splited_url[1] = reg.sub('', splited_url[1])
if splited_url[1] == "":
ctx['result']['url'] = splited_url[0]
else:
ctx['result']['url'] = splited_url[0] + '?' + splited_url[1]
url = re1.sub('', url)
url = re2.sub('', url)
url = re3.sub('', url)
url = re4.sub('', url)
ctx['result']['url'] = url
return True return True