refactor engine's search language handling

Add a match_language function in utils to match any user-given
language code against a list of the engine's supported languages.

Also add a language_aliases dict to each engine to translate
standard language codes into the custom codes used by that engine.
Marc Abonce Seguin 2018-02-28 22:30:48 -06:00
parent d1eae9359f
commit 772c048d01
42 changed files with 275 additions and 171 deletions
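
In short: engines stop hand-rolling locale fix-ups and delegate to the shared
helper. A minimal sketch of the new API (the aliases mirror the bing engine
below; the supported-language list is illustrative only):

    from searx.utils import match_language

    supported_languages = ['en', 'fr-FR', 'zh-CHS', 'zh-CHT']
    language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'}

    match_language('zh-CN', supported_languages, language_aliases)  # -> 'zh-CHS' (custom alias)
    match_language('fr', supported_languages, language_aliases)     # -> 'fr-FR' (most likely country)
    match_language('xx', supported_languages, language_aliases)     # -> 'en-US' (default fallback)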

File diff suppressed because one or more lines are too long

View File

@@ -20,13 +20,14 @@ import sys
 import threading
 from os.path import realpath, dirname
 from io import open
+from babel.localedata import locale_identifiers
 from flask_babel import gettext
 from operator import itemgetter
 from json import loads
 from requests import get
 from searx import settings
 from searx import logger
-from searx.utils import load_module
+from searx.utils import load_module, match_language

 logger = logger.getChild('engines')
@@ -38,6 +39,8 @@ engines = {}
 categories = {'general': []}

 languages = loads(open(engine_dir + '/../data/engines_languages.json', 'r', encoding='utf-8').read())
+babel_langs = [lang_parts[0] + '-' + lang_parts[-1] if len(lang_parts) > 1 else lang_parts[0]
+               for lang_parts in (lang_code.split('_') for lang_code in locale_identifiers())]

 engine_shortcuts = {}
 engine_default_args = {'paging': False,
@@ -97,6 +100,22 @@ def load_engine(engine_data):
     if engine_data['name'] in languages:
         setattr(engine, 'supported_languages', languages[engine_data['name']])

+    # find custom aliases for non standard language codes
+    if hasattr(engine, 'supported_languages'):
+        if hasattr(engine, 'language_aliases'):
+            language_aliases = getattr(engine, 'language_aliases')
+        else:
+            language_aliases = {}
+
+        for engine_lang in getattr(engine, 'supported_languages'):
+            iso_lang = match_language(engine_lang, babel_langs, fallback=None)
+            if iso_lang and iso_lang != engine_lang and not engine_lang.startswith(iso_lang) and \
+               iso_lang not in getattr(engine, 'supported_languages'):
+                language_aliases[iso_lang] = engine_lang
+
+        if language_aliases:
+            setattr(engine, 'language_aliases', language_aliases)
+
     # assign language fetching method if auxiliary method exists
     if hasattr(engine, '_fetch_supported_languages'):
         setattr(engine, 'fetch_supported_languages',

View File

@@ -99,13 +99,13 @@ supported_languages = dict(lang_urls, **main_langs)

 # do search-request
 def request(query, params):
-    # translate the locale (e.g. 'en_US') to language code ('en')
+    # translate the locale (e.g. 'en-US') to language code ('en')
     language = locale_to_lang_code(params['language'])

     # if our language is hosted on the main site, we need to add its name
     # to the query in order to narrow the results to that language
     if language in main_langs:
-        query += '(' + main_langs[language] + ')'
+        query += b' (' + main_langs[language] + b')'

     # prepare the request parameters
     query = urlencode({'search': query})

View File

@@ -16,12 +16,14 @@
 from lxml import html
 from searx.engines.xpath import extract_text
 from searx.url_utils import urlencode
+from searx.utils import match_language

 # engine dependent config
 categories = ['general']
 paging = True
 language_support = True
 supported_languages_url = 'https://www.bing.com/account/general'
+language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'}

 # search-url
 base_url = 'https://www.bing.com/'
@@ -32,9 +34,9 @@ search_string = 'search?{query}&first={offset}'
 def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1

-    lang = params['language'].split('-')[0].upper()
-    query = u'language:{} {}'.format(lang, query.decode('utf-8')).encode('utf-8')
+    lang = match_language(params['language'], supported_languages, language_aliases)
+    query = u'language:{} {}'.format(lang.split('-')[0].upper(), query.decode('utf-8')).encode('utf-8')

     search_path = search_string.format(
         query=urlencode({'q': query}),

View File

@@ -19,6 +19,7 @@ from lxml import html
 from json import loads
 import re
 from searx.url_utils import urlencode
+from searx.utils import match_language

 # engine dependent config
 categories = ['images']
@@ -46,26 +47,6 @@ safesearch_types = {2: 'STRICT',
 _quote_keys_regex = re.compile('({|,)([a-z][a-z0-9]*):(")', re.I | re.U)

-
-# get supported region code
-def get_region_code(lang, lang_list=None):
-    region = None
-    if lang in (lang_list or supported_languages):
-        region = lang
-    elif lang.startswith('no'):
-        region = 'nb-NO'
-    else:
-        # try to get a supported country code with language
-        lang = lang.split('-')[0]
-        for lc in (lang_list or supported_languages):
-            if lang == lc.split('-')[0]:
-                region = lc
-                break
-    if region:
-        return region.lower()
-    else:
-        return 'en-us'
-
 # do search-request
 def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1
@@ -74,7 +55,7 @@ def request(query, params):
         query=urlencode({'q': query}),
         offset=offset)

-    language = get_region_code(params['language'])
+    language = match_language(params['language'], supported_languages).lower()

     params['cookies']['SRCHHPGUSR'] = \
         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')

View File

@@ -14,8 +14,8 @@
 from datetime import datetime
 from dateutil import parser
 from lxml import etree
-from searx.utils import list_get
-from searx.engines.bing import _fetch_supported_languages, supported_languages_url
+from searx.utils import list_get, match_language
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url, language_aliases
 from searx.url_utils import urlencode, urlparse, parse_qsl

 # engine dependent config
@@ -71,7 +71,7 @@ def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1

-    language = params['language']
+    language = match_language(params['language'], supported_languages, language_aliases)

     params['url'] = _get_url(query, language, offset, params['time_range'])

View File

@@ -12,9 +12,10 @@
 from json import loads
 from lxml import html
-from searx.engines.bing_images import _fetch_supported_languages, supported_languages_url, get_region_code
+from searx.engines.bing_images import _fetch_supported_languages, supported_languages_url
 from searx.engines.xpath import extract_text
 from searx.url_utils import urlencode
+from searx.utils import match_language

 categories = ['videos']
@@ -47,8 +48,8 @@ def request(query, params):
         'ADLT=' + safesearch_types.get(params['safesearch'], 'DEMOTE')

     # language cookie
-    region = get_region_code(params['language'], lang_list=supported_languages)
-    params['cookies']['_EDGE_S'] = 'mkt=' + region + '&F=1'
+    language = match_language(params['language'], supported_languages).lower()
+    params['cookies']['_EDGE_S'] = 'mkt=' + language + '&F=1'

     # query and paging
     params['url'] = search_url.format(query=urlencode({'q': query}),

View File

@@ -15,6 +15,7 @@
 from json import loads
 from datetime import datetime
 from searx.url_utils import urlencode
+from searx.utils import match_language

 # engine dependent config
 categories = ['videos']
@@ -32,7 +33,7 @@ supported_languages_url = 'https://api.dailymotion.com/languages'

 # do search-request
 def request(query, params):
-    locale = params['language']
+    locale = match_language(params['language'], supported_languages)

     params['url'] = search_url.format(
         query=urlencode({'search': query, 'localization': locale}),

View File

@@ -18,6 +18,7 @@ from json import loads
 from searx.engines.xpath import extract_text
 from searx.poolrequests import get
 from searx.url_utils import urlencode
+from searx.utils import match_language

 # engine dependent config
 categories = ['general']
@@ -26,6 +27,16 @@ language_support = True
 supported_languages_url = 'https://duckduckgo.com/util/u172.js'
 time_range_support = True

+language_aliases = {
+    'ar-SA': 'ar-XA',
+    'es-419': 'es-XL',
+    'ja': 'jp-JP',
+    'ko': 'kr-KR',
+    'sl-SI': 'sl-SL',
+    'zh-TW': 'tzh-TW',
+    'zh-HK': 'tzh-HK'
+}
+
 # search-url
 url = 'https://duckduckgo.com/html?{query}&s={offset}&dc={dc_param}'
 time_range_url = '&df={range}'
@@ -42,34 +53,12 @@ content_xpath = './/a[@class="result__snippet"]'

 # match query's language to a region code that duckduckgo will accept
-def get_region_code(lang, lang_list=None):
-    # custom fixes for languages
-    if lang[:2] == 'ja':
-        region_code = 'jp-jp'
-    elif lang[:2] == 'sl':
-        region_code = 'sl-sl'
-    elif lang == 'zh-TW':
-        region_code = 'tw-tzh'
-    elif lang == 'zh-HK':
-        region_code = 'hk-tzh'
-    elif lang[-2:] == 'SA':
-        region_code = 'xa-' + lang.split('-')[0]
-    elif lang[-2:] == 'GB':
-        region_code = 'uk-' + lang.split('-')[0]
-    else:
-        region_code = lang.split('-')
-        if len(region_code) == 2:
-            # country code goes first
-            region_code = region_code[1].lower() + '-' + region_code[0].lower()
-        else:
-            # tries to get a country code from language
-            region_code = region_code[0].lower()
-            for lc in (lang_list or supported_languages):
-                lc = lc.split('-')
-                if region_code == lc[0]:
-                    region_code = lc[1].lower() + '-' + lc[0].lower()
-                    break
-    return region_code
+def get_region_code(lang, lang_list=[]):
+    lang_code = match_language(lang, lang_list, language_aliases, 'wt-WT')
+    lang_parts = lang_code.split('-')
+
+    # country code goes first
+    return lang_parts[1].lower() + '-' + lang_parts[0].lower()
# do search-request # do search-request
@@ -79,7 +68,7 @@ def request(query, params):
     offset = (params['pageno'] - 1) * 30

-    region_code = get_region_code(params['language'])
+    region_code = get_region_code(params['language'], supported_languages)

     params['url'] = url.format(
         query=urlencode({'q': query, 'kl': region_code}), offset=offset, dc_param=offset)
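
For reference, the slimmed-down get_region_code above delegates all matching to
match_language and only reorders the result into DuckDuckGo's country-first kl
format; the values below are the ones exercised by the updated unit tests:

    get_region_code('de-CH', supported_languages)  # -> 'ch-de'
    get_region_code('zh-HK', supported_languages)  # -> 'hk-tzh' (via the 'zh-HK': 'tzh-HK' alias)
    get_region_code('en', supported_languages)     # -> 'us-en' (most likely country for 'en')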

View File

@@ -2,9 +2,9 @@ import json
 from lxml import html
 from re import compile
 from searx.engines.xpath import extract_text
-from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url
+from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, language_aliases
 from searx.url_utils import urlencode
-from searx.utils import html_to_text
+from searx.utils import html_to_text, match_language

 url = 'https://api.duckduckgo.com/'\
     + '?{query}&format=json&pretty=0&no_redirect=1&d=1'
@@ -24,7 +24,8 @@ def result_to_text(url, text, htmlResult):
 def request(query, params):
     params['url'] = url.format(query=urlencode({'q': query}))
-    params['headers']['Accept-Language'] = params['language'].split('-')[0]
+    language = match_language(params['language'], supported_languages, language_aliases)
+    params['headers']['Accept-Language'] = language.split('-')[0]
     return params

View File

@@ -15,7 +15,10 @@
 from json import loads
 from searx.engines.xpath import extract_text
-from searx.engines.duckduckgo import _fetch_supported_languages, supported_languages_url, get_region_code
+from searx.engines.duckduckgo import (
+    _fetch_supported_languages, supported_languages_url,
+    get_region_code, language_aliases
+)
 from searx.poolrequests import get
 from searx.url_utils import urlencode

View File

@@ -14,6 +14,7 @@ from lxml import html, etree
 from searx.engines.xpath import extract_text, extract_url
 from searx import logger
 from searx.url_utils import urlencode, urlparse, parse_qsl
+from searx.utils import match_language

 logger = logger.getChild('google engine')
@@ -165,22 +166,20 @@ def extract_text_from_dom(result, xpath):
 def request(query, params):
     offset = (params['pageno'] - 1) * 10

-    # temporary fix until a way of supporting en-US is found
-    if params['language'] == 'en-US':
-        params['language'] = 'en-GB'
-
-    if params['language'][:2] == 'jv':
-        language = 'jw'
-        country = 'ID'
-        url_lang = 'lang_jw'
+    language = match_language(params['language'], supported_languages)
+    language_array = language.split('-')
+    if params['language'].find('-') > 0:
+        country = params['language'].split('-')[1]
+    elif len(language_array) == 2:
+        country = language_array[1]
     else:
-        language_array = params['language'].lower().split('-')
-        if len(language_array) == 2:
-            country = language_array[1]
-        else:
-            country = 'US'
-        language = language_array[0] + ',' + language_array[0] + '-' + country
-        url_lang = 'lang_' + language_array[0]
+        country = 'US'
+
+    # temporary fix until a way of supporting en-US is found
+    if language == 'en-US':
+        country = 'GB'
+
+    url_lang = 'lang_' + language

     if use_locale_domain:
         google_hostname = country_to_hostname.get(country.upper(), default_hostname)
@@ -196,7 +195,7 @@ def request(query, params):
     if params['time_range'] in time_range_dict:
         params['url'] += time_range_search.format(range=time_range_dict[params['time_range']])

-    params['headers']['Accept-Language'] = language
+    params['headers']['Accept-Language'] = language + ',' + language + '-' + country
     params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'

     params['google_hostname'] = google_hostname

View File

@@ -13,6 +13,7 @@
 from lxml import html
 from searx.engines.google import _fetch_supported_languages, supported_languages_url
 from searx.url_utils import urlencode
+from searx.utils import match_language

 # search-url
 categories = ['news']
@@ -50,8 +51,9 @@ def request(query, params):
     params['url'] = search_url.format(query=urlencode({'q': query}),
                                       search_options=urlencode(search_options))

-    language_array = params['language'].lower().split('-')
-    params['url'] += '&lr=lang_' + language_array[0]
+    language = match_language(params['language'], supported_languages).split('-')[0]
+    if language:
+        params['url'] += '&lr=lang_' + language

     return params

View File

@@ -14,6 +14,7 @@ from datetime import datetime
 from json import loads
 from searx.utils import html_to_text
 from searx.url_utils import urlencode
+from searx.utils import match_language

 # engine dependent config
 categories = None
@@ -45,16 +46,8 @@ def request(query, params):
         offset=offset)

     # add language tag
-    if params['language'] == 'no' or params['language'].startswith('no-'):
-        params['language'] = params['language'].replace('no', 'nb', 1)
-    if params['language'].find('-') < 0:
-        # tries to get a country code from language
-        for lang in supported_languages:
-            lc = lang.split('-')
-            if params['language'] == lc[0]:
-                params['language'] = lang
-                break
-    params['url'] += '&locale=' + params['language'].replace('-', '_').lower()
+    language = match_language(params['language'], supported_languages)
+    params['url'] += '&locale=' + language.replace('-', '_').lower()

     return params

View File

@@ -14,6 +14,7 @@ from json import loads
 import re
 from lxml.html import fromstring
 from searx.url_utils import unquote, urlencode
+from searx.utils import match_language

 # engine dependent config
 categories = ['general', 'images']
@@ -35,11 +36,8 @@ regex_img_url_remove_start = re.compile(b'^https?://i\.swisscows\.ch/\?link=')

 # do search-request
 def request(query, params):
-    if params['language'].split('-')[0] == 'no':
-        region = 'nb-NO'
-    else:
-        region = params['language']
-    ui_language = params['language'].split('-')[0]
+    region = match_language(params['language'], supported_languages)
+    ui_language = region.split('-')[0]

     search_path = search_string.format(
         query=urlencode({'query': query, 'uiLanguage': ui_language, 'region': region}),

View File

@@ -16,6 +16,7 @@ from searx.poolrequests import get
 from searx.engines.xpath import extract_text
 from searx.engines.wikipedia import _fetch_supported_languages, supported_languages_url
 from searx.url_utils import urlencode
+from searx.utils import match_language

 from json import loads
 from lxml.html import fromstring
@@ -56,7 +57,7 @@ calendar_name_xpath = './/sup[contains(@class,"wb-calendar-name")]'

 def request(query, params):
-    language = params['language'].split('-')[0]
+    language = match_language(params['language'], supported_languages).split('-')[0]

     params['url'] = url_search.format(
         query=urlencode({'label': query, 'language': language}))
@@ -68,7 +69,7 @@ def response(resp):
     html = fromstring(resp.text)
     wikidata_ids = html.xpath(wikidata_ids_xpath)

-    language = resp.search_params['language'].split('-')[0]
+    language = match_language(resp.search_params['language'], supported_languages).split('-')[0]

     # TODO: make requests asynchronous to avoid timeout when result_count > 1
     for wikidata_id in wikidata_ids[:result_count]:

View File

@@ -13,6 +13,7 @@
 from json import loads
 from lxml.html import fromstring
 from searx.url_utils import quote, urlencode
+from searx.utils import match_language

 # search-url
 base_url = u'https://{language}.wikipedia.org/'
@@ -30,13 +31,7 @@ supported_languages_url = 'https://meta.wikimedia.org/wiki/List_of_Wikipedias'

 # set language in base_url
 def url_lang(lang):
-    lang = lang.split('-')[0]
-
-    if lang not in supported_languages:
-        language = 'en'
-    else:
-        language = lang
-
-    return language
+    return match_language(lang, supported_languages).split('-')[0]

 # do search-request

View File

@@ -14,6 +14,7 @@
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
 from searx.url_utils import unquote, urlencode
+from searx.utils import match_language

 # engine dependent config
 categories = ['general']
@@ -39,6 +40,8 @@ time_range_dict = {'day': ['1d', 'd'],
                    'week': ['1w', 'w'],
                    'month': ['1m', 'm']}

+language_aliases = {'zh-CN': 'zh-CHS', 'zh-TW': 'zh-CHT', 'zh-HK': 'zh-CHT'}
+
 # remove yahoo-specific tracking-url
 def parse_url(url_string):
@@ -70,23 +73,16 @@ def _get_url(query, offset, language, time_range):
         lang=language)


-def _get_language(params):
-    if params['language'][:2] == 'zh':
-        if params['language'] == 'zh' or params['language'] == 'zh-CH':
-            return 'szh'
-        else:
-            return 'tzh'
-    else:
-        return params['language'].split('-')[0]
-
-
 # do search-request
 def request(query, params):
     if params['time_range'] and params['time_range'] not in time_range_dict:
         return params

     offset = (params['pageno'] - 1) * 10 + 1
-    language = _get_language(params)
+    language = match_language(params['language'], supported_languages, language_aliases)
+    if language not in language_aliases.values():
+        language = language.split('-')[0]
+    language = language.replace('-', '_').lower()

     params['url'] = _get_url(query, offset, language, params['time_range'])
@@ -145,7 +141,11 @@ def _fetch_supported_languages(resp):
     dom = html.fromstring(resp.text)
     options = dom.xpath('//div[@id="yschlang"]/span/label/input')
     for option in options:
-        code = option.xpath('./@value')[0][5:].replace('_', '-')
+        code_parts = option.xpath('./@value')[0][5:].split('_')
+        if len(code_parts) == 2:
+            code = code_parts[0] + '-' + code_parts[1].upper()
+        else:
+            code = code_parts[0]
         supported_languages.append(code)
     return supported_languages

View File

@@ -13,9 +13,12 @@ import re
 from datetime import datetime, timedelta
 from lxml import html
 from searx.engines.xpath import extract_text, extract_url
-from searx.engines.yahoo import parse_url, _fetch_supported_languages, supported_languages_url
+from searx.engines.yahoo import (
+    parse_url, _fetch_supported_languages, supported_languages_url, language_aliases
+)
 from dateutil import parser
 from searx.url_utils import urlencode
+from searx.utils import match_language

 # engine dependent config
 categories = ['news']
@@ -38,7 +41,7 @@ suggestion_xpath = '//div[contains(@class,"VerALSOTRY")]//a'

 def request(query, params):
     offset = (params['pageno'] - 1) * 10 + 1
-    language = params['language'].split('-')[0]
+    language = match_language(params['language'], supported_languages, language_aliases).split('-')[0]

     params['url'] = search_url.format(offset=offset,
                                       query=urlencode({'p': query}),

View File

@@ -115,10 +115,6 @@ class SearchLanguageSetting(EnumStringSetting):
             pass
         elif lang in self.choices:
             data = lang
-        elif data == 'nb-NO':
-            data = 'no-NO'
-        elif data == 'ar-XA':
-            data = 'ar-SA'
         else:
             data = self.value
         self.value = data

View File

@@ -96,9 +96,13 @@ class RawTextQuery(object):
                     break

             # user may set a valid, yet not selectable language
-            if not self.languages and VALID_LANGUAGE_CODE.match(lang):
-                self.languages.append(lang)
-                parse_next = True
+            if VALID_LANGUAGE_CODE.match(lang):
+                lang_parts = lang.split('-')
+                if len(lang_parts) > 1:
+                    lang = lang_parts[0].lower() + '-' + lang_parts[1].upper()
+                if lang not in self.languages:
+                    self.languages.append(lang)
+                    parse_next = True

         # this force a engine or category
         if query_part[0] == '!' or query_part[0] == '?':

View File

@@ -187,7 +187,7 @@
             </td>
             <th>{{ search_engine.name }}</th>
             <td class="name">{{ shortcuts[search_engine.name] }}</td>
-            <td>{{ support_toggle(current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages) }}</td>
+            <td>{{ support_toggle(stats[search_engine.name].supports_selected_language) }}</td>
             <td>{{ support_toggle(search_engine.safesearch==True) }}</td>
             <td>{{ support_toggle(search_engine.time_range_support==True) }}</td>
             <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
@@ -197,7 +197,7 @@
             <td class="{{ 'danger' if stats[search_engine.name]['warn_time'] else '' }}">{{ 'N/A' if stats[search_engine.name].time==None else stats[search_engine.name].time }}</td>
             <td>{{ support_toggle(search_engine.time_range_support==True) }}</td>
             <td>{{ support_toggle(search_engine.safesearch==True) }}</td>
-            <td>{{ support_toggle(current_language in search_engine.supported_languages or current_language.split('-')[0] in search_engine.supported_languages) }}</td>
+            <td>{{ support_toggle(stats[search_engine.name].supports_selected_language) }}</td>
             <td>{{ shortcuts[search_engine.name] }}</td>
             <th>{{ search_engine.name }}</th>
             <td class="onoff-checkbox">

View File

@@ -4,6 +4,7 @@ import hmac
 import os
 import re

+from babel.core import get_global
 from babel.dates import format_date
 from codecs import getincrementalencoder
 from imp import load_source
@@ -12,6 +13,7 @@ from os.path import splitext, join
 from random import choice
 import sys

+from searx import settings
 from searx.version import VERSION_STRING
 from searx.languages import language_codes
 from searx import settings
@@ -322,6 +324,65 @@ def is_valid_lang(lang):
     return False


+# auxiliary function to match lang_code in lang_list
+def _match_language(lang_code, lang_list=[], custom_aliases={}):
+    # replace language code with a custom alias if necessary
+    if lang_code in custom_aliases:
+        lang_code = custom_aliases[lang_code]
+
+    if lang_code in lang_list:
+        return lang_code
+
+    # try to get the most likely country for this language
+    subtags = get_global('likely_subtags').get(lang_code)
+    if subtags:
+        subtag_parts = subtags.split('_')
+        new_code = subtag_parts[0] + '-' + subtag_parts[-1]
+        if new_code in custom_aliases:
+            new_code = custom_aliases[new_code]
+        if new_code in lang_list:
+            return new_code
+
+    # try to get any supported country for this language
+    for lc in lang_list:
+        if lang_code == lc.split('-')[0]:
+            return lc
+
+    return None
+
+
+# get the language code from lang_list that best matches locale_code
+def match_language(locale_code, lang_list=[], custom_aliases={}, fallback='en-US'):
+    # try to get language from given locale_code
+    language = _match_language(locale_code, lang_list, custom_aliases)
+    if language:
+        return language
+
+    locale_parts = locale_code.split('-')
+    lang_code = locale_parts[0]
+
+    # try to get language using an equivalent country code
+    if len(locale_parts) > 1:
+        country_alias = get_global('territory_aliases').get(locale_parts[-1])
+        if country_alias:
+            language = _match_language(lang_code + '-' + country_alias[0], lang_list, custom_aliases)
+            if language:
+                return language
+
+    # try to get language using an equivalent language code
+    alias = get_global('language_aliases').get(lang_code)
+    if alias:
+        language = _match_language(alias, lang_list, custom_aliases)
+        if language:
+            return language
+
+    if lang_code != locale_code:
+        # try to get language from given language without giving the country
+        language = _match_language(lang_code, lang_list, custom_aliases)
+
+    return language or fallback
+

 def load_module(filename, module_dir):
     modname = splitext(filename)[0]
     if modname in sys.modules:
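
The matching cascade above tries, in order: the exact code (after alias
substitution), babel's most likely country for the language, any supported
country variant, an equivalent territory or language code, and finally the
bare language code, before returning the fallback. A few calls from the new
unit tests further down illustrate this:

    match_language('es-CO', ['es-AR', 'es-ES', 'es-MX'])  # -> 'es-ES' (likely country for 'es')
    match_language('en-UK', ['en-AU', 'en-GB', 'en-US'])  # -> 'en-GB' ('UK' is a territory alias)
    match_language('iw', ['he-IL'])                       # -> 'he-IL' ('iw' is an obsolete code for 'he')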

View File

@@ -58,16 +58,16 @@ from searx.engines import (
 from searx.utils import (
     UnicodeWriter, highlight_content, html_to_text, get_resources_directory,
     get_static_files, get_result_templates, get_themes, gen_useragent,
-    dict_subset, prettify_url
+    dict_subset, prettify_url, match_language
 )
 from searx.version import VERSION_STRING
-from searx.languages import language_codes
+from searx.languages import language_codes as languages
 from searx.search import SearchWithPlugins, get_search_query_from_webapp
 from searx.query import RawTextQuery
 from searx.autocomplete import searx_bang, backends as autocomplete_backends
 from searx.plugins import plugins
 from searx.plugins.oa_doi_rewrite import get_doi_resolver
-from searx.preferences import Preferences, ValidationException
+from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES
 from searx.answerers import answerers
 from searx.url_utils import urlencode, urlparse, urljoin
 from searx.utils import new_hmac
@@ -133,7 +133,7 @@ if not searx_debug \
 babel = Babel(app)

 rtl_locales = ['ar', 'arc', 'bcc', 'bqi', 'ckb', 'dv', 'fa', 'glk', 'he',
-               'ku', 'mzn', 'pnb'', ''ps', 'sd', 'ug', 'ur', 'yi']
+               'ku', 'mzn', 'pnb', 'ps', 'sd', 'ug', 'ur', 'yi']

 # used when translating category names
 _category_names = (gettext('files'),
@@ -352,9 +352,11 @@ def render(template_name, override_theme=None, **kwargs):
     kwargs['safesearch'] = str(request.preferences.get_value('safesearch'))

-    kwargs['language_codes'] = language_codes
+    kwargs['language_codes'] = languages
     if 'current_language' not in kwargs:
-        kwargs['current_language'] = request.preferences.get_value('language')
+        kwargs['current_language'] = match_language(request.preferences.get_value('language'),
+                                                    LANGUAGE_CODES,
+                                                    fallback=settings['search']['language'])

     # override url_for function in templates
     kwargs['url_for'] = url_for_theme
@@ -590,7 +592,9 @@ def index():
         infoboxes=result_container.infoboxes,
         paging=result_container.paging,
         unresponsive_engines=result_container.unresponsive_engines,
-        current_language=search_query.lang,
+        current_language=match_language(search_query.lang,
+                                        LANGUAGE_CODES,
+                                        fallback=settings['search']['language']),
         base_url=get_base_url(),
         theme=get_current_theme_name(),
         favicons=global_favicons[themes.index(get_current_theme_name())]
@@ -687,6 +691,10 @@ def preferences():
                          'warn_time': False}
         if e.timeout > settings['outgoing']['request_timeout']:
             stats[e.name]['warn_timeout'] = True
+        if match_language(request.preferences.get_value('language'),
+                          getattr(e, 'supported_languages', []),
+                          getattr(e, 'language_aliases', {}), None):
+            stats[e.name]['supports_selected_language'] = True

     # get first element [0], the engine time,
     # and then the second element [1] : the time (the first one is the label)

View File

@@ -19,12 +19,17 @@ class TestArchLinuxEngine(SearxTestCase):
         query = 'test_query'
         dic = defaultdict(dict)
         dic['pageno'] = 1
-        dic['language'] = 'en_US'
+        dic['language'] = 'en-US'
         params = archlinux.request(query, dic)
         self.assertTrue('url' in params)
         self.assertTrue(query in params['url'])
         self.assertTrue('wiki.archlinux.org' in params['url'])

+        for lang, name in archlinux.main_langs:
+            dic['language'] = lang
+            params = archlinux.request(query, dic)
+            self.assertTrue(name in params['url'])
+
         for lang, domain in domains.items():
             dic['language'] = lang
             params = archlinux.request(query, dic)

View File

@@ -7,6 +7,7 @@ from searx.testing import SearxTestCase
 class TestBingEngine(SearxTestCase):

     def test_request(self):
+        bing.supported_languages = ['en', 'fr', 'zh-CHS', 'zh-CHT', 'pt-PT', 'pt-BR']
         query = u'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 0

View File

@@ -9,7 +9,6 @@ class TestBingImagesEngine(SearxTestCase):
     def test_request(self):
         bing_images.supported_languages = ['fr-FR', 'en-US']
         query = 'test_query'
-
         dicto = defaultdict(dict)
         dicto['pageno'] = 1

View File

@@ -8,10 +8,11 @@ import lxml
 class TestBingNewsEngine(SearxTestCase):

     def test_request(self):
+        bing_news.supported_languages = ['en', 'fr']
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
-        dicto['language'] = 'fr_FR'
+        dicto['language'] = 'fr-FR'
         dicto['time_range'] = ''
         params = bing_news.request(query, dicto)
         self.assertIn('url', params)

View File

@@ -9,7 +9,6 @@ class TestBingVideosEngine(SearxTestCase):
     def test_request(self):
         bing_videos.supported_languages = ['fr-FR', 'en-US']
         query = 'test_query'
-
         dicto = defaultdict(dict)
         dicto['pageno'] = 1

View File

@@ -8,10 +8,11 @@ from searx.testing import SearxTestCase
 class TestDailymotionEngine(SearxTestCase):

     def test_request(self):
+        dailymotion.supported_languages = ['en', 'fr']
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 0
-        dicto['language'] = 'fr_FR'
+        dicto['language'] = 'fr-FR'
         params = dailymotion.request(query, dicto)
         self.assertTrue('url' in params)
         self.assertTrue(query in params['url'])

View File

@@ -1,18 +1,21 @@
 # -*- coding: utf-8 -*-
 from collections import defaultdict
 import mock
-from searx.engines import duckduckgo
+from searx.engines import load_engine, duckduckgo
 from searx.testing import SearxTestCase


 class TestDuckduckgoEngine(SearxTestCase):

     def test_request(self):
+        duckduckgo = load_engine({'engine': 'duckduckgo', 'name': 'duckduckgo'})
+
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
-        dicto['language'] = 'de-CH'
         dicto['time_range'] = ''
+        dicto['language'] = 'de-CH'
         params = duckduckgo.request(query, dicto)
         self.assertIn('url', params)
         self.assertIn(query, params['url'])
@@ -20,16 +23,19 @@ class TestDuckduckgoEngine(SearxTestCase):
         self.assertIn('ch-de', params['url'])
         self.assertIn('s=0', params['url'])

-        # when ddg uses non standard code
+        # when ddg uses non standard codes
+        dicto['language'] = 'zh-HK'
+        params = duckduckgo.request(query, dicto)
+        self.assertIn('hk-tzh', params['url'])
+
         dicto['language'] = 'en-GB'
         params = duckduckgo.request(query, dicto)
         self.assertIn('uk-en', params['url'])

         # no country given
-        duckduckgo.supported_languages = ['de-CH', 'en-US']
-        dicto['language'] = 'de'
+        dicto['language'] = 'en'
         params = duckduckgo.request(query, dicto)
-        self.assertIn('ch-de', params['url'])
+        self.assertIn('us-en', params['url'])

     def test_no_url_in_request_year_time_range(self):
         dicto = defaultdict(dict)

View File

@@ -18,6 +18,7 @@ class TestDDGDefinitionsEngine(SearxTestCase):
         self.assertEqual(result, 'Text in link')

     def test_request(self):
+        duckduckgo_definitions.supported_languages = ['en-US', 'es-ES']
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1

View File

@@ -9,7 +9,6 @@ class TestDuckduckgoImagesEngine(SearxTestCase):
     def test_request(self):
         duckduckgo_images.supported_languages = ['de-CH', 'en-US']
         query = 'test_query'
-
         dicto = defaultdict(dict)
         dicto['is_test'] = True

View File

@@ -15,6 +15,8 @@ class TestGoogleEngine(SearxTestCase):
         return response

     def test_request(self):
+        google.supported_languages = ['en', 'fr', 'zh-CN']
+
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
@@ -31,6 +33,11 @@ class TestGoogleEngine(SearxTestCase):
         self.assertIn('google.co', params['url'])
         self.assertIn('en', params['headers']['Accept-Language'])

+        dicto['language'] = 'zh'
+        params = google.request(query, dicto)
+        self.assertIn('google.com', params['url'])
+        self.assertIn('zh-CN', params['headers']['Accept-Language'])
+
     def test_response(self):
         self.assertRaises(AttributeError, google.response, None)
         self.assertRaises(AttributeError, google.response, [])

View File

@@ -9,6 +9,7 @@ from searx.testing import SearxTestCase
 class TestGoogleNewsEngine(SearxTestCase):

     def test_request(self):
+        google_news.supported_languages = ['en-US', 'fr-FR']
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1

View File

@@ -7,6 +7,7 @@ from searx.testing import SearxTestCase
 class TestQwantEngine(SearxTestCase):

     def test_request(self):
+        qwant.supported_languages = ['en-US', 'fr-CA', 'fr-FR']
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 0
@@ -26,7 +27,6 @@ class TestQwantEngine(SearxTestCase):
         self.assertIn('en_us', params['url'])
         self.assertIn('news', params['url'])

-        qwant.supported_languages = ['en', 'fr-FR', 'fr-CA']
         dicto['language'] = 'fr'
         params = qwant.request(query, dicto)
         self.assertIn('fr_fr', params['url'])

View File

@@ -7,6 +7,7 @@ from searx.testing import SearxTestCase
 class TestSwisscowsEngine(SearxTestCase):

     def test_request(self):
+        swisscows.supported_languages = ['de-AT', 'de-DE']
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1

View File

@@ -9,6 +9,7 @@ from searx.testing import SearxTestCase
 class TestWikidataEngine(SearxTestCase):

     def test_request(self):
+        wikidata.supported_languages = ['en', 'es']
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['language'] = 'en-US'

View File

@@ -25,11 +25,12 @@ class TestYahooEngine(SearxTestCase):
         self.assertEqual('https://this.is.the.url/', url)

     def test_request(self):
+        yahoo.supported_languages = ['en', 'fr', 'zh-CHT', 'zh-CHS']
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
         dicto['time_range'] = ''
-        dicto['language'] = 'fr_FR'
+        dicto['language'] = 'fr-FR'
         params = yahoo.request(query, dicto)
         self.assertIn('url', params)
         self.assertIn(query, params['url'])
@@ -39,6 +40,16 @@ class TestYahooEngine(SearxTestCase):
         self.assertIn('sB', params['cookies'])
         self.assertIn('fr', params['cookies']['sB'])

+        dicto['language'] = 'zh'
+        params = yahoo.request(query, dicto)
+        self.assertIn('zh_chs', params['url'])
+        self.assertIn('zh_chs', params['cookies']['sB'])
+
+        dicto['language'] = 'zh-TW'
+        params = yahoo.request(query, dicto)
+        self.assertIn('zh_cht', params['url'])
+        self.assertIn('zh_cht', params['cookies']['sB'])
+
     def test_no_url_in_request_year_time_range(self):
         dicto = defaultdict(dict)
         query = 'test_query'
@@ -168,5 +179,5 @@ class TestYahooEngine(SearxTestCase):
         self.assertEqual(type(languages), list)
         self.assertEqual(len(languages), 3)
         self.assertIn('ar', languages)
-        self.assertIn('zh-chs', languages)
-        self.assertIn('zh-cht', languages)
+        self.assertIn('zh-CHS', languages)
+        self.assertIn('zh-CHT', languages)

View File

@@ -9,10 +9,11 @@ from searx.testing import SearxTestCase
 class TestYahooNewsEngine(SearxTestCase):

     def test_request(self):
+        yahoo_news.supported_languages = ['en', 'fr']
         query = 'test_query'
         dicto = defaultdict(dict)
         dicto['pageno'] = 1
-        dicto['language'] = 'fr_FR'
+        dicto['language'] = 'fr-FR'
         params = yahoo_news.request(query, dicto)
         self.assertIn('url', params)
         self.assertIn(query, params['url'])

View File

@@ -65,6 +65,31 @@ class TestUtils(SearxTestCase):
         for test_url, expected in data:
             self.assertEqual(utils.prettify_url(test_url, max_length=32), expected)

+    def test_match_language(self):
+        self.assertEqual(utils.match_language('es', ['es']), 'es')
+        self.assertEqual(utils.match_language('es', [], fallback='fallback'), 'fallback')
+        self.assertEqual(utils.match_language('ja', ['jp'], {'ja': 'jp'}), 'jp')
+
+        aliases = {'en-GB': 'en-UK', 'he': 'iw'}
+
+        # guess country
+        self.assertEqual(utils.match_language('de-DE', ['de']), 'de')
+        self.assertEqual(utils.match_language('de', ['de-DE']), 'de-DE')
+        self.assertEqual(utils.match_language('es-CO', ['es-AR', 'es-ES', 'es-MX']), 'es-ES')
+        self.assertEqual(utils.match_language('es-CO', ['es-MX']), 'es-MX')
+        self.assertEqual(utils.match_language('en-UK', ['en-AU', 'en-GB', 'en-US']), 'en-GB')
+        self.assertEqual(utils.match_language('en-GB', ['en-AU', 'en-UK', 'en-US'], aliases), 'en-UK')
+
+        # language aliases
+        self.assertEqual(utils.match_language('iw', ['he']), 'he')
+        self.assertEqual(utils.match_language('he', ['iw'], aliases), 'iw')
+        self.assertEqual(utils.match_language('iw-IL', ['he']), 'he')
+        self.assertEqual(utils.match_language('he-IL', ['iw'], aliases), 'iw')
+        self.assertEqual(utils.match_language('iw', ['he-IL']), 'he-IL')
+        self.assertEqual(utils.match_language('he', ['iw-IL'], aliases), 'iw-IL')
+        self.assertEqual(utils.match_language('iw-IL', ['he-IL']), 'he-IL')
+        self.assertEqual(utils.match_language('he-IL', ['iw-IL'], aliases), 'iw-IL')
+

 class TestHTMLTextExtractor(SearxTestCase):

View File

@@ -19,19 +19,6 @@ from searx.engines import initialize_engines, engines
 engines_languages_file = 'engines_languages.json'
 languages_file = 'languages.py'

-# custom fixes for non standard locale codes
-# sl-SL is technically not invalid, but still a mistake
-# TODO: move to respective engines
-locale_fixes = {
-    'sl-sl': 'sl-SI',
-    'ar-xa': 'ar-SA',
-    'es-xl': 'es-419',
-    'zh-chs': 'zh-Hans-CN',
-    'zh-cht': 'zh-Hant-TW',
-    'tzh-tw': 'zh-Hant-TW',
-    'tzh-hk': 'zh-Hant-HK'
-}

 # Fetchs supported languages for each engine and writes json file with those.
 def fetch_supported_languages():
@@ -76,8 +63,9 @@ def join_language_lists(engines_languages):
         for lang_code in engines_languages[engine_name]:

             # apply custom fixes if necessary
-            if lang_code.lower() in locale_fixes:
-                lang_code = locale_fixes[lang_code.lower()]
+            if lang_code in getattr(engines[engine_name], 'language_aliases', {}).values():
+                lang_code = next(lc for lc, alias in engines[engine_name].language_aliases.items()
+                                 if lang_code == alias)

             locale = get_locale(lang_code)