Updated script for better splitting of synonyms
This commit is contained in:
1074
json/da.json
1074
json/da.json
File diff suppressed because it is too large
Load Diff
865
json/de.json
865
json/de.json
File diff suppressed because it is too large
Load Diff
634
json/en.json
634
json/en.json
File diff suppressed because it is too large
Load Diff
3431
json/es.json
3431
json/es.json
File diff suppressed because it is too large
Load Diff
1557
json/fi.json
1557
json/fi.json
File diff suppressed because it is too large
Load Diff
4089
json/fr.json
4089
json/fr.json
File diff suppressed because it is too large
Load Diff
3436
json/it.json
3436
json/it.json
File diff suppressed because it is too large
Load Diff
1300
json/nl.json
1300
json/nl.json
File diff suppressed because it is too large
Load Diff
878
json/no.json
878
json/no.json
File diff suppressed because it is too large
Load Diff
3024
json/pl.json
3024
json/pl.json
File diff suppressed because it is too large
Load Diff
3750
json/pt-br.json
3750
json/pt-br.json
File diff suppressed because it is too large
Load Diff
3784
json/pt.json
3784
json/pt.json
File diff suppressed because it is too large
Load Diff
2996
json/ru.json
2996
json/ru.json
File diff suppressed because it is too large
Load Diff
1349
json/sv.json
1349
json/sv.json
File diff suppressed because it is too large
Load Diff
3486
json/tr.json
3486
json/tr.json
File diff suppressed because it is too large
Load Diff
2836
json/ua.json
2836
json/ua.json
File diff suppressed because it is too large
Load Diff
@@ -80,7 +80,10 @@ for lang in langs:
 syns_raw = category['Synonyms']
 assert type(syns_raw) == str, \
     f"Synonym list (lang: {lang}, {category['CatID']}) was not readable"
-syn_list = re.split(r'\W+', syns_raw)
+split_pattern = r',\s*'
+if lang in ['zh','ar','kr','ja','tw']:
+    split_pattern = r'\W+'
+syn_list = re.split(split_pattern, syns_raw)
 category['Synonyms'] = [s.lower() for s in syn_list]

 schedule.append(category)
Reference in New Issue
Block a user