Updated script for better splitting of synonyms
This commit is contained in:
1072
json/da.json
1072
json/da.json
File diff suppressed because it is too large
Load Diff
865
json/de.json
865
json/de.json
File diff suppressed because it is too large
Load Diff
634
json/en.json
634
json/en.json
File diff suppressed because it is too large
Load Diff
3429
json/es.json
3429
json/es.json
File diff suppressed because it is too large
Load Diff
1555
json/fi.json
1555
json/fi.json
File diff suppressed because it is too large
Load Diff
4089
json/fr.json
4089
json/fr.json
File diff suppressed because it is too large
Load Diff
3436
json/it.json
3436
json/it.json
File diff suppressed because it is too large
Load Diff
1300
json/nl.json
1300
json/nl.json
File diff suppressed because it is too large
Load Diff
870
json/no.json
870
json/no.json
File diff suppressed because it is too large
Load Diff
3022
json/pl.json
3022
json/pl.json
File diff suppressed because it is too large
Load Diff
3748
json/pt-br.json
3748
json/pt-br.json
File diff suppressed because it is too large
Load Diff
3782
json/pt.json
3782
json/pt.json
File diff suppressed because it is too large
Load Diff
2996
json/ru.json
2996
json/ru.json
File diff suppressed because it is too large
Load Diff
1349
json/sv.json
1349
json/sv.json
File diff suppressed because it is too large
Load Diff
3486
json/tr.json
3486
json/tr.json
File diff suppressed because it is too large
Load Diff
2836
json/ua.json
2836
json/ua.json
File diff suppressed because it is too large
Load Diff
@@ -80,7 +80,10 @@ for lang in langs:
|
|||||||
syns_raw = category['Synonyms']
|
syns_raw = category['Synonyms']
|
||||||
assert type(syns_raw) == str, \
|
assert type(syns_raw) == str, \
|
||||||
f"Synonym list (lang: {lang}, {category['CatID']}) was not readable"
|
f"Synonym list (lang: {lang}, {category['CatID']}) was not readable"
|
||||||
syn_list = re.split(r'\W+', syns_raw)
|
split_pattern = r',\s*'
|
||||||
|
if lang in ['zh','ar','kr','ja','tw']:
|
||||||
|
split_pattern = r'\W+'
|
||||||
|
syn_list = re.split(split_pattern, syns_raw)
|
||||||
category['Synonyms'] = [s.lower() for s in syn_list]
|
category['Synonyms'] = [s.lower() for s in syn_list]
|
||||||
|
|
||||||
schedule.append(category)
|
schedule.append(category)
|
||||||
|
Reference in New Issue
Block a user