Merge branch 'master' of github.com:Helsinki-NLP/OPUS-MT-train

This commit is contained in:
Joerg Tiedemann 2021-11-02 18:48:19 +02:00
commit dbcdab4d6b
2 changed files with 34 additions and 0 deletions

View File

@@ -1,4 +1,8 @@
# Integration
https://github.com/UKPLab/EasyNMT
# more efficient parallelisation

30
scripts/fix_vocab.py Executable file
View File

@@ -0,0 +1,30 @@
#!/usr/bin/env python3
#-*-python-*-
import yaml
import sys
from shutil import copyfile
def parse_vocab_line(line):
    """Split one ``<key>: <integer>`` vocabulary line into ``(token, index)``.

    Trailing whitespace is removed, the line is split at its LAST ``': '``
    so that tokens which themselves contain ``': '`` survive, and the first
    and last character of the key are dropped (presumably the surrounding
    quote characters -- the key is not unescaped in any other way).

    Raises:
        ValueError: if the line has no ``': '`` separator or the part after
            it is not an integer.
    """
    key, separator, value = line.rstrip().rpartition(': ')
    if not separator:
        raise ValueError(f'cannot parse vocabulary line: {line!r}')
    return key[1:-1], int(value)


def is_valid_yaml(path):
    """Return True if the file at *path* can be parsed as YAML."""
    try:
        with open(path, 'r', encoding='utf-8') as stream:
            # safe_load() instead of load(): load() without an explicit
            # Loader raises TypeError on PyYAML >= 6, which the former bare
            # ``except:`` mistook for a broken file.
            yaml.safe_load(stream)
    except (yaml.YAMLError, ValueError):
        # ValueError is caught as well because scalar construction (and
        # decoding of the input) may raise it without wrapping it in a
        # YAMLError -- NOTE(review): confirm against the PyYAML version used.
        return False
    return True


def main(argv=None):
    """Check a vocabulary YAML file and rewrite it if it does not parse.

    A file that parses is left untouched.  Otherwise it is copied to
    ``<filename>.bak``, re-read line by line with ``parse_vocab_line`` and
    written back to the same path with ``yaml.dump``.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else argv
    if len(args) != 1:
        sys.exit('usage: fix_vocab.py <vocab.yml>')
    filename = args[0]

    if is_valid_yaml(filename):
        return

    print('YAML file is broken - try to fix it!')
    print(f'copy {filename} to {filename}.bak')
    copyfile(filename, f'{filename}.bak')

    vocab = {}
    with open(filename, encoding='utf-8') as fh:
        for line in fh:
            if not line.strip():
                # blank lines carry no entry; previously they crashed the
                # script after the backup had already been written
                continue
            token, index = parse_vocab_line(line)
            vocab[token] = index

    print(f'write a new version of {filename}')
    # explicit encoding + context manager: allow_unicode=True emits raw
    # non-ASCII characters, and the handle must be flushed and closed
    with open(filename, 'w', encoding='utf-8') as output:
        yaml.dump(vocab, output, allow_unicode=True)


if __name__ == '__main__':
    main()