mirror of
https://github.com/rsennrich/subword-nmt.git
synced 2024-11-22 23:30:16 +03:00
progress bar and version bump
This commit is contained in:
parent
823c880e4b
commit
7bae758b2e
@ -1,6 +1,12 @@
|
||||
CHANGELOG
|
||||
---------
|
||||
|
||||
v0.3.8:
|
||||
- multiprocessing support (get_vocab and apply_bpe)
|
||||
- progress bar for learn_bpe
|
||||
- seed parameter for deterministic BPE dropout
|
||||
- ignore some Unicode line separators that would otherwise crash subword-nmt
|
||||
|
||||
v0.3.7:
|
||||
- BPE dropout (Provilkov et al., 2019)
|
||||
- more efficient glossaries (https://github.com/rsennrich/subword-nmt/pull/69)
|
||||
|
4
setup.py
4
setup.py
@ -11,7 +11,7 @@ def test_suite():
|
||||
|
||||
setup(
|
||||
name='subword_nmt',
|
||||
version='0.3.7',
|
||||
version='0.3.8',
|
||||
description='Unsupervised Word Segmentation for Neural Machine Translation and Text Generation',
|
||||
long_description=(codecs.open("README.md", encoding='utf-8').read() +
|
||||
"\n\n" + codecs.open("CHANGELOG.md", encoding='utf-8').read()),
|
||||
@ -28,6 +28,8 @@ setup(
|
||||
'Programming Language :: Python :: 2',
|
||||
'Programming Language :: Python :: 3',
|
||||
],
|
||||
install_requires=['mock',
|
||||
'tqdm'],
|
||||
packages=find_packages(),
|
||||
entry_points={
|
||||
'console_scripts': ['subword-nmt=subword_nmt.subword_nmt:main'],
|
||||
|
@ -25,6 +25,12 @@ import tempfile
|
||||
from multiprocessing import Pool, cpu_count
|
||||
from collections import defaultdict, Counter
|
||||
|
||||
try:
|
||||
from tqdm import tqdm
|
||||
except ImportError:
|
||||
def tqdm(iterator, *args, **kwargs):
|
||||
return iterator
|
||||
|
||||
# hack for python2/3 compatibility
|
||||
from io import open
|
||||
argparse.open = open
|
||||
@ -294,7 +300,7 @@ def learn_bpe(infile, outfile, num_symbols, min_frequency=2, verbose=False, is_d
|
||||
|
||||
# threshold is inspired by Zipfian assumption, but should only affect speed
|
||||
threshold = max(stats.values()) / 10
|
||||
for i in range(num_symbols):
|
||||
for i in tqdm(range(num_symbols)):
|
||||
if stats:
|
||||
most_frequent = max(stats, key=lambda x: (stats[x], x))
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user