mirror of
https://github.com/facebookresearch/fairseq.git
synced 2024-09-21 22:27:16 +03:00
e75cff5f2c
Summary: The previous BSD+PATENTS license was controversial. We have been approved to relicense fairseq under the MIT license. Pull Request resolved: https://github.com/fairinternal/fairseq-py/pull/786 Differential Revision: D16560654 Pulled By: myleott fbshipit-source-id: f78b1beb4f2895dd7b9bfc79f5f952a2bfb94034
35 lines
787 B
Lua
35 lines
787 B
Lua
-- Copyright (c) Facebook, Inc. and its affiliates.
--
-- This source code is licensed under the MIT license found in the
-- LICENSE file in the root directory of this source tree.
--
-- Usage: convert_dictionary.lua <dict.th7>
|
|
require 'fairseq'
|
|
require 'torch'
|
|
require 'paths'
|
|
|
|
-- Command-line validation: the script needs at least one argument, the path
-- of the dictionary file to convert. Both failure modes print a message and
-- terminate the process with exit status 1.
if #arg < 1 then
  print('usage: convert_dictionary.lua <dict.th7>')
  os.exit(1)
end

-- Check that the path names an existing file before anything tries to read it.
if not paths.filep(arg[1]) then
  print('error: file does not exist: ' .. arg[1])
  os.exit(1)
end
|
|
|
|
-- Convert the serialized dictionary given as arg[1] into a plain-text file
-- with one "<symbol> <frequency>" entry per line.
local dict = torch.load(arg[1])

-- The output name is the input's basename with a trailing '.th7' replaced by
-- '.txt'; since only the basename is kept, the file is opened relative to the
-- current working directory. In the pattern, '%.' matches a literal dot and
-- '$' anchors the match at the end of the name, so only the extension is
-- rewritten. The outer parentheses discard gsub's second result (the
-- substitution count).
local dst = (paths.basename(arg[1]):gsub('%.th7$', '.txt'))
assert(dst:match('%.txt$'),
       'cannot derive a .txt output name from: ' .. arg[1])

-- io.open returns nil plus an error message on failure; assert turns that
-- into an immediate, descriptive error instead of a nil-index error later.
local f = assert(io.open(dst, 'w'))
for idx, symbol in ipairs(dict.index_to_symbol) do
  -- Entries whose index exceeds dict.cutoff are not written.
  if idx > dict.cutoff then
    break
  end
  f:write(symbol, ' ', dict.index_to_freq[idx], '\n')
end
f:close()
|