Updated dependencies download script for transformers>=2.10

Guillaume B 2020-07-20 19:30:19 +02:00
parent 73571e9b92
commit 65135227cf
27 changed files with 322 additions and 82 deletions
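The same edit is applied to every dependency download script below: the scripts stop importing the *_PRETRAINED_MODEL_ARCHIVE_MAP weight-URL lookups and instead keep only the bare model id, building the weights URL with hf_bucket_url. A minimal sketch of the new pattern, assuming transformers ~2.10, where hf_bucket_url and get_from_cache both live in transformers.file_utils (the model id is just an example):

from transformers.file_utils import get_from_cache, hf_bucket_url

model_id = "bert-base-uncased"  # any of the model ids used in the scripts below

# Before: weights_url = BERT_PRETRAINED_MODEL_ARCHIVE_MAP[model_id]
# After: build the URL from the model id and file name, then pull it into the cache.
weights_url = hf_bucket_url(model_id, filename="pytorch_model.bin", use_cdn=True)
temp_weights = get_from_cache(weights_url)  # local path of the cached pytorch_model.bin
print(temp_weights)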

View File

@@ -30,7 +30,7 @@ all-tests = []
 features = [ "doc-only" ]
 [dependencies]
-rust_tokenizers = "~3.1.5"
+rust_tokenizers = {version = "~3.1.6", path = "E:/coding/backup-rust/rust-tokenizers/main/"}
 tch = "~0.1.7"
 serde_json = "1.0.51"
 serde = {version = "1.0.106", features = ["derive"]}

View File

@@ -1,2 +1,2 @@
 torch == 1.5.0
-transformers == 2.8.0
+transformers == 2.10.0

View File

@@ -1,7 +1,6 @@
-from transformers import ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 from transformers.configuration_albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_albert import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,13 +10,13 @@ import subprocess
 config_path = ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP["albert-base-v2"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["albert-base-v2"]
-weights_path = ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP["albert-base-v2"]
+weights_path = "albert-base-v2"
 target_path = Path.home() / 'rustbert' / 'albert-base-v2'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,7 +1,6 @@
-from transformers import BART_PRETRAINED_MODEL_ARCHIVE_MAP
 from transformers.configuration_bart import BART_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_bart import vocab_url, merges_url
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -12,14 +11,14 @@ import subprocess
 config_path = BART_PRETRAINED_CONFIG_ARCHIVE_MAP['bart-large']
 vocab_path = vocab_url
 merges_path = merges_url
-weights_path = BART_PRETRAINED_MODEL_ARCHIVE_MAP['bart-large']
+weights_path = 'bart-large'
 target_path = Path.home() / 'rustbert' / 'bart-large'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,7 +1,6 @@
-from transformers import BART_PRETRAINED_MODEL_ARCHIVE_MAP
 from transformers.configuration_bart import BART_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_bart import vocab_url, merges_url
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -12,14 +11,14 @@ import subprocess
 config_path = BART_PRETRAINED_CONFIG_ARCHIVE_MAP['bart-large-cnn']
 vocab_path = vocab_url
 merges_path = merges_url
-weights_path = BART_PRETRAINED_MODEL_ARCHIVE_MAP['bart-large-cnn']
+weights_path = 'bart-large-cnn'
 target_path = Path.home() / 'rustbert' / 'bart-large-cnn'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,7 +1,6 @@
-from transformers import BART_PRETRAINED_MODEL_ARCHIVE_MAP
 from transformers.configuration_bart import BART_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_bart import vocab_url, merges_url
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -12,14 +11,14 @@ import subprocess
 config_path = BART_PRETRAINED_CONFIG_ARCHIVE_MAP['bart-large-xsum']
 vocab_path = vocab_url
 merges_path = merges_url
-weights_path = BART_PRETRAINED_MODEL_ARCHIVE_MAP['bart-large-xsum']
+weights_path = 'bart-large-xsum'
 target_path = Path.home() / 'rustbert' / 'bart-large-xsum'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,6 +1,6 @@
-from transformers import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BERT_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_bert import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 config_path = BERT_PRETRAINED_CONFIG_ARCHIVE_MAP["bert-base-uncased"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["bert-base-uncased"]
-weights_path = BERT_PRETRAINED_MODEL_ARCHIVE_MAP["bert-base-uncased"]
+weights_path = "bert-base-uncased"
 target_path = Path.home() / 'rustbert' / 'bert'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,6 +1,6 @@
-from transformers import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BERT_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_bert import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 config_path = BERT_PRETRAINED_CONFIG_ARCHIVE_MAP["bert-large-cased-whole-word-masking-finetuned-squad"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["bert-large-cased-whole-word-masking-finetuned-squad"]
-weights_path = BERT_PRETRAINED_MODEL_ARCHIVE_MAP["bert-large-cased-whole-word-masking-finetuned-squad"]
+weights_path = "bert-large-cased-whole-word-masking-finetuned-squad"
 target_path = Path.home() / 'rustbert' / 'bert-qa'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,6 +1,6 @@
-from transformers import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_distilbert import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 config_path = DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP["distilbert-base-cased-distilled-squad"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["distilbert-base-cased-distilled-squad"]
-weights_path = DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP["distilbert-base-cased-distilled-squad"]
+weights_path = "distilbert-base-cased-distilled-squad"
 target_path = Path.home() / 'rustbert' / 'distilbert-qa'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,6 +1,6 @@
-from transformers import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_distilbert import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 config_path = DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP["distilbert-base-uncased"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["distilbert-base-uncased"]
-weights_path = DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP["distilbert-base-uncased"]
+weights_path = "distilbert-base-uncased"
 target_path = Path.home() / 'rustbert' / 'distilbert'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,6 +1,6 @@
-from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_gpt2 import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,14 +11,14 @@ import subprocess
 config_path = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP["distilgpt2"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["distilgpt2"]
 merges_path = PRETRAINED_VOCAB_FILES_MAP["merges_file"]["distilgpt2"]
-weights_path = GPT2_PRETRAINED_MODEL_ARCHIVE_MAP["distilgpt2"]
+weights_path = "distilgpt2"
 target_path = Path.home() / 'rustbert' / 'distilgpt2'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,6 +1,6 @@
-from transformers import ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP, ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP
+from transformers import ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_electra import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 config_path = ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP["google/electra-base-discriminator"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["google/electra-base-discriminator"]
-weights_path = ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP["google/electra-base-discriminator"]
+weights_path = "google/electra-base-discriminator"
 target_path = Path.home() / 'rustbert' / 'electra-discriminator'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,6 +1,6 @@
-from transformers import ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP, ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP
+from transformers import ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_electra import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 config_path = ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP["google/electra-base-generator"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["google/electra-base-generator"]
-weights_path = ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP["google/electra-base-generator"]
+weights_path = "google/electra-base-generator"
 target_path = Path.home() / 'rustbert' / 'electra-generator'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)
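The two ELECTRA scripts pass a namespaced model id ("google/electra-base-discriminator", "google/electra-base-generator") where the other scripts pass a flat one; hf_bucket_url accepts both forms. A simplified sketch of the resolution logic in transformers ~2.10 (the endpoint constant and exact formatting here are assumptions, not the library source):

# Simplified sketch only; not the actual transformers implementation.
CDN_PREFIX = "https://cdn.huggingface.co"  # assumed CDN endpoint used when use_cdn=True

def sketch_hf_bucket_url(model_id: str, filename: str) -> str:
    if "/" in model_id:
        # namespaced id, e.g. "google/electra-base-generator"
        return f"{CDN_PREFIX}/{model_id}/{filename}"
    # legacy flat id, e.g. "gpt2" -> ".../gpt2-pytorch_model.bin"
    return f"{CDN_PREFIX}/{model_id}-{filename}"

print(sketch_hf_bucket_url("google/electra-base-discriminator", "pytorch_model.bin"))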

View File

@@ -1,6 +1,6 @@
-from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_gpt2 import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,14 +11,14 @@ import subprocess
 config_path = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP["gpt2-large"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["gpt2-large"]
 merges_path = PRETRAINED_VOCAB_FILES_MAP["merges_file"]["gpt2-large"]
-weights_path = GPT2_PRETRAINED_MODEL_ARCHIVE_MAP["gpt2-large"]
+weights_path = "gpt2-large"
 target_path = Path.home() / 'rustbert' / 'gpt2-large'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,6 +1,6 @@
-from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_gpt2 import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,14 +11,14 @@ import subprocess
 config_path = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP["gpt2-medium"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["gpt2-medium"]
 merges_path = PRETRAINED_VOCAB_FILES_MAP["merges_file"]["gpt2-medium"]
-weights_path = GPT2_PRETRAINED_MODEL_ARCHIVE_MAP["gpt2-medium"]
+weights_path = "gpt2-medium"
 target_path = Path.home() / 'rustbert' / 'gpt2-medium'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,6 +1,6 @@
-from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_gpt2 import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,14 +11,14 @@ import subprocess
 config_path = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP["gpt2-xl"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["gpt2-xl"]
 merges_path = PRETRAINED_VOCAB_FILES_MAP["merges_file"]["gpt2-xl"]
-weights_path = GPT2_PRETRAINED_MODEL_ARCHIVE_MAP["gpt2-xl"]
+weights_path = "gpt2-xl"
 target_path = Path.home() / 'rustbert' / 'gpt2-xl'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,6 +1,6 @@
-from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_gpt2 import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,14 +11,14 @@ import subprocess
 config_path = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP["gpt2"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["gpt2"]
 merges_path = PRETRAINED_VOCAB_FILES_MAP["merges_file"]["gpt2"]
-weights_path = GPT2_PRETRAINED_MODEL_ARCHIVE_MAP["gpt2"]
+weights_path = "gpt2"
 target_path = Path.home() / 'rustbert' / 'gpt2'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,6 +1,6 @@
-from transformers import OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_openai import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,14 +11,14 @@ import subprocess
 config_path = OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP["openai-gpt"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["openai-gpt"]
 merges_path = PRETRAINED_VOCAB_FILES_MAP["merges_file"]["openai-gpt"]
-weights_path = OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP["openai-gpt"]
+weights_path = "openai-gpt"
 target_path = Path.home() / 'rustbert' / 'openai-gpt'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,6 +1,6 @@
-from transformers import ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_roberta import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,14 +11,14 @@ import subprocess
 config_path = ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP["roberta-base"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["roberta-base"]
 merges_path = PRETRAINED_VOCAB_FILES_MAP["merges_file"]["roberta-base"]
-weights_path = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP["roberta-base"]
+weights_path = "roberta-base"
 target_path = Path.home() / 'rustbert' / 'roberta'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -0,0 +1,52 @@
from transformers.file_utils import get_from_cache, S3_BUCKET_PREFIX
from pathlib import Path
import shutil
import os
import numpy as np
import torch
import subprocess
ROOT_PATH = S3_BUCKET_PREFIX + '/deepset/roberta-base-squad2'
config_path = ROOT_PATH + '/config.json'
vocab_path = ROOT_PATH + '/vocab.json'
merges_path = ROOT_PATH + '/merges.txt'
weights_path = ROOT_PATH + '/pytorch_model.bin'
target_path = Path.home() / 'rustbert' / 'roberta-qa'
temp_config = get_from_cache(config_path)
temp_vocab = get_from_cache(vocab_path)
temp_merges = get_from_cache(merges_path)
temp_weights = get_from_cache(weights_path)
os.makedirs(str(target_path), exist_ok=True)
config_path = str(target_path / 'config.json')
vocab_path = str(target_path / 'vocab.json')
merges_path = str(target_path / 'merges.txt')
model_path = str(target_path / 'model.bin')
shutil.copy(temp_config, config_path)
shutil.copy(temp_vocab, vocab_path)
shutil.copy(temp_merges, merges_path)
shutil.copy(temp_weights, model_path)
weights = torch.load(temp_weights, map_location='cpu')
nps = {}
for k, v in weights.items():
k = k.replace("gamma", "weight").replace("beta", "bias")
nps[k] = np.ascontiguousarray(v.cpu().numpy())
np.savez(target_path / 'model.npz', **nps)
source = str(target_path / 'model.npz')
target = str(target_path / 'model.ot')
toml_location = (Path(__file__).resolve() / '..' / '..' / 'Cargo.toml').resolve()
subprocess.call(
['cargo', 'run', '--bin=convert-tensor', '--manifest-path=%s' % toml_location, '--', source, target])
os.remove(str(target_path / 'model.bin'))
os.remove(str(target_path / 'model.npz'))
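The rename loop in this script (and in the XLM-RoBERTa scripts below) handles checkpoints that still store LayerNorm parameters under the pre-1.0 PyTorch names gamma/beta, which the Rust side expects as weight/bias. A tiny illustration with a made-up key (the key and tensor are placeholders, not values from the real checkpoint):

import numpy as np
import torch

weights = {"roberta.encoder.layer.0.LayerNorm.gamma": torch.ones(4)}  # hypothetical key
nps = {}
for k, v in weights.items():
    k = k.replace("gamma", "weight").replace("beta", "bias")
    nps[k] = np.ascontiguousarray(v.cpu().numpy())  # contiguous layout for the npz -> .ot conversion
assert "roberta.encoder.layer.0.LayerNorm.weight" in nps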

View File

@@ -1,6 +1,6 @@
-from transformers import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_distilbert import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 config_path = DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP["distilbert-base-uncased-finetuned-sst-2-english"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["distilbert-base-uncased"]
-weights_path = DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP["distilbert-base-uncased-finetuned-sst-2-english"]
+weights_path = "distilbert-base-uncased-finetuned-sst-2-english"
 target_path = Path.home() / 'rustbert' / 'distilbert-sst2'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,6 +1,6 @@
-from transformers import T5_PRETRAINED_MODEL_ARCHIVE_MAP, T5_PRETRAINED_CONFIG_ARCHIVE_MAP
+from transformers import T5_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_t5 import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 config_path = T5_PRETRAINED_CONFIG_ARCHIVE_MAP['t5-base']
 vocab_path = PRETRAINED_VOCAB_FILES_MAP['vocab_file']['t5-base']
-weights_path = T5_PRETRAINED_MODEL_ARCHIVE_MAP['t5-base']
+weights_path = 't5-base'
 target_path = Path.home() / 'rustbert' / 't5-base'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -1,6 +1,6 @@
-from transformers import T5_PRETRAINED_MODEL_ARCHIVE_MAP, T5_PRETRAINED_CONFIG_ARCHIVE_MAP
+from transformers import T5_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_t5 import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 config_path = T5_PRETRAINED_CONFIG_ARCHIVE_MAP['t5-small']
 vocab_path = PRETRAINED_VOCAB_FILES_MAP['vocab_file']['t5-small']
-weights_path = T5_PRETRAINED_MODEL_ARCHIVE_MAP['t5-small']
+weights_path = 't5-small'
 target_path = Path.home() / 'rustbert' / 't5-small'
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 os.makedirs(str(target_path), exist_ok=True)

View File

@@ -0,0 +1,48 @@
from transformers import XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
from transformers.tokenization_xlm_roberta import PRETRAINED_VOCAB_FILES_MAP
from transformers.file_utils import get_from_cache, hf_bucket_url
from pathlib import Path
import shutil
import os
import numpy as np
import torch
import subprocess
config_path = XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP["xlm-roberta-large-finetuned-conll02-dutch"]
vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["xlm-roberta-large-finetuned-conll02-dutch"]
weights_path = "xlm-roberta-large-finetuned-conll02-dutch"
target_path = Path.home() / 'rustbert' / 'xlm-roberta-ner-nl'
temp_config = get_from_cache(config_path)
temp_vocab = get_from_cache(vocab_path)
temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
os.makedirs(str(target_path), exist_ok=True)
config_path = str(target_path / 'config.json')
vocab_path = str(target_path / 'spiece.model')
model_path = str(target_path / 'model.bin')
shutil.copy(temp_config, config_path)
shutil.copy(temp_vocab, vocab_path)
shutil.copy(temp_weights, model_path)
weights = torch.load(temp_weights, map_location='cpu')
nps = {}
for k, v in weights.items():
k = k.replace("gamma", "weight").replace("beta", "bias")
nps[k] = np.ascontiguousarray(v.cpu().numpy())
np.savez(target_path / 'model.npz', **nps)
source = str(target_path / 'model.npz')
target = str(target_path / 'model.ot')
toml_location = (Path(__file__).resolve() / '..' / '..' / 'Cargo.toml').resolve()
subprocess.call(
['cargo', 'run', '--bin=convert-tensor', '--manifest-path=%s' % toml_location, '--', source, target])
os.remove(str(target_path / 'model.bin'))
os.remove(str(target_path / 'model.npz'))

View File

@@ -0,0 +1,48 @@
from transformers import XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
from transformers.tokenization_xlm_roberta import PRETRAINED_VOCAB_FILES_MAP
from transformers.file_utils import get_from_cache, hf_bucket_url
from pathlib import Path
import shutil
import os
import numpy as np
import torch
import subprocess
config_path = XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP["xlm-roberta-large-finetuned-conll03-english"]
vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["xlm-roberta-large-finetuned-conll03-english"]
weights_path = "xlm-roberta-large-finetuned-conll03-english"
target_path = Path.home() / 'rustbert' / 'xlm-roberta-ner-en'
temp_config = get_from_cache(config_path)
temp_vocab = get_from_cache(vocab_path)
temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
os.makedirs(str(target_path), exist_ok=True)
config_path = str(target_path / 'config.json')
vocab_path = str(target_path / 'spiece.model')
model_path = str(target_path / 'model.bin')
shutil.copy(temp_config, config_path)
shutil.copy(temp_vocab, vocab_path)
shutil.copy(temp_weights, model_path)
weights = torch.load(temp_weights, map_location='cpu')
nps = {}
for k, v in weights.items():
k = k.replace("gamma", "weight").replace("beta", "bias")
nps[k] = np.ascontiguousarray(v.cpu().numpy())
np.savez(target_path / 'model.npz', **nps)
source = str(target_path / 'model.npz')
target = str(target_path / 'model.ot')
toml_location = (Path(__file__).resolve() / '..' / '..' / 'Cargo.toml').resolve()
subprocess.call(
['cargo', 'run', '--bin=convert-tensor', '--manifest-path=%s' % toml_location, '--', source, target])
os.remove(str(target_path / 'model.bin'))
os.remove(str(target_path / 'model.npz'))

View File

@@ -0,0 +1,48 @@
from transformers import XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
from transformers.tokenization_xlm_roberta import PRETRAINED_VOCAB_FILES_MAP
from transformers.file_utils import get_from_cache, hf_bucket_url
from pathlib import Path
import shutil
import os
import numpy as np
import torch
import subprocess
config_path = XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP["xlm-roberta-large-finetuned-conll03-german"]
vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["xlm-roberta-large-finetuned-conll03-german"]
weights_path = "xlm-roberta-large-finetuned-conll03-german"
target_path = Path.home() / 'rustbert' / 'xlm-roberta-ner-de'
temp_config = get_from_cache(config_path)
temp_vocab = get_from_cache(vocab_path)
temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
os.makedirs(str(target_path), exist_ok=True)
config_path = str(target_path / 'config.json')
vocab_path = str(target_path / 'spiece.model')
model_path = str(target_path / 'model.bin')
shutil.copy(temp_config, config_path)
shutil.copy(temp_vocab, vocab_path)
shutil.copy(temp_weights, model_path)
weights = torch.load(temp_weights, map_location='cpu')
nps = {}
for k, v in weights.items():
k = k.replace("gamma", "weight").replace("beta", "bias")
nps[k] = np.ascontiguousarray(v.cpu().numpy())
np.savez(target_path / 'model.npz', **nps)
source = str(target_path / 'model.npz')
target = str(target_path / 'model.ot')
toml_location = (Path(__file__).resolve() / '..' / '..' / 'Cargo.toml').resolve()
subprocess.call(
['cargo', 'run', '--bin=convert-tensor', '--manifest-path=%s' % toml_location, '--', source, target])
os.remove(str(target_path / 'model.bin'))
os.remove(str(target_path / 'model.npz'))

View File

@@ -0,0 +1,48 @@
from transformers import XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
from transformers.tokenization_xlm_roberta import PRETRAINED_VOCAB_FILES_MAP
from transformers.file_utils import get_from_cache, hf_bucket_url
from pathlib import Path
import shutil
import os
import numpy as np
import torch
import subprocess
config_path = XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP["xlm-roberta-large-finetuned-conll02-spanish"]
vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["xlm-roberta-large-finetuned-conll02-spanish"]
weights_path = "xlm-roberta-large-finetuned-conll02-spanish"
target_path = Path.home() / 'rustbert' / 'xlm-roberta-ner-es'
temp_config = get_from_cache(config_path)
temp_vocab = get_from_cache(vocab_path)
temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
os.makedirs(str(target_path), exist_ok=True)
config_path = str(target_path / 'config.json')
vocab_path = str(target_path / 'spiece.model')
model_path = str(target_path / 'model.bin')
shutil.copy(temp_config, config_path)
shutil.copy(temp_vocab, vocab_path)
shutil.copy(temp_weights, model_path)
weights = torch.load(temp_weights, map_location='cpu')
nps = {}
for k, v in weights.items():
k = k.replace("gamma", "weight").replace("beta", "bias")
nps[k] = np.ascontiguousarray(v.cpu().numpy())
np.savez(target_path / 'model.npz', **nps)
source = str(target_path / 'model.npz')
target = str(target_path / 'model.ot')
toml_location = (Path(__file__).resolve() / '..' / '..' / 'Cargo.toml').resolve()
subprocess.call(
['cargo', 'run', '--bin=convert-tensor', '--manifest-path=%s' % toml_location, '--', source, target])
os.remove(str(target_path / 'model.bin'))
os.remove(str(target_path / 'model.npz'))