diff --git a/Cargo.toml b/Cargo.toml
index 3400588..ecd1cfa 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -30,7 +30,7 @@ all-tests = []
 features = [ "doc-only" ]
 
 [dependencies]
-rust_tokenizers = "~3.1.5"
+rust_tokenizers = "~3.1.6"
 tch = "~0.1.7"
 serde_json = "1.0.51"
 serde = {version = "1.0.106", features = ["derive"]}
diff --git a/requirements.txt b/requirements.txt
index 000fefe..4663c9f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
 torch == 1.5.0
-transformers == 2.8.0
\ No newline at end of file
+transformers == 2.10.0
\ No newline at end of file
diff --git a/utils/download-dependencies_albert.py b/utils/download-dependencies_albert.py
index 64bc432..58388a3 100644
--- a/utils/download-dependencies_albert.py
+++ b/utils/download-dependencies_albert.py
@@ -1,7 +1,6 @@
-from transformers import ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
 from transformers.configuration_albert import ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_albert import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,13 +10,13 @@ import subprocess
 
 config_path = ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP["albert-base-v2"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["albert-base-v2"]
-weights_path = ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP["albert-base-v2"]
+weights_path = "albert-base-v2"
 
 target_path = Path.home() / 'rustbert' / 'albert-base-v2'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_bart.py b/utils/download-dependencies_bart.py
index fbaa196..0361312 100644
--- a/utils/download-dependencies_bart.py
+++ b/utils/download-dependencies_bart.py
@@ -1,7 +1,6 @@
-from transformers import BART_PRETRAINED_MODEL_ARCHIVE_MAP
 from transformers.configuration_bart import BART_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_bart import vocab_url, merges_url
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -12,14 +11,14 @@ import subprocess
 config_path = BART_PRETRAINED_CONFIG_ARCHIVE_MAP['bart-large']
 vocab_path = vocab_url
 merges_path = merges_url
-weights_path = BART_PRETRAINED_MODEL_ARCHIVE_MAP['bart-large']
+weights_path = 'bart-large'
 
 target_path = Path.home() / 'rustbert' / 'bart-large'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_bart_cnn.py b/utils/download-dependencies_bart_cnn.py
index 1ec119c..b524cf8 100644
--- a/utils/download-dependencies_bart_cnn.py
+++ b/utils/download-dependencies_bart_cnn.py
@@ -1,7 +1,6 @@
-from transformers import BART_PRETRAINED_MODEL_ARCHIVE_MAP
 from transformers.configuration_bart import BART_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_bart import vocab_url, merges_url
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -12,14 +11,14 @@ import subprocess
 config_path = BART_PRETRAINED_CONFIG_ARCHIVE_MAP['bart-large-cnn']
 vocab_path = vocab_url
 merges_path = merges_url
-weights_path = BART_PRETRAINED_MODEL_ARCHIVE_MAP['bart-large-cnn']
+weights_path = 'bart-large-cnn'
 
 target_path = Path.home() / 'rustbert' / 'bart-large-cnn'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_bart_xsum.py b/utils/download-dependencies_bart_xsum.py
index c1e79a8..8aff45f 100644
--- a/utils/download-dependencies_bart_xsum.py
+++ b/utils/download-dependencies_bart_xsum.py
@@ -1,7 +1,6 @@
-from transformers import BART_PRETRAINED_MODEL_ARCHIVE_MAP
 from transformers.configuration_bart import BART_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_bart import vocab_url, merges_url
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -12,14 +11,14 @@ import subprocess
 config_path = BART_PRETRAINED_CONFIG_ARCHIVE_MAP['bart-large-xsum']
 vocab_path = vocab_url
 merges_path = merges_url
-weights_path = BART_PRETRAINED_MODEL_ARCHIVE_MAP['bart-large-xsum']
+weights_path = 'bart-large-xsum'
 
 target_path = Path.home() / 'rustbert' / 'bart-large-xsum'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_bert.py b/utils/download-dependencies_bert.py
index 2078407..2c79784 100644
--- a/utils/download-dependencies_bert.py
+++ b/utils/download-dependencies_bert.py
@@ -1,6 +1,6 @@
-from transformers import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BERT_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_bert import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 
 config_path = BERT_PRETRAINED_CONFIG_ARCHIVE_MAP["bert-base-uncased"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["bert-base-uncased"]
-weights_path = BERT_PRETRAINED_MODEL_ARCHIVE_MAP["bert-base-uncased"]
+weights_path = "bert-base-uncased"
 
 target_path = Path.home() / 'rustbert' / 'bert'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_bert_qa.py b/utils/download-dependencies_bert_qa.py
index 7d68baa..f0595d9 100644
--- a/utils/download-dependencies_bert_qa.py
+++ b/utils/download-dependencies_bert_qa.py
@@ -1,6 +1,6 @@
-from transformers import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP, BERT_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import BERT_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_bert import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 
 config_path = BERT_PRETRAINED_CONFIG_ARCHIVE_MAP["bert-large-cased-whole-word-masking-finetuned-squad"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["bert-large-cased-whole-word-masking-finetuned-squad"]
-weights_path = BERT_PRETRAINED_MODEL_ARCHIVE_MAP["bert-large-cased-whole-word-masking-finetuned-squad"]
+weights_path = "bert-large-cased-whole-word-masking-finetuned-squad"
 
 target_path = Path.home() / 'rustbert' / 'bert-qa'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_distilbert-qa.py b/utils/download-dependencies_distilbert-qa.py
index 5141881..29b026e 100644
--- a/utils/download-dependencies_distilbert-qa.py
+++ b/utils/download-dependencies_distilbert-qa.py
@@ -1,6 +1,6 @@
-from transformers import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_distilbert import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 
 config_path = DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP["distilbert-base-cased-distilled-squad"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["distilbert-base-cased-distilled-squad"]
-weights_path = DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP["distilbert-base-cased-distilled-squad"]
+weights_path = "distilbert-base-cased-distilled-squad"
 
 target_path = Path.home() / 'rustbert' / 'distilbert-qa'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_distilbert.py b/utils/download-dependencies_distilbert.py
index 4f2b524..11cc339 100644
--- a/utils/download-dependencies_distilbert.py
+++ b/utils/download-dependencies_distilbert.py
@@ -1,6 +1,6 @@
-from transformers import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_distilbert import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 
 config_path = DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP["distilbert-base-uncased"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["distilbert-base-uncased"]
-weights_path = DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP["distilbert-base-uncased"]
+weights_path = "distilbert-base-uncased"
 
 target_path = Path.home() / 'rustbert' / 'distilbert'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_distilgpt2.py b/utils/download-dependencies_distilgpt2.py
index 6123c7a..52ce411 100644
--- a/utils/download-dependencies_distilgpt2.py
+++ b/utils/download-dependencies_distilgpt2.py
@@ -1,6 +1,6 @@
-from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_gpt2 import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,14 +11,14 @@ import subprocess
 config_path = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP["distilgpt2"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["distilgpt2"]
 merges_path = PRETRAINED_VOCAB_FILES_MAP["merges_file"]["distilgpt2"]
-weights_path = GPT2_PRETRAINED_MODEL_ARCHIVE_MAP["distilgpt2"]
+weights_path = "distilgpt2"
 
 target_path = Path.home() / 'rustbert' / 'distilgpt2'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_electra-discriminator.py b/utils/download-dependencies_electra-discriminator.py
index d687e31..b9c4435 100644
--- a/utils/download-dependencies_electra-discriminator.py
+++ b/utils/download-dependencies_electra-discriminator.py
@@ -1,6 +1,6 @@
-from transformers import ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP, ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP
+from transformers import ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_electra import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 
 config_path = ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP["google/electra-base-discriminator"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["google/electra-base-discriminator"]
-weights_path = ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP["google/electra-base-discriminator"]
+weights_path = "google/electra-base-discriminator"
 
 target_path = Path.home() / 'rustbert' / 'electra-discriminator'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_electra-generator.py b/utils/download-dependencies_electra-generator.py
index 9834acf..ee7fb44 100644
--- a/utils/download-dependencies_electra-generator.py
+++ b/utils/download-dependencies_electra-generator.py
@@ -1,6 +1,6 @@
-from transformers import ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP, ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP
+from transformers import ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_electra import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 
 config_path = ELECTRA_PRETRAINED_CONFIG_ARCHIVE_MAP["google/electra-base-generator"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["google/electra-base-generator"]
-weights_path = ELECTRA_PRETRAINED_MODEL_ARCHIVE_MAP["google/electra-base-generator"]
+weights_path = "google/electra-base-generator"
 
 target_path = Path.home() / 'rustbert' / 'electra-generator'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_gpt2-large.py b/utils/download-dependencies_gpt2-large.py
index 7a447c6..1cf7290 100644
--- a/utils/download-dependencies_gpt2-large.py
+++ b/utils/download-dependencies_gpt2-large.py
@@ -1,6 +1,6 @@
-from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_gpt2 import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,14 +11,14 @@ import subprocess
 config_path = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP["gpt2-large"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["gpt2-large"]
 merges_path = PRETRAINED_VOCAB_FILES_MAP["merges_file"]["gpt2-large"]
-weights_path = GPT2_PRETRAINED_MODEL_ARCHIVE_MAP["gpt2-large"]
+weights_path = "gpt2-large"
 
 target_path = Path.home() / 'rustbert' / 'gpt2-large'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_gpt2-medium.py b/utils/download-dependencies_gpt2-medium.py
index 9dc6291..0c75847 100644
--- a/utils/download-dependencies_gpt2-medium.py
+++ b/utils/download-dependencies_gpt2-medium.py
@@ -1,6 +1,6 @@
-from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_gpt2 import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,14 +11,14 @@ import subprocess
 config_path = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP["gpt2-medium"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["gpt2-medium"]
 merges_path = PRETRAINED_VOCAB_FILES_MAP["merges_file"]["gpt2-medium"]
-weights_path = GPT2_PRETRAINED_MODEL_ARCHIVE_MAP["gpt2-medium"]
+weights_path = "gpt2-medium"
 
 target_path = Path.home() / 'rustbert' / 'gpt2-medium'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_gpt2-xl.py b/utils/download-dependencies_gpt2-xl.py
index 4b0916a..e95bc44 100644
--- a/utils/download-dependencies_gpt2-xl.py
+++ b/utils/download-dependencies_gpt2-xl.py
@@ -1,6 +1,6 @@
-from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_gpt2 import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,14 +11,14 @@ import subprocess
 config_path = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP["gpt2-xl"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["gpt2-xl"]
 merges_path = PRETRAINED_VOCAB_FILES_MAP["merges_file"]["gpt2-xl"]
-weights_path = GPT2_PRETRAINED_MODEL_ARCHIVE_MAP["gpt2-xl"]
+weights_path = "gpt2-xl"
 
 target_path = Path.home() / 'rustbert' / 'gpt2-xl'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_gpt2.py b/utils/download-dependencies_gpt2.py
index 34f0b48..75193e1 100644
--- a/utils/download-dependencies_gpt2.py
+++ b/utils/download-dependencies_gpt2.py
@@ -1,6 +1,6 @@
-from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP, GPT2_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_gpt2 import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,14 +11,14 @@ import subprocess
 config_path = GPT2_PRETRAINED_CONFIG_ARCHIVE_MAP["gpt2"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["gpt2"]
 merges_path = PRETRAINED_VOCAB_FILES_MAP["merges_file"]["gpt2"]
-weights_path = GPT2_PRETRAINED_MODEL_ARCHIVE_MAP["gpt2"]
+weights_path = "gpt2"
 
 target_path = Path.home() / 'rustbert' / 'gpt2'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_openaigpt.py b/utils/download-dependencies_openaigpt.py
index 8d2cc84..5a3335b 100644
--- a/utils/download-dependencies_openaigpt.py
+++ b/utils/download-dependencies_openaigpt.py
@@ -1,6 +1,6 @@
-from transformers import OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP, OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_openai import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,14 +11,14 @@ import subprocess
 config_path = OPENAI_GPT_PRETRAINED_CONFIG_ARCHIVE_MAP["openai-gpt"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["openai-gpt"]
 merges_path = PRETRAINED_VOCAB_FILES_MAP["merges_file"]["openai-gpt"]
-weights_path = OPENAI_GPT_PRETRAINED_MODEL_ARCHIVE_MAP["openai-gpt"]
+weights_path = "openai-gpt"
 
 target_path = Path.home() / 'rustbert' / 'openai-gpt'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_roberta.py b/utils/download-dependencies_roberta.py
index 4ef29c9..8f7b024 100644
--- a/utils/download-dependencies_roberta.py
+++ b/utils/download-dependencies_roberta.py
@@ -1,6 +1,6 @@
-from transformers import ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP, ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_roberta import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -11,14 +11,14 @@ import subprocess
 config_path = ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP["roberta-base"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["roberta-base"]
 merges_path = PRETRAINED_VOCAB_FILES_MAP["merges_file"]["roberta-base"]
-weights_path = ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP["roberta-base"]
+weights_path = "roberta-base"
 
 target_path = Path.home() / 'rustbert' / 'roberta'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
 temp_merges = get_from_cache(merges_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_roberta_qa.py b/utils/download-dependencies_roberta_qa.py
new file mode 100644
index 0000000..c819f99
--- /dev/null
+++ b/utils/download-dependencies_roberta_qa.py
@@ -0,0 +1,52 @@
+from transformers.file_utils import get_from_cache, S3_BUCKET_PREFIX
+from pathlib import Path
+import shutil
+import os
+import numpy as np
+import torch
+import subprocess
+
+ROOT_PATH = S3_BUCKET_PREFIX + '/deepset/roberta-base-squad2'
+
+config_path = ROOT_PATH + '/config.json'
+vocab_path = ROOT_PATH + '/vocab.json'
+merges_path = ROOT_PATH + '/merges.txt'
+weights_path = ROOT_PATH + '/pytorch_model.bin'
+
+target_path = Path.home() / 'rustbert' / 'roberta-qa'
+
+temp_config = get_from_cache(config_path)
+temp_vocab = get_from_cache(vocab_path)
+temp_merges = get_from_cache(merges_path)
+temp_weights = get_from_cache(weights_path)
+
+os.makedirs(str(target_path), exist_ok=True)
+
+config_path = str(target_path / 'config.json')
+vocab_path = str(target_path / 'vocab.json')
+merges_path = str(target_path / 'merges.txt')
+model_path = str(target_path / 'model.bin')
+
+shutil.copy(temp_config, config_path)
+shutil.copy(temp_vocab, vocab_path)
+shutil.copy(temp_merges, merges_path)
+shutil.copy(temp_weights, model_path)
+
+weights = torch.load(temp_weights, map_location='cpu')
+nps = {}
+for k, v in weights.items():
+    k = k.replace("gamma", "weight").replace("beta", "bias")
+    nps[k] = np.ascontiguousarray(v.cpu().numpy())
+
+np.savez(target_path / 'model.npz', **nps)
+
+source = str(target_path / 'model.npz')
+target = str(target_path / 'model.ot')
+
+toml_location = (Path(__file__).resolve() / '..' / '..' / 'Cargo.toml').resolve()
+
+subprocess.call(
+    ['cargo', 'run', '--bin=convert-tensor', '--manifest-path=%s' % toml_location, '--', source, target])
+
+os.remove(str(target_path / 'model.bin'))
+os.remove(str(target_path / 'model.npz'))
diff --git a/utils/download-dependencies_sst2_sentiment.py b/utils/download-dependencies_sst2_sentiment.py
index c9efd3b..fd5a430 100644
--- a/utils/download-dependencies_sst2_sentiment.py
+++ b/utils/download-dependencies_sst2_sentiment.py
@@ -1,6 +1,6 @@
-from transformers import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP, DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP
+from transformers import DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_distilbert import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 
 config_path = DISTILBERT_PRETRAINED_CONFIG_ARCHIVE_MAP["distilbert-base-uncased-finetuned-sst-2-english"]
 vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["distilbert-base-uncased"]
-weights_path = DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP["distilbert-base-uncased-finetuned-sst-2-english"]
+weights_path = "distilbert-base-uncased-finetuned-sst-2-english"
 
 target_path = Path.home() / 'rustbert' / 'distilbert-sst2'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_t5_base.py b/utils/download-dependencies_t5_base.py
index 4274ea1..c711e61 100644
--- a/utils/download-dependencies_t5_base.py
+++ b/utils/download-dependencies_t5_base.py
@@ -1,6 +1,6 @@
-from transformers import T5_PRETRAINED_MODEL_ARCHIVE_MAP, T5_PRETRAINED_CONFIG_ARCHIVE_MAP
+from transformers import T5_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_t5 import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 
 config_path = T5_PRETRAINED_CONFIG_ARCHIVE_MAP['t5-base']
 vocab_path = PRETRAINED_VOCAB_FILES_MAP['vocab_file']['t5-base']
-weights_path = T5_PRETRAINED_MODEL_ARCHIVE_MAP['t5-base']
+weights_path = 't5-base'
 
 target_path = Path.home() / 'rustbert' / 't5-base'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_t5_small.py b/utils/download-dependencies_t5_small.py
index 31e754d..bcb47e3 100644
--- a/utils/download-dependencies_t5_small.py
+++ b/utils/download-dependencies_t5_small.py
@@ -1,6 +1,6 @@
-from transformers import T5_PRETRAINED_MODEL_ARCHIVE_MAP, T5_PRETRAINED_CONFIG_ARCHIVE_MAP
+from transformers import T5_PRETRAINED_CONFIG_ARCHIVE_MAP
 from transformers.tokenization_t5 import PRETRAINED_VOCAB_FILES_MAP
-from transformers.file_utils import get_from_cache
+from transformers.file_utils import get_from_cache, hf_bucket_url
 from pathlib import Path
 import shutil
 import os
@@ -10,13 +10,13 @@ import subprocess
 
 config_path = T5_PRETRAINED_CONFIG_ARCHIVE_MAP['t5-small']
 vocab_path = PRETRAINED_VOCAB_FILES_MAP['vocab_file']['t5-small']
-weights_path = T5_PRETRAINED_MODEL_ARCHIVE_MAP['t5-small']
+weights_path = 't5-small'
 
 target_path = Path.home() / 'rustbert' / 't5-small'
 
 temp_config = get_from_cache(config_path)
 temp_vocab = get_from_cache(vocab_path)
-temp_weights = get_from_cache(weights_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
 
 os.makedirs(str(target_path), exist_ok=True)
 
diff --git a/utils/download-dependencies_xlm_roberta_ner_dutch.py b/utils/download-dependencies_xlm_roberta_ner_dutch.py
new file mode 100644
index 0000000..c63ee84
--- /dev/null
+++ b/utils/download-dependencies_xlm_roberta_ner_dutch.py
@@ -0,0 +1,48 @@
+from transformers import XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
+from transformers.tokenization_xlm_roberta import PRETRAINED_VOCAB_FILES_MAP
+from transformers.file_utils import get_from_cache, hf_bucket_url
+from pathlib import Path
+import shutil
+import os
+import numpy as np
+import torch
+import subprocess
+
+config_path = XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP["xlm-roberta-large-finetuned-conll02-dutch"]
+vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["xlm-roberta-large-finetuned-conll02-dutch"]
+weights_path = "xlm-roberta-large-finetuned-conll02-dutch"
+
+target_path = Path.home() / 'rustbert' / 'xlm-roberta-ner-nl'
+
+temp_config = get_from_cache(config_path)
+temp_vocab = get_from_cache(vocab_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
+
+os.makedirs(str(target_path), exist_ok=True)
+
+config_path = str(target_path / 'config.json')
+vocab_path = str(target_path / 'spiece.model')
+model_path = str(target_path / 'model.bin')
+
+shutil.copy(temp_config, config_path)
+shutil.copy(temp_vocab, vocab_path)
+shutil.copy(temp_weights, model_path)
+
+weights = torch.load(temp_weights, map_location='cpu')
+nps = {}
+for k, v in weights.items():
+    k = k.replace("gamma", "weight").replace("beta", "bias")
+    nps[k] = np.ascontiguousarray(v.cpu().numpy())
+
+np.savez(target_path / 'model.npz', **nps)
+
+source = str(target_path / 'model.npz')
+target = str(target_path / 'model.ot')
+
+toml_location = (Path(__file__).resolve() / '..' / '..' / 'Cargo.toml').resolve()
+
+subprocess.call(
+    ['cargo', 'run', '--bin=convert-tensor', '--manifest-path=%s' % toml_location, '--', source, target])
+
+os.remove(str(target_path / 'model.bin'))
+os.remove(str(target_path / 'model.npz'))
diff --git a/utils/download-dependencies_xlm_roberta_ner_english.py b/utils/download-dependencies_xlm_roberta_ner_english.py
new file mode 100644
index 0000000..33c60dc
--- /dev/null
+++ b/utils/download-dependencies_xlm_roberta_ner_english.py
@@ -0,0 +1,48 @@
+from transformers import XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
+from transformers.tokenization_xlm_roberta import PRETRAINED_VOCAB_FILES_MAP
+from transformers.file_utils import get_from_cache, hf_bucket_url
+from pathlib import Path
+import shutil
+import os
+import numpy as np
+import torch
+import subprocess
+
+config_path = XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP["xlm-roberta-large-finetuned-conll03-english"]
+vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["xlm-roberta-large-finetuned-conll03-english"]
+weights_path = "xlm-roberta-large-finetuned-conll03-english"
+
+target_path = Path.home() / 'rustbert' / 'xlm-roberta-ner-en'
+
+temp_config = get_from_cache(config_path)
+temp_vocab = get_from_cache(vocab_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
+
+os.makedirs(str(target_path), exist_ok=True)
+
+config_path = str(target_path / 'config.json')
+vocab_path = str(target_path / 'spiece.model')
+model_path = str(target_path / 'model.bin')
+
+shutil.copy(temp_config, config_path)
+shutil.copy(temp_vocab, vocab_path)
+shutil.copy(temp_weights, model_path)
+
+weights = torch.load(temp_weights, map_location='cpu')
+nps = {}
+for k, v in weights.items():
+    k = k.replace("gamma", "weight").replace("beta", "bias")
+    nps[k] = np.ascontiguousarray(v.cpu().numpy())
+
+np.savez(target_path / 'model.npz', **nps)
+
+source = str(target_path / 'model.npz')
+target = str(target_path / 'model.ot')
+
+toml_location = (Path(__file__).resolve() / '..' / '..' / 'Cargo.toml').resolve()
+
+subprocess.call(
+    ['cargo', 'run', '--bin=convert-tensor', '--manifest-path=%s' % toml_location, '--', source, target])
+
+os.remove(str(target_path / 'model.bin'))
+os.remove(str(target_path / 'model.npz'))
diff --git a/utils/download-dependencies_xlm_roberta_ner_german.py b/utils/download-dependencies_xlm_roberta_ner_german.py
new file mode 100644
index 0000000..42ef2f3
--- /dev/null
+++ b/utils/download-dependencies_xlm_roberta_ner_german.py
@@ -0,0 +1,48 @@
+from transformers import XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
+from transformers.tokenization_xlm_roberta import PRETRAINED_VOCAB_FILES_MAP
+from transformers.file_utils import get_from_cache, hf_bucket_url
+from pathlib import Path
+import shutil
+import os
+import numpy as np
+import torch
+import subprocess
+
+config_path = XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP["xlm-roberta-large-finetuned-conll03-german"]
+vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["xlm-roberta-large-finetuned-conll03-german"]
+weights_path = "xlm-roberta-large-finetuned-conll03-german"
+
+target_path = Path.home() / 'rustbert' / 'xlm-roberta-ner-de'
+
+temp_config = get_from_cache(config_path)
+temp_vocab = get_from_cache(vocab_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
+
+os.makedirs(str(target_path), exist_ok=True)
+
+config_path = str(target_path / 'config.json')
+vocab_path = str(target_path / 'spiece.model')
+model_path = str(target_path / 'model.bin')
+
+shutil.copy(temp_config, config_path)
+shutil.copy(temp_vocab, vocab_path)
+shutil.copy(temp_weights, model_path)
+
+weights = torch.load(temp_weights, map_location='cpu')
+nps = {}
+for k, v in weights.items():
+    k = k.replace("gamma", "weight").replace("beta", "bias")
+    nps[k] = np.ascontiguousarray(v.cpu().numpy())
+
+np.savez(target_path / 'model.npz', **nps)
+
+source = str(target_path / 'model.npz')
+target = str(target_path / 'model.ot')
+
+toml_location = (Path(__file__).resolve() / '..' / '..' / 'Cargo.toml').resolve()
+
+subprocess.call(
+    ['cargo', 'run', '--bin=convert-tensor', '--manifest-path=%s' % toml_location, '--', source, target])
+
+os.remove(str(target_path / 'model.bin'))
+os.remove(str(target_path / 'model.npz'))
diff --git a/utils/download-dependencies_xlm_roberta_ner_spanish.py b/utils/download-dependencies_xlm_roberta_ner_spanish.py
new file mode 100644
index 0000000..edacb95
--- /dev/null
+++ b/utils/download-dependencies_xlm_roberta_ner_spanish.py
@@ -0,0 +1,48 @@
+from transformers import XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP
+from transformers.tokenization_xlm_roberta import PRETRAINED_VOCAB_FILES_MAP
+from transformers.file_utils import get_from_cache, hf_bucket_url
+from pathlib import Path
+import shutil
+import os
+import numpy as np
+import torch
+import subprocess
+
+config_path = XLM_ROBERTA_PRETRAINED_CONFIG_ARCHIVE_MAP["xlm-roberta-large-finetuned-conll02-spanish"]
+vocab_path = PRETRAINED_VOCAB_FILES_MAP["vocab_file"]["xlm-roberta-large-finetuned-conll02-spanish"]
+weights_path = "xlm-roberta-large-finetuned-conll02-spanish"
+
+target_path = Path.home() / 'rustbert' / 'xlm-roberta-ner-es'
+
+temp_config = get_from_cache(config_path)
+temp_vocab = get_from_cache(vocab_path)
+temp_weights = get_from_cache(hf_bucket_url(weights_path, filename="pytorch_model.bin", use_cdn=True))
+
+os.makedirs(str(target_path), exist_ok=True)
+
+config_path = str(target_path / 'config.json')
+vocab_path = str(target_path / 'spiece.model')
+model_path = str(target_path / 'model.bin')
+
+shutil.copy(temp_config, config_path)
+shutil.copy(temp_vocab, vocab_path)
+shutil.copy(temp_weights, model_path)
+
+weights = torch.load(temp_weights, map_location='cpu')
+nps = {}
+for k, v in weights.items():
+    k = k.replace("gamma", "weight").replace("beta", "bias")
+    nps[k] = np.ascontiguousarray(v.cpu().numpy())
+
+np.savez(target_path / 'model.npz', **nps)
+
+source = str(target_path / 'model.npz')
+target = str(target_path / 'model.ot')
+
+toml_location = (Path(__file__).resolve() / '..' / '..' / 'Cargo.toml').resolve()
+
+subprocess.call(
+    ['cargo', 'run', '--bin=convert-tensor', '--manifest-path=%s' % toml_location, '--', source, target])
+
+os.remove(str(target_path / 'model.bin'))
+os.remove(str(target_path / 'model.npz'))
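
The recurring change across these scripts is the same: instead of looking up a hard-coded weights URL in a per-model *_PRETRAINED_MODEL_ARCHIVE_MAP, each script now builds the URL from the bare model identifier with hf_bucket_url and downloads it through get_from_cache. A minimal standalone sketch of that shared flow, written against the transformers 2.10.x API pinned in requirements.txt; the model id "bert-base-uncased" and the output directory are illustrative only, not part of the patch:

    from pathlib import Path
    import shutil

    from transformers.file_utils import get_from_cache, hf_bucket_url

    # Resolve the weights URL from the model identifier; use_cdn=True selects
    # the CDN-backed location for the (large) binary weights file.
    weights_url = hf_bucket_url("bert-base-uncased", filename="pytorch_model.bin", use_cdn=True)

    # Download into the local transformers cache (a no-op if already cached),
    # then copy the cached file into the rust-bert model directory.
    temp_weights = get_from_cache(weights_url)
    target_path = Path.home() / 'rustbert' / 'bert'
    target_path.mkdir(parents=True, exist_ok=True)
    shutil.copy(temp_weights, str(target_path / 'model.bin'))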