Updated tokenization dependency (no longer requires rust nightly)

This commit is contained in:
Guillaume B 2020-02-15 11:37:50 +01:00
parent 9ba56e1b81
commit a7264ff0f2
4 changed files with 9 additions and 10 deletions

View File

@ -21,7 +21,7 @@ name = "convert-tensor"
path = "src/convert-tensor.rs"
[dependencies]
rust_transformers = "0.2.0"
rust_tokenizers = "1.0.0"
tch = "0.1.6"
serde_json = "1.0.45"
serde = {version = "1.0.104", features = ["derive"]}

View File

@ -33,10 +33,9 @@ The model configuration and vocabulary are downloaded directly from Huggingface'
The model weights need to be converted to a binary format that can be read by Libtorch (the original `.pth` files are pickled and cannot be used directly). A Python script for downloading the required files & running the necessary steps is provided.
1. Install the Rust nightly toolchain (https://www.rust-lang.org/tools/install)
2. Compile the package: `cargo build --release`
3. Download the model files & perform necessary conversions
1. Compile the package: `cargo build --release`
2. Download the model files & perform necessary conversions
- Set up a virtual environment and install dependencies
- Run the conversion script `python /utils/download-dependencies.py`. The dependencies will be downloaded to the user's home directory, under `~/rustbert`
4. Run the example `cargo run --release`
3. Run the example `cargo run --release`

View File

@ -1,9 +1,9 @@
use std::path::PathBuf;
use tch::{Device, Tensor, nn, no_grad};
use rust_bert::distilbert::distilbert::{DistilBertModelMaskedLM, DistilBertConfig};
use rust_transformers::preprocessing::tokenizer::base_tokenizer::{Tokenizer, TruncationStrategy};
use rust_transformers::bert_tokenizer::BertTokenizer;
use rust_transformers::preprocessing::vocab::base_vocab::Vocab;
use rust_tokenizers::preprocessing::tokenizer::base_tokenizer::{Tokenizer, TruncationStrategy};
use rust_tokenizers::bert_tokenizer::BertTokenizer;
use rust_tokenizers::preprocessing::vocab::base_vocab::Vocab;
extern crate failure;
extern crate dirs;

View File

@ -1,9 +1,9 @@
use rust_transformers::bert_tokenizer::BertTokenizer;
use rust_tokenizers::bert_tokenizer::BertTokenizer;
use crate::distilbert::distilbert::{DistilBertModelClassifier, DistilBertConfig};
use std::path::Path;
use tch::{Device, Tensor, Kind, no_grad};
use tch::nn::VarStore;
use rust_transformers::preprocessing::tokenizer::base_tokenizer::{TruncationStrategy, MultiThreadedTokenizer};
use rust_tokenizers::preprocessing::tokenizer::base_tokenizer::{TruncationStrategy, MultiThreadedTokenizer};
#[derive(Debug, PartialEq)]