rust-bert/examples/sentence_embeddings_local.rs
guillaume-be 84561ec82b
Tokenizer special token map update (#330)
* Updates for compatibility with tokenizers special token rework

* Updated mask pipeline methods

* Bumped version

* Fix clippy warnings
2023-01-30 17:53:18 +00:00

38 lines
1.3 KiB
Rust

use rust_bert::pipelines::sentence_embeddings::SentenceEmbeddingsBuilder;
/// Download model:
/// ```sh
/// git lfs install
/// git -C resources clone https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2
/// ```
/// Prepare model:
/// ```sh
/// python ./utils/convert_model.py resources/all-MiniLM-L12-v2/pytorch_model.bin
/// ```
///
/// For models missing the prefix in their saved weights (e.g. Distil-based models), the
/// conversion needs to be updated to include this prefix so that the weights can be found:
/// ```sh
/// python ./utils/convert_model.py resources/path/to/pytorch_model.bin --prefix distilbert.
/// ```
///
/// For models including a dense projection layer (e.g. Distil-based models), these weights
/// need to be converted as well:
/// ```sh
/// python ./utils/convert_model.py resources/path/to/2_Dense/pytorch_model.bin --suffix
/// ```
fn main() -> anyhow::Result<()> {
    // Sentences to embed; each entry is passed to the model as one input.
    let inputs = ["this is an example sentence", "each sentence is converted"];

    // Configure a builder that reads the model from the local resources
    // directory and uses the device returned by `cuda_if_available`.
    let builder = SentenceEmbeddingsBuilder::local("resources/all-MiniLM-L12-v2")
        .with_device(tch::Device::cuda_if_available());

    // Build the model; any failure is propagated to the caller via `?`.
    let encoder = builder.create_model()?;

    // Encode the inputs and print the result using its `Debug` formatting.
    let vectors = encoder.encode(&inputs)?;
    println!("{vectors:?}");

    Ok(())
}