mirror of
https://github.com/guillaume-be/rust-bert.git
synced 2024-10-03 23:57:15 +03:00
0.22.0 Release (#440)
* Fix Clippy warnings * bump version, updated dependencies and changelog
This commit is contained in:
parent
1f4d344668
commit
c3a3f39468
@ -2,6 +2,8 @@
|
||||
All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
## [0.22.0] - 2024-01-20
|
||||
## Added
|
||||
- Addition of `new_with_tokenizer` constructor for `SentenceEmbeddingsModel` allowing passing custom tokenizers for sentence embeddings pipelines.
|
||||
- Support for [Tokenizers](https://github.com/huggingface/tokenizers) in pipelines, allowing loading `tokenizer.json` and `special_tokens_map.json` tokenizer files.
|
||||
|
14
Cargo.toml
14
Cargo.toml
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "rust-bert"
|
||||
version = "0.21.0"
|
||||
version = "0.22.0"
|
||||
authors = ["Guillaume Becquin <guillaume.becquin@gmail.com>"]
|
||||
edition = "2018"
|
||||
description = "Ready-to-use NLP pipelines and language models"
|
||||
@ -86,19 +86,19 @@ half = "2"
|
||||
regex = "1.6"
|
||||
|
||||
cached-path = { version = "0.6", default-features = false, optional = true }
|
||||
dirs = { version = "4", optional = true }
|
||||
dirs = { version = "5", optional = true }
|
||||
lazy_static = { version = "1", optional = true }
|
||||
ort = {version="~1.15.2", optional = true, default-features = false, features = ["half"]}
|
||||
ndarray = {version="0.15", optional = true}
|
||||
tokenizers = {version="0.13.3", optional=true, default-features = false, features = ["onig"]}
|
||||
tokenizers = {version="0.15", optional=true, default-features = false, features = ["onig"]}
|
||||
|
||||
[dev-dependencies]
|
||||
anyhow = "1"
|
||||
csv = "1"
|
||||
criterion = "0.4"
|
||||
tokio = { version = "1.24", features = ["sync", "rt-multi-thread", "macros"] }
|
||||
criterion = "0.5"
|
||||
tokio = { version = "1.35", features = ["sync", "rt-multi-thread", "macros"] }
|
||||
torch-sys = "0.14.0"
|
||||
tempfile = "3"
|
||||
itertools = "0.10"
|
||||
itertools = "0.12"
|
||||
tracing-subscriber = { version = "0.3", default-features = false, features = [ "env-filter", "fmt" ] }
|
||||
ort = {version="~1.15.2", features = ["load-dynamic"]}
|
||||
ort = {version="~1.15.5", features = ["load-dynamic"]}
|
@ -309,7 +309,7 @@ impl Config for SentenceEmbeddingsModulesConfig {}
|
||||
|
||||
impl SentenceEmbeddingsModulesConfig {
|
||||
pub fn validate(self) -> Result<Self, RustBertError> {
|
||||
match self.get(0) {
|
||||
match self.first() {
|
||||
Some(SentenceEmbeddingsModuleConfig {
|
||||
module_type: SentenceEmbeddingsModuleType::Transformer,
|
||||
..
|
||||
@ -347,7 +347,7 @@ impl SentenceEmbeddingsModulesConfig {
|
||||
}
|
||||
|
||||
pub fn transformer_module(&self) -> &SentenceEmbeddingsModuleConfig {
|
||||
self.get(0).as_ref().unwrap()
|
||||
self.first().as_ref().unwrap()
|
||||
}
|
||||
|
||||
pub fn pooling_module(&self) -> &SentenceEmbeddingsModuleConfig {
|
||||
|
@ -7,7 +7,6 @@ use rust_bert::resources::{load_weights, RemoteResource, ResourceProvider};
|
||||
use rust_bert::Config;
|
||||
use rust_tokenizers::tokenizer::{Gpt2Tokenizer, Tokenizer};
|
||||
use rust_tokenizers::vocab::Vocab;
|
||||
use std::convert::TryFrom;
|
||||
use tch::{nn, Device, Kind, Tensor};
|
||||
|
||||
/// Equivalent Python code:
|
||||
@ -107,7 +106,7 @@ fn gpt_j_correctness() -> anyhow::Result<()> {
|
||||
Tensor::from_slice(
|
||||
&input
|
||||
.iter()
|
||||
.map(|&e| i64::try_from(e != pad_token).unwrap())
|
||||
.map(|&e| i64::from(e != pad_token))
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
.to(device)
|
||||
|
Loading…
Reference in New Issue
Block a user