0.22.0 Release (#440)

* Fix Clippy warnings

* bump version, updated dependencies and changelog
This commit is contained in:
guillaume-be 2024-01-20 09:42:49 +00:00 committed by GitHub
parent 1f4d344668
commit c3a3f39468
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 12 additions and 11 deletions

View File

@@ -2,6 +2,8 @@
All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
## [Unreleased]
## [0.22.0] - 2024-01-20
### Added
- Addition of `new_with_tokenizer` constructor for `SentenceEmbeddingsModel` allowing passing custom tokenizers for sentence embeddings pipelines.
- Support for [Tokenizers](https://github.com/huggingface/tokenizers) in pipelines, allowing loading `tokenizer.json` and `special_tokens_map.json` tokenizer files.

View File

@@ -1,6 +1,6 @@
[package]
name = "rust-bert"
version = "0.21.0"
version = "0.22.0"
authors = ["Guillaume Becquin <guillaume.becquin@gmail.com>"]
edition = "2018"
description = "Ready-to-use NLP pipelines and language models"
@@ -86,19 +86,19 @@ half = "2"
regex = "1.6"
cached-path = { version = "0.6", default-features = false, optional = true }
dirs = { version = "4", optional = true }
dirs = { version = "5", optional = true }
lazy_static = { version = "1", optional = true }
ort = {version="~1.15.2", optional = true, default-features = false, features = ["half"]}
ndarray = {version="0.15", optional = true}
tokenizers = {version="0.13.3", optional=true, default-features = false, features = ["onig"]}
tokenizers = {version="0.15", optional=true, default-features = false, features = ["onig"]}
[dev-dependencies]
anyhow = "1"
csv = "1"
criterion = "0.4"
tokio = { version = "1.24", features = ["sync", "rt-multi-thread", "macros"] }
criterion = "0.5"
tokio = { version = "1.35", features = ["sync", "rt-multi-thread", "macros"] }
torch-sys = "0.14.0"
tempfile = "3"
itertools = "0.10"
itertools = "0.12"
tracing-subscriber = { version = "0.3", default-features = false, features = [ "env-filter", "fmt" ] }
ort = {version="~1.15.2", features = ["load-dynamic"]}
ort = {version="~1.15.5", features = ["load-dynamic"]}

View File

@@ -309,7 +309,7 @@ impl Config for SentenceEmbeddingsModulesConfig {}
impl SentenceEmbeddingsModulesConfig {
pub fn validate(self) -> Result<Self, RustBertError> {
match self.get(0) {
match self.first() {
Some(SentenceEmbeddingsModuleConfig {
module_type: SentenceEmbeddingsModuleType::Transformer,
..
@@ -347,7 +347,7 @@ impl SentenceEmbeddingsModulesConfig {
}
pub fn transformer_module(&self) -> &SentenceEmbeddingsModuleConfig {
self.get(0).as_ref().unwrap()
self.first().as_ref().unwrap()
}
pub fn pooling_module(&self) -> &SentenceEmbeddingsModuleConfig {

View File

@@ -7,7 +7,6 @@ use rust_bert::resources::{load_weights, RemoteResource, ResourceProvider};
use rust_bert::Config;
use rust_tokenizers::tokenizer::{Gpt2Tokenizer, Tokenizer};
use rust_tokenizers::vocab::Vocab;
use std::convert::TryFrom;
use tch::{nn, Device, Kind, Tensor};
/// Equivalent Python code:
@@ -107,7 +106,7 @@ fn gpt_j_correctness() -> anyhow::Result<()> {
Tensor::from_slice(
&input
.iter()
.map(|&e| i64::try_from(e != pad_token).unwrap())
.map(|&e| i64::from(e != pad_token))
.collect::<Vec<_>>(),
)
.to(device)