mirror of
https://github.com/guillaume-be/rust-bert.git
synced 2024-10-03 23:57:15 +03:00
0.22.0 Release (#440)
* Fix Clippy warnings * bump version, updated dependencies and changelog
This commit is contained in:
parent
1f4d344668
commit
c3a3f39468
@ -2,6 +2,8 @@
|
||||
All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
## [0.22.0] - 2024-01-20
|
||||
## Added
|
||||
- Addition of `new_with_tokenizer` constructor for `SentenceEmbeddingsModel` allowing passing custom tokenizers for sentence embeddings pipelines.
|
||||
- Support for [Tokenizers](https://github.com/huggingface/tokenizers) in pipelines, allowing loading `tokenizer.json` and `special_tokens_map.json` tokenizer files.
|
||||
|
14
Cargo.toml
14
Cargo.toml
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "rust-bert"
|
||||
version = "0.21.0"
|
||||
version = "0.22.0"
|
||||
authors = ["Guillaume Becquin <guillaume.becquin@gmail.com>"]
|
||||
edition = "2018"
|
||||
description = "Ready-to-use NLP pipelines and language models"
|
||||
@ -86,19 +86,19 @@ half = "2"
|
||||
regex = "1.6"
|
||||
|
||||
cached-path = { version = "0.6", default-features = false, optional = true }
|
||||
dirs = { version = "4", optional = true }
|
||||
dirs = { version = "5", optional = true }
|
||||
lazy_static = { version = "1", optional = true }
|
||||
ort = {version="~1.15.2", optional = true, default-features = false, features = ["half"]}
|
||||
ndarray = {version="0.15", optional = true}
|
||||
tokenizers = {version="0.13.3", optional=true, default-features = false, features = ["onig"]}
|
||||
tokenizers = {version="0.15", optional=true, default-features = false, features = ["onig"]}
|
||||
|
||||
[dev-dependencies]
|
||||
anyhow = "1"
|
||||
csv = "1"
|
||||
criterion = "0.4"
|
||||
tokio = { version = "1.24", features = ["sync", "rt-multi-thread", "macros"] }
|
||||
criterion = "0.5"
|
||||
tokio = { version = "1.35", features = ["sync", "rt-multi-thread", "macros"] }
|
||||
torch-sys = "0.14.0"
|
||||
tempfile = "3"
|
||||
itertools = "0.10"
|
||||
itertools = "0.12"
|
||||
tracing-subscriber = { version = "0.3", default-features = false, features = [ "env-filter", "fmt" ] }
|
||||
ort = {version="~1.15.2", features = ["load-dynamic"]}
|
||||
ort = {version="~1.15.5", features = ["load-dynamic"]}
|
@ -309,7 +309,7 @@ impl Config for SentenceEmbeddingsModulesConfig {}
|
||||
|
||||
impl SentenceEmbeddingsModulesConfig {
|
||||
pub fn validate(self) -> Result<Self, RustBertError> {
|
||||
match self.get(0) {
|
||||
match self.first() {
|
||||
Some(SentenceEmbeddingsModuleConfig {
|
||||
module_type: SentenceEmbeddingsModuleType::Transformer,
|
||||
..
|
||||
@ -347,7 +347,7 @@ impl SentenceEmbeddingsModulesConfig {
|
||||
}
|
||||
|
||||
pub fn transformer_module(&self) -> &SentenceEmbeddingsModuleConfig {
|
||||
self.get(0).as_ref().unwrap()
|
||||
self.first().as_ref().unwrap()
|
||||
}
|
||||
|
||||
pub fn pooling_module(&self) -> &SentenceEmbeddingsModuleConfig {
|
||||
|
@ -7,7 +7,6 @@ use rust_bert::resources::{load_weights, RemoteResource, ResourceProvider};
|
||||
use rust_bert::Config;
|
||||
use rust_tokenizers::tokenizer::{Gpt2Tokenizer, Tokenizer};
|
||||
use rust_tokenizers::vocab::Vocab;
|
||||
use std::convert::TryFrom;
|
||||
use tch::{nn, Device, Kind, Tensor};
|
||||
|
||||
/// Equivalent Python code:
|
||||
@ -107,7 +106,7 @@ fn gpt_j_correctness() -> anyhow::Result<()> {
|
||||
Tensor::from_slice(
|
||||
&input
|
||||
.iter()
|
||||
.map(|&e| i64::try_from(e != pad_token).unwrap())
|
||||
.map(|&e| i64::from(e != pad_token))
|
||||
.collect::<Vec<_>>(),
|
||||
)
|
||||
.to(device)
|
||||
|
Loading…
Reference in New Issue
Block a user