mirror of
https://github.com/guillaume-be/rust-bert.git
synced 2024-10-03 23:57:15 +03:00
Fix clippy warnings (#466)
This commit is contained in:
parent
8802997c5f
commit
3df3816219
@ -91,7 +91,7 @@
|
||||
//! ### Manual installation (recommended)
|
||||
//!
|
||||
//! 1. Download `libtorch` from <https://pytorch.org/get-started/locally/>. This package requires `v2.2`: if this version is no longer available on the "get started" page,
|
||||
//! the file should be accessible by modifying the target link, for example `https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.2.0%2Bcu121.zip` for a Linux version with CUDA12.
|
||||
//! the file should be accessible by modifying the target link, for example `https://download.pytorch.org/libtorch/cu121/libtorch-cxx11-abi-shared-with-deps-2.2.0%2Bcu121.zip` for a Linux version with CUDA12.
|
||||
//! 2. Extract the library to a location of your choice
|
||||
//! 3. Set the following environment variables
|
||||
//! ##### Linux:
|
||||
|
@ -16,6 +16,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `BertTokenizer` using a `vocab.txt` vocabulary
|
||||
//!
|
||||
//! Pretrained models are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -369,7 +369,7 @@ fn _shift_tokens_right(input_ids: &Tensor, pad_token_id: i64) -> Tensor {
|
||||
/// It is made of the following blocks:
|
||||
/// - `encoder`: `BartEncoder` (transformer) made of a vector of encoding layers
|
||||
/// - `decoder`: `BartDecoder` (transformer) made of a vector of decoding layers with self attention and encoder cross-attention.
|
||||
/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
|
||||
/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
|
||||
/// - `pad_token_id`: padding token id
|
||||
pub struct BartModel {
|
||||
pub(crate) encoder: BartEncoder,
|
||||
@ -437,7 +437,7 @@ impl BartModel {
|
||||
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
|
||||
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
|
||||
///
|
||||
@ -597,7 +597,7 @@ impl BartForConditionalGeneration {
|
||||
/// * `input_ids` - Optional input tensor of shape (*batch size*, *source_sequence_length*). Must be provided when not running in generation mode
|
||||
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
|
||||
@ -798,7 +798,7 @@ impl BartForSequenceClassification {
|
||||
/// * `input_ids` - Optional input tensor of shape (*batch size*, *source_sequence_length*). Must be provided when not running in generation mode
|
||||
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
|
||||
|
@ -11,6 +11,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `RobertaTokenizer` using a `vocab.txt` vocabulary and `merges.txt` 2-gram merges
|
||||
//!
|
||||
//! Pretrained models are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -16,6 +16,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `BertTokenizer` using a `vocab.txt` vocabulary
|
||||
//!
|
||||
//! Pretrained models are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -12,6 +12,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `DebertaTokenizer` using a `vocab.json` vocabulary and `merges.txt` merges file
|
||||
//!
|
||||
//! Pretrained models are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -12,6 +12,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `DebertaV2Tokenizer` using a `spiece.model` SentencePiece model file
|
||||
//!
|
||||
//! Pretrained models are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -14,6 +14,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `BertTokenizer` using a `vocab.txt` vocabulary
|
||||
//!
|
||||
//! Pretrained models are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -19,6 +19,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `BertTokenizer` using a `vocab.txt` vocabulary
|
||||
//!
|
||||
//! Pretrained models are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -14,6 +14,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `FNetTokenizer` using a `spiece.model` SentencePiece (BPE) model file
|
||||
//!
|
||||
//! Pretrained models are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -11,6 +11,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `Gpt2Tokenizer` using a `vocab.txt` vocabulary and `merges.txt` 2-gram merges
|
||||
//!
|
||||
//! Pretrained models are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -174,7 +174,7 @@ impl From<&LongT5Config> for T5Config {
|
||||
/// It is made of the following blocks:
|
||||
/// - `encoder`: `T5Stack` (transformer) made of a vector of encoding layers
|
||||
/// - `decoder`: `T5Stack` (transformer) made of a vector of decoding layers with self attention and encoder cross-attention.
|
||||
/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
|
||||
/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
|
||||
/// - `embeddings`: `nn::Embedding` Shared embeddings for the encoder and decoder.
|
||||
pub struct LongT5Model {
|
||||
pub(crate) encoder: LongT5Stack,
|
||||
@ -248,7 +248,7 @@ impl LongT5Model {
|
||||
/// * `input_ids` - Optional input tensor of shape (*batch size*, *source_sequence_length*). This or `input_embeds` must be provided.
|
||||
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). This or `decoder_input_embeds` must be provided.
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `input_embeds` - Optional input tensor of shape (*batch size*, *source_sequence_length*, *embeddings dimension*). This or `input_ids` must be provided.
|
||||
@ -436,7 +436,7 @@ impl LongT5ForConditionalGeneration {
|
||||
/// * `input_ids` - Optional input tensor of shape (*batch size*, *source_sequence_length*). This or `input_embeds` must be provided.
|
||||
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). This or `decoder_input_embeds` must be provided.
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `input_embeds` - Optional input tensor of shape (*batch size*, *source_sequence_length*, *embeddings dimension*). This or `input_ids` must be provided.
|
||||
|
@ -126,7 +126,7 @@ fn _shift_tokens_right(
|
||||
/// It is made of the following blocks:
|
||||
/// - `encoder`: `M2M100Encoder` (transformer) made of a vector of encoding layers
|
||||
/// - `decoder`: `M2M100Decoder` (transformer) made of a vector of decoding layers with self attention and encoder cross-attention.
|
||||
/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
|
||||
/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
|
||||
/// - `pad_token_id`: padding token id
|
||||
pub struct M2M100Model {
|
||||
pub(crate) encoder: M2M100Encoder,
|
||||
@ -197,7 +197,7 @@ impl M2M100Model {
|
||||
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
|
||||
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
|
||||
///
|
||||
@ -365,7 +365,7 @@ impl M2M100ForConditionalGeneration {
|
||||
/// * `input_ids` - Optional input tensor of shape (*batch size*, *source_sequence_length*). Must be provided when not running in generation mode
|
||||
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
|
||||
|
@ -12,6 +12,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `M2M100Tokenizer` using a `config.json` vocabulary and a `spiece.model` SentencePiece BPE model
|
||||
//!
|
||||
//! Pretrained models are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -579,7 +579,7 @@ impl MarianForConditionalGeneration {
|
||||
/// * `input_ids` - Optional input tensor of shape (*batch size*, *source_sequence_length*). Must be provided when not running in generation mode
|
||||
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
|
||||
|
@ -229,7 +229,7 @@ impl MBartClassificationHead {
|
||||
/// It is made of the following blocks:
|
||||
/// - `encoder`: `MBartEncoder` (transformer) made of a vector of encoding layers
|
||||
/// - `decoder`: `MBartDecoder` (transformer) made of a vector of decoding layers with self attention and encoder cross-attention.
|
||||
/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
|
||||
/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
|
||||
/// - `pad_token_id`: padding token id
|
||||
pub struct MBartModel {
|
||||
pub(crate) encoder: MBartEncoder,
|
||||
@ -297,7 +297,7 @@ impl MBartModel {
|
||||
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
|
||||
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
|
||||
///
|
||||
@ -470,7 +470,7 @@ impl MBartForConditionalGeneration {
|
||||
/// * `input_ids` - Optional input tensor of shape (*batch size*, *source_sequence_length*). Must be provided when not running in generation mode
|
||||
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
|
||||
@ -621,7 +621,7 @@ impl MBartForSequenceClassification {
|
||||
/// * `input_ids` - Optional input tensor of shape (*batch size*, *source_sequence_length*). Must be provided when not running in generation mode
|
||||
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
|
||||
|
@ -11,6 +11,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `MBart50Tokenizer` using a `spiece.model` SentencePiece model
|
||||
//!
|
||||
//! Pretrained models are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -13,6 +13,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `BertTokenizer` using a `vocab.txt` vocabulary
|
||||
//!
|
||||
//! Pretrained models are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -10,6 +10,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `GptTokenizer` using a `vocab.txt` vocabulary and `merges.txt` 2-gram merges
|
||||
//!
|
||||
//! Pretrained models are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -11,6 +11,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `PegasusTokenizer` using a `spiece.model` vocabulary and unigram model.
|
||||
//!
|
||||
//! Pretrained models are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -87,7 +87,7 @@ fn _shift_tokens_right(
|
||||
/// It is made of the following blocks:
|
||||
/// - `encoder`: `PegasusEncoder` (transformer) made of a vector of encoding layers
|
||||
/// - `decoder`: `PegasusDecoder` (transformer) made of a vector of decoding layers with self attention and encoder cross-attention.
|
||||
/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
|
||||
/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
|
||||
pub struct PegasusModel {
|
||||
pub(crate) encoder: PegasusEncoder,
|
||||
decoder: PegasusDecoder,
|
||||
@ -152,7 +152,7 @@ impl PegasusModel {
|
||||
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
|
||||
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
|
||||
///
|
||||
@ -322,7 +322,7 @@ impl PegasusForConditionalGeneration {
|
||||
/// * `input_ids` - Optional input tensor of shape (*batch size*, *source_sequence_length*). Must be provided when not running in generation mode
|
||||
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
|
||||
|
@ -224,7 +224,7 @@ impl ProphetNetModel {
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `encoder_hidden_states` - Optional tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) corresponding to pre-calculated encoder hidden states (useful for conditional generation)
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `old_layer_states` - Optional Vector `Option<Vec<Option<&LayerState>, Option<&LayerState>>>` of length *n_layer* containing tuples with the past keys and values for both the self attention and the encoder cross attention of each layer of the decoder.
|
||||
/// * `decoder_input_embeds` - Optional input tensor of shape (*batch size*, *target_sequence_length*, *embeddings dimension*). This or `decoder_input_ids` must be provided.
|
||||
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
|
||||
@ -431,7 +431,7 @@ impl ProphetNetForConditionalGeneration {
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). Must be provided when running in generation mode (e.g. initialized with a BOS token)
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `encoder_hidden_states` - Optional tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) corresponding to pre-calculated encoder hidden states (useful for conditional generation)
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `old_layer_states` - Optional Vector `Option<Vec<Option<&LayerState>, Option<&LayerState>>>` of length *n_layer* containing tuples with the past keys and values for both the self attention and the encoder cross attention of each layer of the decoder.
|
||||
/// * `decoder_input_embeds` - Optional input tensor of shape (*batch size*, *target_sequence_length*, *embeddings dimension*). This or `decoder_input_ids` must be provided.
|
||||
/// * `train` - boolean flag to turn on/off the dropout layers in the model. Should be set to false for inference.
|
||||
|
@ -11,6 +11,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `ReformerTokenizer` using a `spiece.model` BPE model
|
||||
//!
|
||||
//! Pretrained models on "Crime and Punishment" (Dostoevsky) are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -221,7 +221,7 @@ pub struct PaddedReformerInput {
|
||||
/// It is made of the following blocks:
|
||||
/// - `embeddings`: `ReformerEmbeddings` Reformer embeddings, combining word and position embeddings
|
||||
/// - `encoder`: `ReformerEncoder` (transformer) made of a vector of Reformer layer with local or LSH attention.
|
||||
/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
|
||||
/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
|
||||
/// - `least_common_mult_chunk_length`: least common chunk length for all attention layers
|
||||
/// - `min_chunk_length`: minimum chunk length for all attention layers
|
||||
/// - `pad_token_id`: padding token id used to pad to chunk length multiple if input is long enough to be chunked.
|
||||
|
@ -15,6 +15,7 @@
|
||||
//! - Configuration file expected to have a structure following the [Transformers library](https://github.com/huggingface/transformers)
|
||||
//! - Model weights are expected to have a structure and parameter names following the [Transformers library](https://github.com/huggingface/transformers). A conversion using the Python utility scripts is required to convert the `.bin` weights to the `.ot` format.
|
||||
//! - `RobertaTokenizer` using a `vocab.txt` vocabulary and `merges.txt` 2-gram merges
|
||||
//!
|
||||
//! Pretrained models are available and can be downloaded using RemoteResources.
|
||||
//!
|
||||
//! ```no_run
|
||||
|
@ -237,7 +237,7 @@ impl Default for T5Config {
|
||||
/// It is made of the following blocks:
|
||||
/// - `encoder`: `T5Stack` (transformer) made of a vector of encoding layers
|
||||
/// - `decoder`: `T5Stack` (transformer) made of a vector of decoding layers with self attention and encoder cross-attention.
|
||||
/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
|
||||
/// caching is implemented for the decoder to avoid recalculating static states (encoder key/values and previously calculated decoder key/values)
|
||||
/// - `embeddings`: `nn::Embedding` Shared embeddings for the encoder and decoder.
|
||||
pub struct T5Model {
|
||||
pub(crate) encoder: T5Stack,
|
||||
@ -312,7 +312,7 @@ impl T5Model {
|
||||
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). This or `decoder_input_embeds` must be provided.
|
||||
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `input_embeds` - Optional input tensor of shape (*batch size*, *source_sequence_length*, *embeddings dimension*). This or `input_ids` must be provided.
|
||||
/// * `decoder_input_embeds` - Optional input tensor of shape (*batch size*, *target_sequence_length*, *embeddings dimension*). This or `decoder_input_ids` must be provided.
|
||||
@ -509,7 +509,7 @@ impl T5ForConditionalGeneration {
|
||||
/// * `attention_mask` - Optional attention mask of shape (*batch size*, *source_sequence_length*) for the encoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `decoder_input_ids` - Optional input tensor of shape (*batch size*, *target_sequence_length*). This or `decoder_input_embeds` must be provided.
|
||||
/// * `encoder_outputs` - Optional tuple made of a tensor of shape (*batch size*, *source_sequence_length*, *encoder_hidden_dim*) and optional vectors of tensors of length *num_encoder_layers* with shape (*batch size*, *source_sequence_length*, *hidden_size*).
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// These correspond to the encoder last hidden state and optional hidden states/attention weights for encoder layers. When provided, the encoder hidden state will not be recalculated. Useful for generation tasks.
|
||||
/// * `decoder_attention_mask` - Optional attention mask of shape (*batch size*, *target_sequence_length*) for the decoder positions. Positions with a mask with value 0 will be masked.
|
||||
/// * `input_embeds` - Optional input tensor of shape (*batch size*, *source_sequence_length*, *embeddings dimension*). This or `input_ids` must be provided.
|
||||
/// * `decoder_input_embeds` - Optional input tensor of shape (*batch size*, *target_sequence_length*, *embeddings dimension*). This or `decoder_input_ids` must be provided.
|
||||
|
@ -421,6 +421,7 @@ impl Conversation {
|
||||
/// # Arguments
|
||||
/// - texts: sequence of strings, alternating between past user inputs and past generated responses.
|
||||
/// - ids: sequence of sequence of ids, alternating between past user inputs and past generated responses.
|
||||
///
|
||||
/// These can be generated via a `ConversationModel`'s `encode_prompts`.
|
||||
///
|
||||
/// # Example:
|
||||
|
@ -1,3 +1,4 @@
|
||||
#[allow(clippy::doc_lazy_continuation)]
|
||||
/// BSD 3-Clause License
|
||||
///
|
||||
/// Copyright (c) 2007-2022 The scikit-learn developers.
|
||||
|
@ -23,7 +23,7 @@
|
||||
//! All resources for this model can be downloaded using the Python utility script included in this repository.
|
||||
//! 1. Set-up a Python virtual environment and install dependencies (in ./requirements.txt)
|
||||
//! 2. Run the conversion script python /utils/download-dependencies_bert_ner.py.
|
||||
//! The dependencies will be downloaded to the user's home directory, under ~/rustbert/bert-ner
|
||||
//! The dependencies will be downloaded to the user's home directory, under ~/rustbert/bert-ner
|
||||
//!
|
||||
//! The example below illustrates how to run the model for the default English NER model
|
||||
//! ```no_run
|
||||
|
@ -7,7 +7,7 @@
|
||||
//! installation is to use dynamic linking by pointing to an existing library location:
|
||||
//! - Use the `load-dynamic` cargo feature for `ort`
|
||||
//! - set the `ORT_DYLIB_PATH` to point to the location of downloaded onnxruntime library (`onnxruntime.dll`/`libonnxruntime.so`/`libonnxruntime.dylib`
|
||||
//! depending on the operating system). These can be downloaded from the [release page](https://github.com/microsoft/onnxruntime/releases) of the onnxruntime project
|
||||
//! depending on the operating system). These can be downloaded from the [release page](https://github.com/microsoft/onnxruntime/releases) of the onnxruntime project
|
||||
//!
|
||||
//! For troubleshooting issues when using an ONNX model, it is recommended to add the `tracing-subscriber = { version = "0.3", default-features = false, features = [ "env-filter", "fmt" ] }`
|
||||
//! dependency, and use the `tracing_subscriber::fmt::init();` instruction in the `main` binary.
|
||||
|
@ -25,8 +25,8 @@
|
||||
//! Two APIs exist to build text generation models:
|
||||
//! - `TextGenerationModel` is a high-level module that exposes text generation capabilities with a set of reasonable defaults
|
||||
//! - the `LanguageGenerator` trait exposes lower-level text generation capabilities allowing the user to provide additional
|
||||
//! generation options when building the model (via `GenerateConfig`) and at each query (via `GenerateOptions`). Please check the
|
||||
//! [`generation_utils` module](../generation_utils/index.html) for more details
|
||||
//! generation options when building the model (via `GenerateConfig`) and at each query (via `GenerateOptions`). Please check the
|
||||
//! [`generation_utils` module](../generation_utils/index.html) for more details
|
||||
//!
|
||||
//!
|
||||
//! Customized text generation models can be loaded by overwriting the resources in the configuration.
|
||||
|
@ -30,12 +30,12 @@ enum ModelSize {
|
||||
/// The logic for selecting the most appropriate model is as follows:
|
||||
/// - If not specified, the model will be executed on a CUDA device if available, otherwise on the CPU
|
||||
/// - If the model type is specified (e.g. `Marian`), a model with this architecture will be created. The compatibility of the model
|
||||
/// with the source and target languages will be verified, and the builder will error if the settings provided are not supported.
|
||||
/// with the source and target languages will be verified, and the builder will error if the settings provided are not supported.
|
||||
/// - If the model size is specified, a model of the corresponding size class (computational budget) will be created. The compatibility of the model
|
||||
/// with the source and target languages will be verified, and the builder will error if the settings provided are not supported.
|
||||
/// with the source and target languages will be verified, and the builder will error if the settings provided are not supported.
|
||||
/// - If no source or target languages are provided, a multilingual M2M100 model will be returned
|
||||
/// - If no model type is provided, an average-sized model (Marian) will be returned if a pretrained model exists that covers the requested source/target languages provided.
|
||||
/// Otherwise a M2M100 multi-lingual model will be returned.
|
||||
/// Otherwise a M2M100 multi-lingual model will be returned.
|
||||
///
|
||||
/// The options for the builder are provided with dedicated "builder functions"; the call to `create_model()` creates a model
|
||||
/// from the builder.
|
||||
|
Loading…
Reference in New Issue
Block a user