mirror of https://github.com/guillaume-be/rust-bert.git
synced 2024-10-26 14:07:25 +03:00

Merge remote-tracking branch 'origin/master' into m2m100_implementation

# Conflicts:
#	Cargo.toml

This commit is contained in: commit 0b2e339e87
@@ -5,11 +5,13 @@ All notable changes to this project will be documented in this file. The format

## Added
- (BREAKING) Support for `prefix_allowed_tokens_fn` argument for generation, allowing users to control the generation via custom functions
- (BREAKING) Support for `forced_bos_token_id` argument for generation, allowing users to force a given BOS token for generation (useful for MBart/M2M-class models)
- (BREAKING) Support for `output_scores` boolean argument for generation, allowing users to output the log-probability scores of generated sequences. Updated the return type of the low-level generate API to `GeneratedTextOutput` and `GeneratedIndicesOutput`, containing optional scores along with the generated output.
- Addition of the MBart language model and support for text generation / direct translation between 50 languages
- Addition of the M2M100 language model and support for text generation / direct translation between 100 languages

## Changed
- Updated GPT2 architecture to re-use embeddings for the output projection layer (resulting in smaller model weight files and a smaller memory footprint)
- Upgraded `tch` version to 0.5.0 (using `libtorch` 1.9.0)

## [0.15.1] - 2021-06-01

### Fixed
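Note: taken together, the generation entries above change the low-level `generate` API surface. A minimal sketch of the post-merge call shape, reconstructed from the hunks further down (the prompt and the constraint function are illustrative, not part of this commit):

```rust
// Hypothetical constraint function: takes a batch id and the tokens
// generated so far, and returns the token ids allowed at the next step.
fn force_one_paragraph(_batch_id: i64, _previous_token_ids: &Tensor) -> Vec<i64> {
    vec![] // placeholder body for the sketch
}

let outputs = model.generate(
    Some(&["The dog"]),         // prompt_texts
    None,                       // attention_mask
    None,                       // min_length
    None,                       // max_length
    None,                       // decoder_start_token_id
    None,                       // forced_bos_token_id (new)
    Some(&force_one_paragraph), // prefix_allowed_tokens_fn (new)
    true,                       // output_scores (new)
);
for output in outputs {
    // Each output is now a GeneratedTextOutput { text, score },
    // rather than a plain String.
    println!("{} ({:?})", output.text, output.score);
}
```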
@@ -58,7 +58,7 @@ features = ["doc-only"]

[dependencies]
rust_tokenizers = { version = "~6.2.4", path = "E:/Coding/backup-rust/rust-tokenizers/main" }
-tch = "~0.4.1"
+tch = "~0.5.0"
serde_json = "1.0.64"
serde = { version = "1.0.126", features = ["derive"] }
dirs = "3.0.2"
@@ -72,5 +72,5 @@ thiserror = "1.0.24"
anyhow = "1.0.40"
csv = "1.1.6"
criterion = "0.3.4"
-torch-sys = "0.4.1"
+torch-sys = "0.5.0"
tempfile = "3.2.0"
@@ -71,8 +71,8 @@ This cache location defaults to `~/.cache/.rustbert`, but can be changed by sett

### Manual installation (recommended)

-1. Download `libtorch` from https://pytorch.org/get-started/locally/. This package requires `v1.8.1`: if this version is no longer available on the "get started" page,
-the file should be accessible by modifying the target link, for example `https://download.pytorch.org/libtorch/cu111/libtorch-shared-with-deps-1.8.1%2Bcu111.zip` for a Linux version with CUDA11.
+1. Download `libtorch` from https://pytorch.org/get-started/locally/. This package requires `v1.9.0`: if this version is no longer available on the "get started" page,
+the file should be accessible by modifying the target link, for example `https://download.pytorch.org/libtorch/cu111/libtorch-shared-with-deps-1.9.0%2Bcu111.zip` for a Linux version with CUDA11.
2. Extract the library to a location of your choice
3. Set the following environment variables
##### Linux:
@@ -20,17 +20,17 @@ fn main() -> anyhow::Result<()> {
    let generate_config = TextGenerationConfig {
        model_type: ModelType::GPT2,
        max_length: 30,
-        do_sample: true,
-        num_beams: 5,
-        temperature: 1.1,
-        num_return_sequences: 3,
+        do_sample: false,
+        num_beams: 1,
+        temperature: 1.0,
+        num_return_sequences: 1,
        ..Default::default()
    };
    let model = TextGenerationModel::new(generate_config)?;

    let input_context = "The dog";
-    let second_input_context = "The cat was";
-    let output = model.generate(&[input_context, second_input_context], None);
+    // let second_input_context = "The cat was";
+    let output = model.generate(&[input_context], None);

    for sentence in output {
        println!("{:?}", sentence);
@@ -50,10 +50,11 @@ fn main() -> anyhow::Result<()> {
        None,
        target_language,
        None,
+        false,
    );

    for sentence in output {
-        println!("{:?}", sentence);
+        println!("{:?}", sentence.text);
    }
    Ok(())
}
@@ -41,7 +41,16 @@ fn main() -> anyhow::Result<()> {
    // Define input
    let input = ["translate English to German: This sentence will get translated to German"];

-    let output = t5_model.generate(Some(input.to_vec()), None, None, None, None, None, None);
+    let output = t5_model.generate(
+        Some(input.to_vec()),
+        None,
+        None,
+        None,
+        None,
+        None,
+        None,
+        false,
+    );
    println!("{:?}", output);

    Ok(())
@@ -848,8 +848,8 @@ impl AlbertForQuestionAnswering {
            .apply(&self.qa_outputs)
            .split(1, -1);
        let (start_logits, end_logits) = (&logits[0], &logits[1]);
-        let start_logits = start_logits.squeeze1(-1);
-        let end_logits = end_logits.squeeze1(-1);
+        let start_logits = start_logits.squeeze_dim(-1);
+        let end_logits = end_logits.squeeze_dim(-1);

        AlbertQuestionAnsweringOutput {
            start_logits,
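Note: most of the hunks from here on are mechanical renames following the `tch` 0.4 → 0.5 upgrade: `squeeze1` → `squeeze_dim`, `sum1` → `sum_dim_intlist`, `mean1` → `mean_dim`, `arange1`/`arange2` → `arange_start`/`arange_start_step`, elementwise comparisons `lt1`/`le1`/`gt1`/`ge1`/`ne1` → `lt_tensor`/`le_tensor`/`gt_tensor`/`ge_tensor`/`ne_tensor`, `max1`/`min1` → `max_other`/`min_other`, `max2` → `max_dim`, `where1` → `where_self`, `floor_divide1` → `divide_scalar_mode(_, "floor")`, and `pow2` → `pow_scalar`. A minimal sketch of the recurring question-answering head pattern, assuming a `[batch, seq_len, 2]` projection output (function and tensor names are illustrative):

```rust
use tch::Tensor;

// Split the 2-channel QA projection into start/end logits of shape
// [batch, seq_len], as done in all *ForQuestionAnswering heads below.
fn qa_logits(sequence_output: &Tensor) -> (Tensor, Tensor) {
    let logits = sequence_output.split(1, -1); // two [batch, seq_len, 1] tensors
    let start_logits = logits[0].squeeze_dim(-1); // drop the trailing dim
    let end_logits = logits[1].squeeze_dim(-1);
    (start_logits, end_logits)
}
```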
@@ -240,7 +240,7 @@ pub(crate) fn _make_causal_mask(
    );
    let mask_cond = Tensor::arange(target_length, (dtype, device));
    let _ = mask.masked_fill_(
-        &mask_cond.lt1(&(&mask_cond + 1).view([target_length, 1])),
+        &mask_cond.lt_tensor(&(&mask_cond + 1).view([target_length, 1])),
        0,
    );

@@ -306,7 +306,10 @@ pub(crate) fn _prepare_decoder_attention_mask(
}

fn _shift_tokens_right(input_ids: &Tensor, pad_token_id: i64) -> Tensor {
-    let index_eos: Tensor = input_ids.ne(pad_token_id).sum1(&[-1], true, Int64) - 1;
+    let index_eos: Tensor = input_ids
+        .ne(pad_token_id)
+        .sum_dim_intlist(&[-1], true, Int64)
+        - 1;
    let output = input_ids.empty_like().to_kind(Int64);
    output
        .select(1, 0)
@@ -809,7 +812,7 @@ impl BartForSequenceClassification {
            train,
        );
        let eos_mask = input_ids.eq(self.eos_token_id);
-        let reshape = eos_mask.sum1(&[1], true, Int64);
+        let reshape = eos_mask.sum_dim_intlist(&[1], true, Int64);
        let sentence_representation = base_model_output
            .decoder_output
            .permute(&[2, 0, 1])
@@ -64,7 +64,7 @@ impl LearnedPositionalEmbedding {
    pub fn forward(&self, input: &Tensor, past_key_values_length: i64) -> Tensor {
        let input_shape = input.size();
        let (_, sequence_length) = (input_shape[0], input_shape[1]);
-        let positions = Tensor::arange1(
+        let positions = Tensor::arange_start(
            past_key_values_length,
            past_key_values_length + sequence_length,
            (Int64, input.device()),
@@ -99,7 +99,7 @@ impl SinusoidalPositionalEmbedding {
    pub fn forward(&self, input: &Tensor, past_key_values_length: i64) -> Tensor {
        let input_shape = input.size();
        let (_, sequence_length) = (input_shape[0], input_shape[1]);
-        let positions = Tensor::arange1(
+        let positions = Tensor::arange_start(
            past_key_values_length,
            past_key_values_length + sequence_length,
            (Int64, input.device()),
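Note: the `arange_start` calls above compute position ids offset by the cached key/value length during incremental decoding. A minimal standalone sketch of the same idea (helper name is illustrative):

```rust
use tch::{Kind, Tensor};

// Positions for the current forward pass continue where the cache left off:
// with 3 cached tokens and 2 new tokens, the result is [3, 4].
fn position_ids(input: &Tensor, past_key_values_length: i64) -> Tensor {
    let sequence_length = input.size()[1]; // input: [batch, seq_len]
    Tensor::arange_start(
        past_key_values_length,
        past_key_values_length + sequence_length,
        (Kind::Int64, input.device()),
    )
}
```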
@@ -323,7 +323,7 @@ impl<T: BertEmbedding> BertModel<T> {
                input_shape[1],
                1,
            ]);
-            let causal_mask = causal_mask.le1(&seq_ids.unsqueeze(0).unsqueeze(-1));
+            let causal_mask = causal_mask.le_tensor(&seq_ids.unsqueeze(0).unsqueeze(-1));
            causal_mask * mask.unsqueeze(1).unsqueeze(1)
        } else {
            mask.unsqueeze(1).unsqueeze(1)
@@ -1161,8 +1161,8 @@ impl BertForQuestionAnswering {
        let sequence_output = base_model_output.hidden_state.apply(&self.qa_outputs);
        let logits = sequence_output.split(1, -1);
        let (start_logits, end_logits) = (&logits[0], &logits[1]);
-        let start_logits = start_logits.squeeze1(-1);
-        let end_logits = end_logits.squeeze1(-1);
+        let start_logits = start_logits.squeeze_dim(-1);
+        let end_logits = end_logits.squeeze_dim(-1);

        BertQuestionAnsweringOutput {
            start_logits,
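Note: the `le_tensor` comparison above builds a lower-triangular causal mask: position i may attend to position j only when j <= i. A minimal standalone sketch (CPU device and function name are illustrative):

```rust
use tch::{Device, Kind, Tensor};

// Build an [n, n] mask where entry (i, j) is true iff j <= i.
fn causal_mask(n: i64) -> Tensor {
    let seq_ids = Tensor::arange(n, (Kind::Int64, Device::Cpu));
    // Broadcast a row of column indices [1, n] against a column
    // of row indices [n, 1].
    seq_ids.unsqueeze(0).le_tensor(&seq_ids.unsqueeze(-1))
}
```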
@@ -131,11 +131,12 @@ impl BertLayer {
        encoder_mask: &Option<Tensor>,
        train: bool,
    ) -> BertLayerOutput {
-        let (attention_output, attention_scores, cross_attention_scores) =
-            if self.is_decoder & encoder_hidden_states.is_some() {
-                let (attention_output, attention_weights) =
-                    self.attention
-                        .forward_t(hidden_states, mask, &None, &None, train);
+        let (attention_output, attention_weights) =
+            self.attention
+                .forward_t(hidden_states, mask, &None, &None, train);
+
+        let (attention_output, attention_scores, cross_attention_scores) =
+            if self.is_decoder & encoder_hidden_states.is_some() {
                let (attention_output, cross_attention_weights) =
                    self.cross_attention.as_ref().unwrap().forward_t(
                        &attention_output,
@@ -146,9 +147,6 @@ impl BertLayer {
                    );
                (attention_output, attention_weights, cross_attention_weights)
            } else {
-                let (attention_output, attention_weights) =
-                    self.attention
-                        .forward_t(hidden_states, mask, &None, &None, train);
                (attention_output, attention_weights, None)
            };
@@ -78,7 +78,7 @@ impl SequenceSummary {
    {
        let p = p.borrow();

-        let summary_type = config.summary_type.clone().unwrap_or(SummaryType::last);
+        let summary_type = config.summary_type.unwrap_or(SummaryType::last);
        let summary = if let Some(summary_use_proj) = config.summary_use_proj {
            let num_classes = match (config.summary_proj_to_labels, config.num_labels) {
                (Some(summary_proj_to_labels), Some(num_labels))
@@ -132,7 +132,7 @@ impl SequenceSummary {
        let mut output = match self.summary_type {
            SummaryType::last => hidden_states.select(1, -1),
            SummaryType::first => hidden_states.select(1, 0),
-            SummaryType::mean => hidden_states.mean1(&[1], false, Kind::Float),
+            SummaryType::mean => hidden_states.mean_dim(&[1], false, Kind::Float),
            SummaryType::cls_index => {
                let cls_index = if let Some(cls_index_value) = cls_index {
                    let mut expand_dim = vec![-1i64; cls_index_value.dim() - 1];
@@ -147,7 +147,7 @@ impl SequenceSummary {
                    let fill_value = fill_value[2];
                    hidden_states.select(-2, 0).full_like(fill_value)
                };
-                hidden_states.gather(-2, &cls_index, false).squeeze1(-2)
+                hidden_states.gather(-2, &cls_index, false).squeeze_dim(-2)
            }
        };
@@ -83,7 +83,7 @@ impl MultiHeadSelfAttention {
        let scores = if let Some(mask) = mask {
            let unmasked_scores = q.matmul(&k.transpose(2, 3));
            let mask = mask
-                .le1(&(mask.zeros_like() + 0.1))
+                .le_tensor(&(mask.zeros_like() + 0.1))
                .view((bs, 1i64, 1i64, k_length))
                .expand_as(&unmasked_scores);
            unmasked_scores.masked_fill(&mask, f64::NEG_INFINITY)
@@ -614,8 +614,8 @@ impl DistilBertForQuestionAnswering {

        let logits = output.split(1, -1);
        let (start_logits, end_logits) = (&logits[0], &logits[1]);
-        let start_logits = start_logits.squeeze1(-1);
-        let end_logits = end_logits.squeeze1(-1);
+        let start_logits = start_logits.squeeze_dim(-1);
+        let end_logits = end_logits.squeeze_dim(-1);

        Ok(DistilBertQuestionAnsweringOutput {
            start_logits,
@@ -414,7 +414,7 @@ impl Gpt2Model {

        let position_ids = match position_ids {
            Some(value) => value.copy(),
-            None => Tensor::arange1(
+            None => Tensor::arange_start(
                layer_past_length,
                seq_length + layer_past_length,
                (Int64, input_embeddings.device()),
@@ -132,7 +132,9 @@ pub(crate) trait GptNeoAttentionUtils {
        let query_indices = Self::split_sequence_length_dim_to(&indices, num_blocks, block_length)?;
        let key_indices = Self::look_back(&indices, block_length, window_size, None, false)?;

-        let causal_mask = query_indices.unsqueeze(-1).ge1(&key_indices.unsqueeze(-2));
+        let causal_mask = query_indices
+            .unsqueeze(-1)
+            .ge_tensor(&key_indices.unsqueeze(-2));

        let calc_attention_mask = if attention_mask.is_none() {
            Some(Tensor::ones(
@@ -212,7 +214,7 @@ pub(crate) trait GptNeoAttentionUtils {
    ) -> (Tensor, Tensor) {
        let mut attention_weights = query
            .matmul(&key.transpose(-1, -2))
-            .where1(causal_mask, &masked_bias.to_kind(query.kind()));
+            .where_self(causal_mask, &masked_bias.to_kind(query.kind()));

        if let Some(attention_mask_value) = attention_mask {
            attention_weights = attention_weights + attention_mask_value;
@@ -345,7 +345,7 @@ impl GptNeoModel {

        let calc_position_ids = if position_ids.is_none() {
            let position_ids =
-                Tensor::arange1(past_length, full_sequence_length, (Kind::Int64, device));
+                Tensor::arange_start(past_length, full_sequence_length, (Kind::Int64, device));
            Some(
                position_ids
                    .unsqueeze(0)
@@ -352,7 +352,8 @@ impl LongformerSelfAttention {
        &self,
        is_index_global_attn: &Tensor,
    ) -> GlobalAttentionIndices {
-        let num_global_attention_indices = is_index_global_attn.sum1(&[1], false, Kind::Int64);
+        let num_global_attention_indices =
+            is_index_global_attn.sum_dim_intlist(&[1], false, Kind::Int64);
        let max_num_global_attention_indices = i64::from(num_global_attention_indices.max());
        let is_index_global_attn_nonzero = is_index_global_attn
            .nonzero_numpy()
@@ -364,7 +365,7 @@ impl LongformerSelfAttention {
            max_num_global_attention_indices,
            (Kind::Int64, is_index_global_attn.device()),
        )
-        .lt1(&num_global_attention_indices.unsqueeze(-1));
+        .lt_tensor(&num_global_attention_indices.unsqueeze(-1));

        let is_local_index_global_attention_nonzero = is_local_index_global_attention
            .nonzero_numpy()
@@ -86,7 +86,7 @@ impl LongformerEmbeddings {
        let input_shape = inputs_embeds.size();
        let (batch_size, sequence_length) = (input_shape[0], input_shape[1]);

-        Tensor::arange1(
+        Tensor::arange_start(
            self.pad_token_id + 1,
            sequence_length + self.pad_token_id + 1,
            (Kind::Int64, inputs_embeds.device()),
@@ -140,11 +140,13 @@ fn compute_global_attention_mask(
    let attention_mask = Tensor::arange(input_ids.size()[1], (Kind::Int64, input_ids.device()));

    if before_sep_token {
-        attention_mask.expand_as(input_ids).lt1(&question_end_index)
+        attention_mask
+            .expand_as(input_ids)
+            .lt_tensor(&question_end_index)
    } else {
        attention_mask
            .expand_as(input_ids)
-            .gt1(&(question_end_index + 1))
+            .gt_tensor(&(question_end_index + 1))
            * attention_mask
                .expand_as(input_ids)
                .lt(*input_ids.size().last().unwrap())
@@ -580,7 +582,7 @@ impl LongformerModel {
            .unsqueeze(0)
            .unsqueeze(0)
            .repeat(&[batch_size, sequence_length, 1])
-            .le1(&sequence_ids.unsqueeze(-1).unsqueeze(0))
+            .le_tensor(&sequence_ids.unsqueeze(-1).unsqueeze(0))
            .totype(Kind::Int);
        if causal_mask.size()[1] < padded_attention_mask.size()[1] {
            let prefix_sequence_length =
@@ -1147,8 +1149,8 @@ impl LongformerForQuestionAnswering {
        let sequence_output = base_model_output.hidden_state.apply(&self.qa_outputs);
        let logits = sequence_output.split(1, -1);
        let (start_logits, end_logits) = (&logits[0], &logits[1]);
-        let start_logits = start_logits.squeeze1(-1);
-        let end_logits = end_logits.squeeze1(-1);
+        let start_logits = start_logits.squeeze_dim(-1);
+        let end_logits = end_logits.squeeze_dim(-1);

        Ok(LongformerQuestionAnsweringOutput {
            start_logits,
@@ -111,7 +111,10 @@ impl Config for MBartConfig {}

fn _shift_tokens_right(input_ids: &Tensor, pad_token_id: i64) -> Tensor {
    let output = input_ids.masked_fill(&input_ids.eq(-100), pad_token_id);
-    let index_eos: Tensor = input_ids.ne(pad_token_id).sum1(&[1], true, Int64) - 1;
+    let index_eos: Tensor = input_ids
+        .ne(pad_token_id)
+        .sum_dim_intlist(&[1], true, Int64)
+        - 1;
    output
        .select(1, 0)
        .copy_(&input_ids.gather(1, &index_eos, true).squeeze());
@@ -632,7 +635,7 @@ impl MBartForSequenceClassification {
            train,
        );
        let eos_mask = input_ids.eq(self.eos_token_id);
-        let reshape = eos_mask.sum1(&[1], true, Int64);
+        let reshape = eos_mask.sum_dim_intlist(&[1], true, Int64);
        let sentence_representation = base_model_output
            .decoder_output
            .permute(&[2, 0, 1])
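Note: MBart's `_shift_tokens_right` builds decoder inputs by moving the last non-pad token (the EOS/language code located via `index_eos`) to position 0 and shifting the rest of the row right by one. A minimal sketch of the same rotation on a plain vector (hypothetical helper, not the library API; assumes a non-empty input):

```rust
// Rotate [t1, t2, ..., eos, pad...] into [eos, t1, t2, ..., pad...].
fn shift_tokens_right(input_ids: &[i64], pad_token_id: i64) -> Vec<i64> {
    // Index of the last non-pad token (the language code / EOS position).
    let eos_index = input_ids
        .iter()
        .rposition(|&id| id != pad_token_id)
        .unwrap_or(0);
    let mut output = Vec::with_capacity(input_ids.len());
    output.push(input_ids[eos_index]);
    output.extend_from_slice(&input_ids[..input_ids.len() - 1]);
    output
}
```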
@@ -901,8 +901,8 @@ impl MobileBertForQuestionAnswering {
        let sequence_output = mobilebert_output.hidden_state.apply(&self.qa_outputs);
        let logits = sequence_output.split(1, -1);
        let (start_logits, end_logits) = (&logits[0], &logits[1]);
-        let start_logits = start_logits.squeeze1(-1);
-        let end_logits = end_logits.squeeze1(-1);
+        let start_logits = start_logits.squeeze_dim(-1);
+        let end_logits = end_logits.squeeze_dim(-1);

        Ok(MobileBertQuestionAnsweringOutput {
            start_logits,
@@ -87,7 +87,7 @@ impl SinusoidalPositionalEmbedding {
    pub fn forward(&self, input: &Tensor, past_key_values_length: i64) -> Tensor {
        let input_shape = input.size();
        let (_, sequence_length) = (input_shape[0], input_shape[1]);
-        let positions = Tensor::arange1(
+        let positions = Tensor::arange_start(
            past_key_values_length,
            past_key_values_length + sequence_length,
            (Kind::Int64, input.device()),
@@ -716,7 +716,8 @@ impl ConversationOption {
        attention_mask: Option<Tensor>,
    ) -> Vec<Vec<i64>> {
        match *self {
-            Self::GPT2(ref model) => model.generate_from_ids_and_past(
+            Self::GPT2(ref model) => model
+                .generate_from_ids_and_past(
                    input_ids,
                    attention_mask,
                    None,
@@ -724,7 +725,11 @@ impl ConversationOption {
                    None,
                    None,
                    None,
-            ),
+                    false,
+                )
+                .into_iter()
+                .map(|output| output.indices)
+                .collect(),
        }
    }
}
@@ -48,6 +48,7 @@
//!         decoder_start_id,
//!         forced_bos_token_id,
//!         None,
+//!         false,
//!     );
//! # Ok(())
//! # }
@@ -492,13 +493,13 @@ pub(crate) mod private_generation_utils {
            if min_tokens_to_keep > 1 {
                let _ = sorted_indices_to_remove.index_fill_(
                    1,
-                    &Tensor::arange1(0, min_tokens_to_keep + 1, (Int64, logits.device())),
+                    &Tensor::arange_start(0, min_tokens_to_keep + 1, (Int64, logits.device())),
                    0,
                );
            }
            let _ = sorted_indices_to_remove.index_copy_(
                1,
-                &Tensor::arange1(1, vocab_size, (Int64, logits.device())),
+                &Tensor::arange_start(1, vocab_size, (Int64, logits.device())),
                &sorted_indices_to_remove
                    .slice(1, 0, vocab_size - 1, 1)
                    .copy(),
@@ -585,7 +586,8 @@ pub(crate) mod private_generation_utils {
            attention_mask: Tensor,
            gen_opt: GenerateOptions,
            prefix_allowed_tokens_fn: Option<&dyn Fn(i64, &Tensor) -> Vec<i64>>,
-        ) -> Tensor {
+            output_scores: bool,
+        ) -> (Tensor, Option<Vec<f64>>) {
            let mut unfinished_sentences =
                Tensor::ones(&[batch_size], (Int64, self.get_var_store().device()));
            let mut sentence_lengths: Tensor =
@@ -596,6 +598,14 @@ pub(crate) mod private_generation_utils {
            let mut past: Cache = Cache::None;
            let mut outputs: Tensor;
            let mut current_length = cur_len;
+            let mut scores_output = if output_scores {
+                Some(Tensor::zeros(
+                    &[batch_size],
+                    (Float, self.get_var_store().device()),
+                ))
+            } else {
+                None
+            };

            while current_length < gen_opt.max_length {
                let prepared_input = self.prepare_inputs_for_generation(
@@ -690,11 +700,22 @@ pub(crate) mod private_generation_utils {
                        1,
                    );
                    let probabilities = next_token_logits.softmax(-1, Float);
-                    probabilities.multinomial(1, false).squeeze1(1)
+                    probabilities.multinomial(1, false).squeeze_dim(1)
                } else {
                    next_token_logits.argmax(-1, false)
                };

+                if let Some(prev_scores) = scores_output {
+                    let finished_mask = unfinished_sentences.eq(0);
+                    scores_output = Some(
+                        prev_scores
+                            + (&next_token_logits
+                                .log_softmax(-1, Float)
+                                .gather(1, &next_token.reshape(&[-1, 1]), true)
+                                .squeeze()
+                                .masked_fill(&finished_mask, 0)),
+                    );
+                }
                // Add tokens to unfinished sentences
                let tokens_to_add = match &gen_opt.eos_token_ids {
                    Some(_) => {
@@ -736,7 +757,14 @@ pub(crate) mod private_generation_utils {
                }
                current_length += 1;
            }
-            input_ids
+            let scores_output = scores_output.map(|scores_tensor| {
+                (scores_tensor / sentence_lengths.pow(gen_opt.length_penalty))
+                    .iter::<f64>()
+                    .unwrap()
+                    .collect::<Vec<f64>>()
+            });
+
+            (input_ids, scores_output)
        }

        fn generate_beam_search(
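Note: greedy/sampling search now accumulates a per-sequence score: at each step the log-softmax probability of the chosen token is added for sequences that are still unfinished (finished rows are masked to 0), and the final sum is normalized by `length^length_penalty`. A minimal sketch of the same bookkeeping on plain floats (assuming per-step log-probabilities, indexed as `[step][sequence]`, are already available):

```rust
// Sum each sequence's per-step log-probabilities and apply a length penalty,
// mirroring the normalization added above.
fn sequence_scores(step_log_probs: &[Vec<f64>], lengths: &[f64], length_penalty: f64) -> Vec<f64> {
    (0..lengths.len())
        .map(|i| {
            let total: f64 = step_log_probs.iter().map(|step| step[i]).sum();
            total / lengths[i].powf(length_penalty)
        })
        .collect()
}
```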
@@ -748,7 +776,8 @@ pub(crate) mod private_generation_utils {
            mut attention_mask: Tensor,
            gen_opt: GenerateOptions,
            prefix_allowed_tokens_fn: Option<&dyn Fn(i64, &Tensor) -> Vec<i64>>,
-        ) -> Tensor {
+            output_scores: bool,
+        ) -> (Tensor, Option<Vec<f64>>) {
            let num_beam_groups = gen_opt.num_beam_groups.unwrap_or(1);
            let num_sub_beams = gen_opt.num_beams / num_beam_groups;
            let diversity_penalty = gen_opt.diversity_penalty.unwrap_or(5.5);
@@ -960,12 +989,12 @@ pub(crate) mod private_generation_utils {
            };

            let eos_token_ids = gen_opt.eos_token_ids.as_ref();
-            let beam_ids_tensor = &next_tokens.floor_divide1(vocab_size);
+            let beam_ids_tensor = &next_tokens.divide_scalar_mode(vocab_size, "floor");
            let effective_beam_ids_tensor = (&next_tokens.ones_like().cumsum(0, Int64) - 1)
                * group_size
                + beam_ids_tensor;
            let token_id_tensor = &next_tokens - beam_ids_tensor * vocab_size;
-            let (max_scores, _) = next_scores.max2(1, false);
+            let (max_scores, _) = next_scores.max_dim(1, false);
            let mut eos_mask = token_id_tensor.ones_like();
            if let Some(eos_token_id) = eos_token_ids {
                eos_mask -= token_id_tensor.eq(eos_token_id[0]).to_kind(Int64);
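Note: beam-search scores live in a flattened `[batch, num_beams * vocab_size]` tensor, so a selected index encodes both the source beam and the token: `beam_id = index / vocab_size` (floor division, now spelled `divide_scalar_mode(_, "floor")`) and `token_id = index % vocab_size`. A scalar sketch (the vocab size in the comment is illustrative):

```rust
// Decode a flat beam-search index into (beam_id, token_id),
// matching beam_ids_tensor / token_id_tensor above.
// e.g. decode_beam_index(2 * 50_000 + 42, 50_000) == (2, 42)
fn decode_beam_index(flat_index: i64, vocab_size: i64) -> (i64, i64) {
    let beam_id = flat_index.div_euclid(vocab_size); // floor division
    let token_id = flat_index.rem_euclid(vocab_size);
    (beam_id, token_id)
}
```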
@@ -1034,7 +1063,7 @@ pub(crate) mod private_generation_utils {
                        &group_beam_tokens,
                    );
                    let new_indices = gen_opt.num_beams
-                        * group_beam_indices.floor_divide1(group_size)
+                        * group_beam_indices.divide_scalar_mode(group_size, "floor")
                        + group_start_index
                        + group_beam_indices.remainder(group_size);
                    let _ = beam_indices.index_copy_(
@@ -1110,6 +1139,11 @@ pub(crate) mod private_generation_utils {
                Tensor::zeros(&[output_batch_size], (Int64, input_ids.device()));
            let mut best_ids = vec![];

+            let mut scores_output = if output_scores {
+                Some(Vec::with_capacity(best_ids.len()))
+            } else {
+                None
+            };
            for (hypothesis_index, hypothesis) in hypotheses.iter().enumerate() {
                let mut sorted_hypotheses = hypothesis.clone();
                sorted_hypotheses
@@ -1118,13 +1152,16 @@ pub(crate) mod private_generation_utils {
                for j in 0..output_num_return_sequences_per_batch {
                    let effective_batch_index =
                        output_num_return_sequences_per_batch * hypothesis_index as i64 + j;
-                    let (_, best_hyp) = sorted_hypotheses.beams.pop().unwrap();
+                    let (best_score, best_hyp) = sorted_hypotheses.beams.pop().unwrap();
                    let _ = sentence_lengths.index_fill_(
                        0,
                        &Tensor::of_slice(&[effective_batch_index]).to(sentence_lengths.device()),
                        *best_hyp.size().first().unwrap(),
                    );
                    best_ids.push(best_hyp);
+                    if let Some(current_best_scores) = &mut scores_output {
+                        current_best_scores.push(best_score);
+                    }
                }
            }
            let sentence_max_length =
@@ -1143,7 +1180,7 @@ pub(crate) mod private_generation_utils {
            for (hypothesis_index, best_id) in best_ids.iter().enumerate() {
                let _ = decoded.get(hypothesis_index as i64).index_copy_(
                    0,
-                    &Tensor::arange1(
+                    &Tensor::arange_start(
                        0,
                        i64::from(sentence_lengths.get(hypothesis_index as i64)),
                        (Int64, input_ids.device()),
@@ -1159,7 +1196,7 @@ pub(crate) mod private_generation_utils {
                    );
                }
            }
-            decoded
+            (decoded, scores_output)
        }

        fn reorder_cache(
@@ -1178,6 +1215,22 @@ pub(crate) mod private_generation_utils {
        }
    }

+#[derive(Debug, Clone)]
+/// # Generated text output
+/// Contains generated text and an optional log-likelihood score for the generated sequence
+pub struct GeneratedTextOutput {
+    pub text: String,
+    pub score: Option<f64>,
+}
+
+#[derive(Debug, Clone)]
+/// # Generated indices output
+/// Contains generated indices and an optional log-likelihood score for the generated sequence
+pub struct GeneratedIndicesOutput {
+    pub indices: Vec<i64>,
+    pub score: Option<f64>,
+}
+
/// # Common trait for text generation models.
/// Main API for text generation
pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
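Note: callers that previously consumed `Vec<String>` / `Vec<Vec<i64>>` now receive these structs; the sequences are unchanged, with a score attached when `output_scores` is set. A sketch of consuming the new return type (model setup omitted; the prompt is illustrative):

```rust
// Each element is a GeneratedTextOutput; score is None unless
// output_scores was passed as true.
let outputs = model.generate(
    Some(&["Hello"]), None, None, None, None, None, None, true,
);
for GeneratedTextOutput { text, score } in outputs {
    if let Some(score) = score {
        println!("{} (log-likelihood {:.4})", text, score);
    } else {
        println!("{}", text);
    }
}
```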
@@ -1195,7 +1248,7 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
    /// * `prefix_allowed_tokens_fn` - `Option<&dyn Fn(i64, &Tensor) -> Vec<i64>>` Optional function to control the generation process. The function should take a `batch_id` (i64) and a tensor of token_ids already generated and returns a `Vec<i64>` of allowed tokens.
    ///
    /// # Returns
-    /// * `Vec<String>` Vector of generated strings based on the prompts of length *number_of_prompts* x *num_return_sequences*.
+    /// * `Vec<TextOutput>` Vector of length *number_of_prompts* x *num_return_sequences* containing TextOutput with the generated texts and the generation score if `output_scores` is true.
    ///
    /// # Example
    ///
@@ -1231,6 +1284,7 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
    /// let max_length = 128;
    /// let decoder_start_token_id = None;
    /// let forced_bos_token_id = None;
+    /// let output_scores = true;
    ///
    /// //Example custom function for fine-grained generation control
    /// fn force_one_paragraph(_batch_id: i64, previous_token_ids: &Tensor) -> Vec<i64> {
@@ -1257,6 +1311,7 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
    ///     decoder_start_token_id,
    ///     forced_bos_token_id,
    ///     Some(&force_one_paragraph),
+    ///     output_scores,
    /// );
    /// # Ok(())
    /// # }
@@ -1283,11 +1338,12 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
        decoder_start_token_id: impl Into<Option<i64>>,
        forced_bos_token_id: impl Into<Option<i64>>,
        prefix_allowed_tokens_fn: Option<&dyn Fn(i64, &Tensor) -> Vec<i64>>,
-    ) -> Vec<String>
+        output_scores: bool,
+    ) -> Vec<GeneratedTextOutput>
    where
        S: AsRef<[&'a str]>,
    {
-        let generated = self.generate_indices(
+        let indices_outputs = self.generate_indices(
            prompt_texts,
            attention_mask,
            min_length,
@@ -1295,10 +1351,16 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
            decoder_start_token_id,
            forced_bos_token_id,
            prefix_allowed_tokens_fn,
+            output_scores,
        );
-        let mut output = Vec::with_capacity(generated.len());
-        for generated_sequence in generated {
-            output.push(self._get_tokenizer().decode(generated_sequence, true, true));
+        let mut output = Vec::with_capacity(indices_outputs.len());
+        for generated_sequence in indices_outputs {
+            output.push(GeneratedTextOutput {
+                text: self
+                    ._get_tokenizer()
+                    .decode(generated_sequence.indices, true, true),
+                score: generated_sequence.score,
+            });
        }
        output
    }
@@ -1315,7 +1377,7 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
    /// * `prefix_allowed_tokens_fn` - `Option<&dyn Fn(i64, &Tensor) -> Vec<i64>>` Optional function to control the generation process. The function should take a `batch_id` (i64) and a tensor of token_ids already generated and returns a `Vec<i64>` of allowed tokens.
    ///
    /// # Returns
-    /// * `Vec<Vec<i64>>` Vector of Vector of generated token indices based on the prompts of length *number_of_prompts* x *num_return_sequences*.
+    /// * `Vec<IndicesOutput>` Vector of length *number_of_prompts* x *num_return_sequences* containing IndicesOutput with the generated indices and the generation score if `output_scores` is true.
    ///
    /// # Example
    ///
@@ -1350,6 +1412,7 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
    /// let max_length = 128;
    /// let decoder_start_token_id = None;
    /// let forced_bos_token_id = None;
+    /// let output_scores = true;
    ///
    /// //Example custom function for fine-grained generation control
    /// fn force_one_paragraph(_batch_id: i64, previous_token_ids: &Tensor) -> Vec<i64> {
@@ -1376,6 +1439,7 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
    ///     decoder_start_token_id,
    ///     forced_bos_token_id,
    ///     Some(&force_one_paragraph),
+    ///     output_scores,
    /// );
    /// # Ok(())
    /// # }
@@ -1389,7 +1453,8 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
        decoder_start_token_id: impl Into<Option<i64>>,
        forced_bos_token_id: impl Into<Option<i64>>,
        prefix_allowed_tokens_fn: Option<&dyn Fn(i64, &Tensor) -> Vec<i64>>,
-    ) -> Vec<Vec<i64>>
+        output_scores: bool,
+    ) -> Vec<GeneratedIndicesOutput>
    where
        S: AsRef<[&'a str]>,
    {
@@ -1426,6 +1491,7 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
            decoder_start_token_id,
            forced_bos_token_id,
            prefix_allowed_tokens_fn,
+            output_scores,
        )
    }
@@ -1442,7 +1508,7 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
    /// * `prefix_allowed_tokens_fn` - `Option<&dyn Fn(i64, &Tensor) -> Vec<i64>>` Optional function to control the generation process. The function should take a `batch_id` (i64) and a tensor of token_ids already generated and returns a `Vec<i64>` of allowed tokens.
    ///
    /// # Returns
-    /// * `Vec<Vec<i64>>` Vector of Vector of generated token indices based on the prompts of length *number_of_prompts* x *num_return_sequences*.
+    /// * `Vec<IndicesOutput>` Vector of length *number_of_prompts* x *num_return_sequences* containing IndicesOutput with the generated indices and the generation score if `output_scores` is true.
    ///
    /// # Example
    ///
@@ -1477,6 +1543,7 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
    /// let max_length = 128;
    /// let decoder_start_token_id = None;
    /// let forced_bos_token_id = None;
+    /// let output_scores = true;
    ///
    /// //Example custom function for fine-grained generation control
    /// fn force_one_paragraph(_batch_id: i64, previous_token_ids: &Tensor) -> Vec<i64> {
@@ -1503,6 +1570,7 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
    ///     decoder_start_token_id,
    ///     forced_bos_token_id,
    ///     Some(&force_one_paragraph),
+    ///     output_scores,
    /// );
    /// # Ok(())
    /// # }
@@ -1516,7 +1584,8 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
        decoder_start_token_id: impl Into<Option<i64>>,
        forced_bos_token_id: impl Into<Option<i64>>,
        prefix_allowed_tokens_fn: Option<&dyn Fn(i64, &Tensor) -> Vec<i64>>,
-    ) -> Vec<Vec<i64>> {
+        output_scores: bool,
+    ) -> Vec<GeneratedIndicesOutput> {
        let eos_token_ids = PrivateLanguageGenerator::get_eos_ids(self).clone();

        let config = PrivateLanguageGenerator::get_config(self);
@@ -1647,7 +1716,7 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
            forced_bos_token_id: forced_bos_token_id.into(),
        };

-        let decoded = no_grad(|| {
+        let (decoded, scores) = no_grad(|| {
            if num_beams > 1 {
                self.generate_beam_search(
                    input_ids,
@@ -1657,6 +1726,7 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
                    attention_mask,
                    gen_opt,
                    prefix_allowed_tokens_fn,
+                    output_scores,
                )
            } else {
                self.generate_no_beam_search(
@@ -1667,21 +1737,25 @@ pub trait LanguageGenerator<T: LMHeadModel, V: Vocab, U: Tokenizer<V>>:
                    attention_mask,
                    gen_opt,
                    prefix_allowed_tokens_fn,
+                    output_scores,
                )
            }
        });
        let num_sequences = *decoded.size().first().unwrap();
-        let mut output_ids = Vec::with_capacity(num_sequences as usize);
+        let mut output = Vec::with_capacity(num_sequences as usize);
        for sequence_index in 0..num_sequences {
-            let sequence_output_ids = decoded
+            let indices = decoded
                .as_ref()
                .get(sequence_index)
                .iter::<i64>()
                .unwrap()
                .collect::<Vec<i64>>();
-            output_ids.push(sequence_output_ids.clone());
+            let score = scores
+                .as_ref()
+                .map(|scores_value| scores_value[sequence_index as usize]);
+            output.push(GeneratedIndicesOutput { indices, score });
        }
-        output_ids
+        output
    }

    /// Returns a reference to the text generator's tokenizer
@@ -597,10 +597,10 @@ impl SequenceClassificationModel {
            );
            output.softmax(-1, Kind::Float).detach().to(Device::Cpu)
        });
-        let label_indices = output.as_ref().argmax(-1, true).squeeze1(1);
+        let label_indices = output.as_ref().argmax(-1, true).squeeze_dim(1);
        let scores = output
            .gather(1, &label_indices.unsqueeze(-1), false)
-            .squeeze1(1);
+            .squeeze_dim(1);
        let label_indices = label_indices.iter::<i64>().unwrap().collect::<Vec<i64>>();
        let scores = scores.iter::<f64>().unwrap().collect::<Vec<f64>>();
@@ -263,18 +263,62 @@ impl SummarizationOption {
        S: AsRef<[&'a str]>,
    {
        match *self {
-            Self::Bart(ref model) => {
-                model.generate(prompt_texts, attention_mask, None, None, None, None, None)
-            }
-            Self::T5(ref model) => {
-                model.generate(prompt_texts, attention_mask, None, None, None, None, None)
-            }
-            Self::ProphetNet(ref model) => {
-                model.generate(prompt_texts, attention_mask, None, None, None, None, None)
-            }
-            Self::Pegasus(ref model) => {
-                model.generate(prompt_texts, attention_mask, None, None, None, None, None)
-            }
+            Self::Bart(ref model) => model
+                .generate(
+                    prompt_texts,
+                    attention_mask,
+                    None,
+                    None,
+                    None,
+                    None,
+                    None,
+                    false,
+                )
+                .into_iter()
+                .map(|output| output.text)
+                .collect(),
+            Self::T5(ref model) => model
+                .generate(
+                    prompt_texts,
+                    attention_mask,
+                    None,
+                    None,
+                    None,
+                    None,
+                    None,
+                    false,
+                )
+                .into_iter()
+                .map(|output| output.text)
+                .collect(),
+            Self::ProphetNet(ref model) => model
+                .generate(
+                    prompt_texts,
+                    attention_mask,
+                    None,
+                    None,
+                    None,
+                    None,
+                    None,
+                    false,
+                )
+                .into_iter()
+                .map(|output| output.text)
+                .collect(),
+            Self::Pegasus(ref model) => model
+                .generate(
+                    prompt_texts,
+                    attention_mask,
+                    None,
+                    None,
+                    None,
+                    None,
+                    None,
+                    false,
+                )
+                .into_iter()
+                .map(|output| output.text)
+                .collect(),
        }
    }
}
@@ -249,7 +249,8 @@ impl TextGenerationOption {
        S: AsRef<[&'a str]>,
    {
        match *self {
-            Self::GPT(ref model) => model.generate_indices(
+            Self::GPT(ref model) => model
+                .generate_indices(
                    prompt_texts,
                    attention_mask,
                    min_length,
@@ -257,8 +258,13 @@ impl TextGenerationOption {
                    None,
                    None,
                    None,
-            ),
-            Self::GPT2(ref model) => model.generate_indices(
+                    false,
+                )
+                .into_iter()
+                .map(|output| output.indices)
+                .collect(),
+            Self::GPT2(ref model) => model
+                .generate_indices(
                    prompt_texts,
                    attention_mask,
                    min_length,
@@ -266,8 +272,13 @@ impl TextGenerationOption {
                    None,
                    None,
                    None,
-            ),
-            Self::GPTNeo(ref model) => model.generate_indices(
+                    false,
+                )
+                .into_iter()
+                .map(|output| output.indices)
+                .collect(),
+            Self::GPTNeo(ref model) => model
+                .generate_indices(
                    prompt_texts,
                    attention_mask,
                    min_length,
@@ -275,8 +286,13 @@ impl TextGenerationOption {
                    None,
                    None,
                    None,
-            ),
-            Self::XLNet(ref model) => model.generate_indices(
+                    false,
+                )
+                .into_iter()
+                .map(|output| output.indices)
+                .collect(),
+            Self::XLNet(ref model) => model
+                .generate_indices(
                    prompt_texts,
                    attention_mask,
                    min_length,
@@ -284,8 +300,13 @@ impl TextGenerationOption {
                    None,
                    None,
                    None,
-            ),
-            Self::Reformer(ref model) => model.generate_indices(
+                    false,
+                )
+                .into_iter()
+                .map(|output| output.indices)
+                .collect(),
+            Self::Reformer(ref model) => model
+                .generate_indices(
                    prompt_texts,
                    attention_mask,
                    min_length,
@@ -293,7 +314,11 @@ impl TextGenerationOption {
                    None,
                    None,
                    None,
-            ),
+                    false,
+                )
+                .into_iter()
+                .map(|output| output.indices)
+                .collect(),
        }
    }
}
@@ -692,7 +692,7 @@ impl TokenClassificationModel {
            )
        });
        let output = output.detach().to(Device::Cpu);
-        let score: Tensor = output.exp() / output.exp().sum1(&[-1], true, Float);
+        let score: Tensor = output.exp() / output.exp().sum_dim_intlist(&[-1], true, Float);
        let labels_idx = &score.argmax(-1, true);
        let mut tokens: Vec<Vec<Token>> = vec![];
        for sentence_idx in 0..labels_idx.size()[0] {
@@ -674,12 +674,34 @@ impl TranslationOption {
        S: AsRef<[&'a str]>,
    {
        match *self {
-            Self::Marian(ref model) => {
-                model.generate(prompt_texts, attention_mask, None, None, None, None, None)
-            }
-            Self::T5(ref model) => {
-                model.generate(prompt_texts, attention_mask, None, None, None, None, None)
-            }
+            Self::Marian(ref model) => model
+                .generate(
+                    prompt_texts,
+                    attention_mask,
+                    None,
+                    None,
+                    None,
+                    None,
+                    None,
+                    false,
+                )
+                .into_iter()
+                .map(|output| output.text)
+                .collect(),
+            Self::T5(ref model) => model
+                .generate(
+                    prompt_texts,
+                    attention_mask,
+                    None,
+                    None,
+                    None,
+                    None,
+                    None,
+                    false,
+                )
+                .into_iter()
+                .map(|output| output.text)
+                .collect(),
        }
    }
}
@@ -687,10 +687,10 @@ impl ZeroShotClassificationModel {
        });

        let scores = output.softmax(1, Float).select(-1, -1);
-        let label_indices = scores.as_ref().argmax(-1, true).squeeze1(1);
+        let label_indices = scores.as_ref().argmax(-1, true).squeeze_dim(1);
        let scores = scores
            .gather(1, &label_indices.unsqueeze(-1), false)
-            .squeeze1(1);
+            .squeeze_dim(1);
        let label_indices = label_indices.iter::<i64>().unwrap().collect::<Vec<i64>>();
        let scores = scores.iter::<f64>().unwrap().collect::<Vec<f64>>();
@@ -602,7 +602,7 @@ impl ProphetNetNgramAttention {
        let hidden_states_size = hidden_states.size();
        let (sequence_length, batch_size) = (hidden_states_size[0], hidden_states_size[1]);
        let calc_main_relative_position_buckets = if main_relative_position_buckets.is_none() {
-            let relative_positions = Tensor::arange1(
+            let relative_positions = Tensor::arange_start(
                1,
                attention_weights.size().last().unwrap() + 1,
                (Kind::Int64, hidden_states.device()),
@@ -742,7 +742,7 @@ pub(crate) fn compute_relative_buckets(
        (
            num_buckets,
            relative_positions.zeros_like(),
-            inverse_relative_positions.max1(&inverse_relative_positions.zeros_like()),
+            inverse_relative_positions.max_other(&inverse_relative_positions.zeros_like()),
        )
    };
    let max_exact = num_buckets / 2;
@@ -754,10 +754,10 @@ pub(crate) fn compute_relative_buckets(
        + max_exact_f64;

    let val_if_large = val_if_large
-        .min1(&(val_if_large.ones_like() * (num_buckets as f64 - 1.0)))
+        .min_other(&(val_if_large.ones_like() * (num_buckets as f64 - 1.0)))
        .totype(Kind::Int64);

-    relative_positions_bucket + inverse_relative_positions.where1(&is_small, &val_if_large)
+    relative_positions_bucket + inverse_relative_positions.where_self(&is_small, &val_if_large)
}

pub(crate) fn compute_all_stream_relative_buckets(
@@ -308,9 +308,9 @@ impl ProphetNetDecoder {

        let hidden_states = (input_embeds + main_stream_pos_embed).transpose(0, 1);

-        let (mut ngram_hidden_states, extended_attention_mask, extended_predict_attention_mask) =
-            if old_layer_states.is_some() {
+        let (mut ngram_hidden_states, extended_attention_mask, extended_predict_attention_mask) = {
+            let mut ngram_hidden_states = Vec::with_capacity(self.ngram as usize);
+            if old_layer_states.is_some() {
                for ngram in 0..self.ngram {
                    ngram_hidden_states.push(
                        (&self.ngram_embeddings.get(ngram - 1) + &predicting_stream_pos_embed)
@@ -320,7 +320,6 @@ impl ProphetNetDecoder {
                }
                (ngram_hidden_states, None, None)
            } else {
-                let mut ngram_hidden_states = Vec::with_capacity(self.ngram as usize);
                for ngram in 0..self.ngram {
                    ngram_hidden_states.push(
                        (&self.ngram_embeddings.get(ngram - 1) + &predicting_stream_pos_embed)
@@ -336,6 +335,7 @@ impl ProphetNetDecoder {
                    Some(extended_attention_mask),
                    Some(extended_predict_attention_mask),
                )
            }
        };

        let extended_encoder_attention_mask =
@@ -510,7 +510,7 @@ impl ProphetNetDecoder {
        let input_size = position_ids.size();
        let (batch_size, sequence_length) = (input_size[0], input_size[1]);

-        let position_ids = Tensor::arange1(
+        let position_ids = Tensor::arange_start(
            1,
            self.max_target_positions,
            (Kind::Int64, position_ids.device()),
@@ -307,7 +307,7 @@ impl LSHSelfAttention {
            .unsqueeze(1)
            .expand(&buckets.size(), true)
            .to_kind(Kind::Bool);
-        buckets = buckets.where1(
+        buckets = buckets.where_self(
            &buckets_mask,
            &Tensor::of_slice(&[num_buckets - 1])
                .to_kind(Kind::Float)
@@ -423,15 +423,16 @@ impl LSHSelfAttention {
        );

        if let Some(mask) = mask {
-            query_key_dots = query_key_dots.where1(&mask.to_kind(Kind::Bool), &self.mask_value);
+            query_key_dots =
+                query_key_dots.where_self(&mask.to_kind(Kind::Bool), &self.mask_value);
            }
        }
        {
            let self_mask = query_bucket_idx
                .unsqueeze(-1)
-                .ne1(&key_value_bucket_idx.unsqueeze(-2));
+                .ne_tensor(&key_value_bucket_idx.unsqueeze(-2));
            query_key_dots =
-                query_key_dots.where1(&self_mask.to_kind(Kind::Bool), &self.self_mask_value);
+                query_key_dots.where_self(&self_mask.to_kind(Kind::Bool), &self.self_mask_value);
        }

        let mut logits = query_key_dots.logsumexp(&[-1], true);
@@ -441,7 +442,7 @@ impl LSHSelfAttention {

        let mut out_vectors = attention_probs.matmul(&value_vectors);
        if out_vectors.dim() > 4 {
-            logits = logits.flatten(2, 3).squeeze1(-1);
+            logits = logits.flatten(2, 3).squeeze_dim(-1);
            out_vectors = out_vectors.flatten(2, 3)
        }

@@ -476,7 +477,9 @@ impl LSHSelfAttention {
        };

        if self.is_decoder {
-            let causal_mask = query_indices.unsqueeze(-1).ge1(&key_indices.unsqueeze(-2));
+            let causal_mask = query_indices
+                .unsqueeze(-1)
+                .ge_tensor(&key_indices.unsqueeze(-2));
            let attention_mask = if let Some(attention_mask) = attention_mask {
                causal_mask * attention_mask
            } else {
@@ -534,7 +537,10 @@ impl LSHSelfAttention {
                *relevant_bucket_indices_chunk.size().last().unwrap(),
                (Kind::Int64, hidden_states.device()),
            )
-            .floor_divide1(*relevant_bucket_indices_chunk.size().last().unwrap()));
+            .divide_scalar_mode(
+                *relevant_bucket_indices_chunk.size().last().unwrap(),
+                "floor",
+            ));

        let relevant_bucket_indices_chunk_all_batch =
            &relevant_bucket_indices_chunk + bucket_indices_batch_offset;
@@ -566,7 +572,9 @@ impl LSHSelfAttention {
        indices: &Tensor,
        sequence_length: i64,
    ) -> Tensor {
-        let start_indices_chunk = (indices.select(1, -1).floor_divide1(self.chunk_length)
+        let start_indices_chunk = (indices
+            .select(1, -1)
+            .divide_scalar_mode(self.chunk_length, "floor")
            - self.num_chunks_before)
            * self.chunk_length;
        let total_chunk_size =
@@ -593,7 +601,7 @@ impl LSHSelfAttention {
    }

    fn len_norm(&self, input_tensor: &Tensor, epsilon: f64) -> Tensor {
-        let variance = (input_tensor * input_tensor).mean1(&[-1], true, input_tensor.kind());
+        let variance = (input_tensor * input_tensor).mean_dim(&[-1], true, input_tensor.kind());
        input_tensor * (variance + epsilon).rsqrt()
    }

@@ -850,7 +858,7 @@ impl LSHSelfAttention {
            )?
            .unsqueeze(-1);
            let probs_vectors = (&logits - &logits.logsumexp(&[2], true)).exp();
-            out_vectors = (out_vectors * probs_vectors).sum1(&[2], false, Kind::Float);
+            out_vectors = (out_vectors * probs_vectors).sum_dim_intlist(&[2], false, Kind::Float);
        }

        out_vectors = merge_hidden_size_dim(
@@ -967,7 +975,9 @@ impl LocalSelfAttention {
        });

        if self.is_decoder {
-            let causal_mask = query_indices.unsqueeze(-1).ge1(&key_indices.unsqueeze(-2));
+            let causal_mask = query_indices
+                .unsqueeze(-1)
+                .ge_tensor(&key_indices.unsqueeze(-2));
            attention_mask = Some(if let Some(mask) = attention_mask {
                causal_mask * mask
            } else {
@@ -1087,7 +1097,7 @@ impl LocalSelfAttention {
        );

        if let Some(mask) = attention_mask {
-            query_key_dots = query_key_dots.where1(&mask.to_kind(Kind::Bool), &self.mask_value);
+            query_key_dots = query_key_dots.where_self(&mask.to_kind(Kind::Bool), &self.mask_value);
        }

        let logits = query_key_dots.logsumexp(&[-1], true);
@@ -263,10 +263,9 @@ impl ReformerEmbeddings {

        let calc_position_ids = if position_ids.is_none() {
            Some(
-                Tensor::arange2(
+                Tensor::arange_start(
                    start_ids_pos_encoding,
                    start_ids_pos_encoding + input_shape[1],
-                    1,
                    (Kind::Int64, device),
                )
                .unsqueeze(0)
@@ -418,10 +418,9 @@ impl ReformerModel {
        let input_ids = Tensor::cat(&[input_ids, &input_ids_padding], -1);
        new_input_shape = input_ids.size();
        let position_ids = if let Some(position_ids) = position_ids {
-            let position_ids_padding = Tensor::arange2(
+            let position_ids_padding = Tensor::arange_start(
                *input_shape.last().unwrap(),
                self.least_common_mult_chunk_length,
-                1,
                (Kind::Int64, device),
            )
            .unsqueeze(0)
@@ -972,8 +971,8 @@ impl ReformerForQuestionAnswering {
            .apply(&self.qa_outputs)
            .split(1, -1);
        let (start_logits, end_logits) = (&logits[0], &logits[1]);
-        let start_logits = start_logits.squeeze1(-1);
-        let end_logits = end_logits.squeeze1(-1);
+        let start_logits = start_logits.squeeze_dim(-1);
+        let end_logits = end_logits.squeeze_dim(-1);

        Ok(ReformerQuestionAnsweringModelOutput {
            start_logits,
@@ -39,7 +39,7 @@ impl RobertaEmbeddings {
    fn create_position_ids_from_embeddings(&self, x: &Tensor) -> Tensor {
        let input_shape = x.size();
        let input_shape = vec![input_shape[0], input_shape[1]];
-        let position_ids: Tensor = Tensor::arange1(
+        let position_ids: Tensor = Tensor::arange_start(
            self.padding_index + 1,
            input_shape[0],
            (Kind::Int64, x.device()),
@@ -961,8 +961,8 @@ impl RobertaForQuestionAnswering {
        let sequence_output = base_model_output.hidden_state.apply(&self.qa_outputs);
        let logits = sequence_output.split(1, -1);
        let (start_logits, end_logits) = (&logits[0], &logits[1]);
-        let start_logits = start_logits.squeeze1(-1);
-        let end_logits = end_logits.squeeze1(-1);
+        let start_logits = start_logits.squeeze_dim(-1);
+        let end_logits = end_logits.squeeze_dim(-1);

        RobertaQuestionAnsweringOutput {
            start_logits,
@@ -254,7 +254,7 @@ impl T5Attention {
            ret += n.lt(0).to_kind(Kind::Int64) * num_buckets;
            n.abs()
        } else {
-            n.max1(&n.zeros_like())
+            n.max_other(&n.zeros_like())
        };

        let max_exact = num_buckets / 2;
@@ -266,8 +266,8 @@ impl T5Attention {
            .to_kind(Kind::Int64)
            + max_exact;

-        let value_if_large = value_if_large.min1(&value_if_large.full_like(num_buckets - 1));
-        ret += n.where1(&is_small, &value_if_large);
+        let value_if_large = value_if_large.min_other(&value_if_large.full_like(num_buckets - 1));
+        ret += n.where_self(&is_small, &value_if_large);
        ret
    }
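Note: these hunks touch T5's relative position bucketing: distances below `num_buckets / 2` get exact buckets, and larger distances are mapped logarithmically up to `max_distance`. A scalar sketch of the unidirectional case, assuming the usual T5 defaults (`num_buckets = 32`, `max_distance = 128`):

```rust
// Map a relative position to a T5-style bucket: exact buckets for small
// distances, log-spaced buckets for large ones (unidirectional case).
fn relative_position_bucket(relative_position: i64, num_buckets: i64, max_distance: i64) -> i64 {
    let n = relative_position.max(0); // clamp, as in n.max_other(&zeros)
    let max_exact = num_buckets / 2;
    if n < max_exact {
        n
    } else {
        let log_ratio = (n as f64 / max_exact as f64).ln()
            / (max_distance as f64 / max_exact as f64).ln();
        let large = max_exact + (log_ratio * (num_buckets - max_exact) as f64) as i64;
        large.min(num_buckets - 1) // the min_other / full_like clamp above
    }
}
```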
@@ -328,7 +328,7 @@ impl T5Stack {
                input_shape[1],
                1,
            ]);
-            let causal_mask = causal_mask.le1(&seq_ids.unsqueeze(0).unsqueeze(-1));
+            let causal_mask = causal_mask.le_tensor(&seq_ids.unsqueeze(0).unsqueeze(-1));
            causal_mask.unsqueeze(1) * attention_mask.unsqueeze(1).unsqueeze(1)
        } else {
            attention_mask.unsqueeze(1).unsqueeze(1)
@@ -32,7 +32,7 @@ impl T5LayerNorm {

impl Module for T5LayerNorm {
    fn forward(&self, x: &Tensor) -> Tensor {
-        let variance = x.pow(2f64).mean1(&[-1], true, Kind::Float);
+        let variance = x.pow(2f64).mean_dim(&[-1], true, Kind::Float);
        let x = x / (variance + self.epsilon).sqrt();
        &self.weight * x
    }
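Note: the `variance` here is the mean of squares, i.e. T5's LayerNorm is an RMS-style normalization without mean-centering or bias, which is exactly what the forward pass above computes:

$$ y = w \odot \frac{x}{\sqrt{\tfrac{1}{d}\sum_{i=1}^{d} x_i^2 + \epsilon}} $$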
@@ -287,13 +287,16 @@ impl XLNetModel {
        batch_size: Option<i64>,
        device: Device,
    ) -> Tensor {
-        let frequency_sequence = Tensor::arange2(0, self.d_model, 2, (Kind::Float, device));
-        let inverse_frequency = 1f64 / Tensor::pow2(10000f64, &(frequency_sequence / self.d_model));
+        let frequency_sequence =
+            Tensor::arange_start_step(0, self.d_model, 2, (Kind::Float, device));
+        let inverse_frequency =
+            1f64 / Tensor::pow_scalar(10000f64, &(frequency_sequence / self.d_model));
        let (begin, end) = match self.attention_type {
            AttentionType::bi => (k_len, -q_len),
            AttentionType::uni => (k_len, -1),
        };
-        let mut forward_positions_sequence = Tensor::arange2(begin, end, -1, (Kind::Float, device));
+        let mut forward_positions_sequence =
+            Tensor::arange_start_step(begin, end, -1, (Kind::Float, device));
        match self.clamp_len {
            Some(clamp_value) if clamp_value > 0 => {
                let _ = forward_positions_sequence.clamp_(-clamp_value, clamp_value);
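Note: `inverse_frequency` is the standard sinusoidal frequency schedule used for the relative position encodings; positions are later multiplied by these frequencies and passed through sine and cosine:

$$ \omega_i = \frac{1}{10000^{2i/d_{\text{model}}}}, \qquad \mathrm{PE}(p, 2i) = \sin(p\,\omega_i), \quad \mathrm{PE}(p, 2i+1) = \cos(p\,\omega_i) $$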
@@ -302,7 +305,7 @@ impl XLNetModel {
        }
        if self.bi_data {
            let mut backward_positions_sequence =
-                Tensor::arange2(-begin, -end, 1, (Kind::Float, device));
+                Tensor::arange_start(-begin, -end, (Kind::Float, device));
            match self.clamp_len {
                Some(clamp_value) if clamp_value > 0 => {
                    let _ = backward_positions_sequence.clamp_(-clamp_value, clamp_value);
@@ -512,7 +515,7 @@ impl XLNetModel {
        };
        let seg_mat = token_type_ids_value
            .unsqueeze(-1)
-            .ne1(&cat_ids.unsqueeze(0))
+            .ne_tensor(&cat_ids.unsqueeze(0))
            .to_kind(Kind::Int64);
        Some(seg_mat.one_hot(2).to_kind(Kind::Float))
    } else {
@@ -1461,8 +1464,8 @@ impl XLNetForQuestionAnswering {
        let sequence_output = base_model_output.hidden_state.apply(&self.qa_outputs);
        let logits = sequence_output.split(1, -1);
        let (start_logits, end_logits) = (&logits[0], &logits[1]);
-        let start_logits = start_logits.squeeze1(-1);
-        let end_logits = end_logits.squeeze1(-1);
+        let start_logits = start_logits.squeeze_dim(-1);
+        let end_logits = end_logits.squeeze_dim(-1);

        XLNetQuestionAnsweringOutput {
            start_logits,
@@ -428,17 +428,20 @@ fn gpt2_prefix_allowed_token_greedy() -> anyhow::Result<()> {
        None,
        None,
        Some(&force_one_paragraph),
+        true,
    );

    assert_eq!(output.len(), 2);
    assert_eq!(
-        output[0],
+        output[0].text,
        "Rust is a very simple and powerful library for building and running web applications. It is a simple, fast, and lightweight library that can be used to build web applications in a number of different ways.\n"
    );
+    assert!((output[0].score.unwrap() - (-1.4666)).abs() < 1e-4);
    assert_eq!(
-        output[1],
+        output[1].text,
        "There was a urn in the room, and I was sitting on it. I was like, \'What the hell is going on?\' And he said, \'Well, I\'m not sure. I\'m just going to go back to my room and get some coffee.\' And"
    );
+    assert!((output[1].score.unwrap() - (-1.3545)).abs() < 1e-4);

    Ok(())
}
@@ -493,17 +496,20 @@ fn gpt2_prefix_allowed_token_beam_search() -> anyhow::Result<()> {
        None,
        None,
        Some(&force_one_paragraph),
+        true,
    );

    assert_eq!(output.len(), 2);
    assert_eq!(
-        output[0],
+        output[0].text,
        "Rust is a simple, fast, and easy-to-use framework for building web applications. It is designed to be easy to use and maintain, and"
    );
+    assert!((output[0].score.unwrap() - (-1.2750)).abs() < 1e-4);
    assert_eq!(
-        output[1],
+        output[1].text,
        "There was a urn in the back of the room, and I was sitting on it, and it looked like it was going to explode. And then I"
    );
+    assert!((output[1].score.unwrap() - (-1.3326)).abs() < 1e-4);

    Ok(())
}
@@ -97,11 +97,12 @@ fn mbart_translation() -> anyhow::Result<()> {
        None,
        target_language,
        None,
+        false,
    );

    assert_eq!(output.len(), 1);
    assert_eq!(
-        output[0],
+        output[0].text,
        "de_DE Der schnelle braune Fuchs springt über den faulen Hund."
    );