Updated Albert (clippy warnings)

Guillaume B 2020-09-12 15:11:56 +02:00
parent eee7985dce
commit daa6dba2d2
9 changed files with 134 additions and 88 deletions
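The diffs below replace clippy-flagged tuple return types with named output structs across the ALBERT models and their callers. As a rough sketch of the pattern only (ModelOutput and forward are hypothetical names, not from this commit; tch::Tensor is assumed, as elsewhere in the crate):

    use tch::{Device, Kind, Tensor};

    // Hypothetical stand-in for the refactor: return a named struct instead of a
    // (Tensor, Option<Vec<Tensor>>, Option<Vec<Vec<Tensor>>>) tuple, which clippy
    // tends to flag (type_complexity) and which forces positional .0/.1 access.
    pub struct ModelOutput {
        pub logits: Tensor,
        pub all_hidden_states: Option<Vec<Tensor>>,
        pub all_attentions: Option<Vec<Vec<Tensor>>>,
    }

    fn forward() -> ModelOutput {
        ModelOutput {
            logits: Tensor::zeros(&[2, 3], (Kind::Float, Device::Cpu)),
            all_hidden_states: None,
            all_attentions: None,
        }
    }

    fn main() {
        let model_output = forward();
        // Callers read named fields rather than destructuring a wide tuple.
        assert_eq!(model_output.logits.size(), &[2, 3]);
    }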

View File

@ -1,6 +1,6 @@
[package]
name = "rust-bert"
version = "0.9.0"
version = "0.10.0"
authors = ["Guillaume Becquin <guillaume.becquin@gmail.com>"]
edition = "2018"
description = "Ready-to-use NLP pipelines and transformer-based models (BERT, DistilBERT, GPT2,...)"

View File

@ -70,12 +70,23 @@ fn main() -> anyhow::Result<()> {
let input_tensor = Tensor::stack(tokenized_input.as_slice(), 0).to(device);
// Forward pass
let (output, _, _) =
let model_output =
no_grad(|| albert_model.forward_t(Some(input_tensor), None, None, None, None, false));
println!("{:?}", output.double_value(&[0, 0, 0]));
println!(
"{:?}",
model_output.prediction_scores.double_value(&[0, 0, 0])
);
// Print masked tokens
let index_1 = output.get(0).get(4).argmax(0, false);
let index_2 = output.get(1).get(7).argmax(0, false);
let index_1 = model_output
.prediction_scores
.get(0)
.get(4)
.argmax(0, false);
let index_2 = model_output
.prediction_scores
.get(1)
.get(7)
.argmax(0, false);
let word_1 = tokenizer.vocab().id_to_token(&index_1.int64_value(&[]));
let word_2 = tokenizer.vocab().id_to_token(&index_2.int64_value(&[]));

View File

@ -209,7 +209,7 @@ impl AlbertModel {
/// let position_ids = Tensor::arange(sequence_length, (Int64, device))
/// .expand(&[batch_size, sequence_length], true);
///
/// let (output, pooled_output, all_hidden_states, all_attentions) = no_grad(|| {
/// let model_output = no_grad(|| {
/// albert_model
/// .forward_t(
/// Some(input_tensor),
@ -268,18 +268,20 @@ impl AlbertModel {
}
};
let (hidden_state, all_hidden_states, all_attentions) =
let transformer_output =
self.encoder
.forward_t(&embedding_output, Some(extended_attention_mask), train);
let pooled_output = self.pooler.forward(&hidden_state.select(1, 0));
let pooled_output = self
.pooler
.forward(&transformer_output.hidden_state.select(1, 0));
let pooled_output = (self.pooler_activation)(&pooled_output);
Ok(AlbertOutput {
hidden_state,
hidden_state: transformer_output.hidden_state,
pooled_output,
all_hidden_states,
all_attentions,
all_hidden_states: transformer_output.all_hidden_states,
all_attentions: transformer_output.all_attentions,
})
}
}
@ -429,7 +431,7 @@ impl AlbertForMaskedLM {
/// let position_ids = Tensor::arange(sequence_length, (Int64, device))
/// .expand(&[batch_size, sequence_length], true);
///
/// let (output, all_hidden_states, all_attentions) = no_grad(|| {
/// let masked_lm_output = no_grad(|| {
/// albert_model.forward_t(
/// Some(input_tensor),
/// Some(mask),
@ -448,7 +450,7 @@ impl AlbertForMaskedLM {
position_ids: Option<Tensor>,
input_embeds: Option<Tensor>,
train: bool,
) -> (Tensor, Option<Vec<Tensor>>, Option<Vec<Vec<Tensor>>>) {
) -> AlbertMaskedLMOutput {
let base_model_output = self
.albert
.forward_t(
@ -461,11 +463,11 @@ impl AlbertForMaskedLM {
)
.unwrap();
let prediction_scores = self.predictions.forward(&base_model_output.hidden_state);
(
AlbertMaskedLMOutput {
prediction_scores,
base_model_output.all_hidden_states,
base_model_output.all_attentions,
)
all_hidden_states: base_model_output.all_hidden_states,
all_attentions: base_model_output.all_attentions,
}
}
}
@ -571,7 +573,7 @@ impl AlbertForSequenceClassification {
/// let token_type_ids = Tensor::zeros(&[batch_size, sequence_length], (Int64, device));
/// let position_ids = Tensor::arange(sequence_length, (Int64, device)).expand(&[batch_size, sequence_length], true);
///
/// let (output, all_hidden_states, all_attentions) = no_grad(|| {
/// let classification_output = no_grad(|| {
/// albert_model
/// .forward_t(Some(input_tensor),
/// Some(mask),
@ -589,7 +591,7 @@ impl AlbertForSequenceClassification {
position_ids: Option<Tensor>,
input_embeds: Option<Tensor>,
train: bool,
) -> (Tensor, Option<Vec<Tensor>>, Option<Vec<Vec<Tensor>>>) {
) -> AlbertSequenceClassificationOutput {
let base_model_output = self
.albert
.forward_t(
@ -605,11 +607,11 @@ impl AlbertForSequenceClassification {
.pooled_output
.apply_t(&self.dropout, train)
.apply(&self.classifier);
(
AlbertSequenceClassificationOutput {
logits,
base_model_output.all_hidden_states,
base_model_output.all_attentions,
)
all_hidden_states: base_model_output.all_hidden_states,
all_attentions: base_model_output.all_attentions,
}
}
}
@ -712,7 +714,7 @@ impl AlbertForTokenClassification {
/// let token_type_ids = Tensor::zeros(&[batch_size, sequence_length], (Int64, device));
/// let position_ids = Tensor::arange(sequence_length, (Int64, device)).expand(&[batch_size, sequence_length], true);
///
/// let (output, all_hidden_states, all_attentions) = no_grad(|| {
/// let model_output = no_grad(|| {
/// albert_model
/// .forward_t(Some(input_tensor),
/// Some(mask),
@ -730,7 +732,7 @@ impl AlbertForTokenClassification {
position_ids: Option<Tensor>,
input_embeds: Option<Tensor>,
train: bool,
) -> (Tensor, Option<Vec<Tensor>>, Option<Vec<Vec<Tensor>>>) {
) -> AlbertTokenClassificationOutput {
let base_model_output = self
.albert
.forward_t(
@ -746,11 +748,11 @@ impl AlbertForTokenClassification {
.hidden_state
.apply_t(&self.dropout, train)
.apply(&self.classifier);
(
AlbertTokenClassificationOutput {
logits,
base_model_output.all_hidden_states,
base_model_output.all_attentions,
)
all_hidden_states: base_model_output.all_hidden_states,
all_attentions: base_model_output.all_attentions,
}
}
}
@ -843,7 +845,7 @@ impl AlbertForQuestionAnswering {
/// let token_type_ids = Tensor::zeros(&[batch_size, sequence_length], (Int64, device));
/// let position_ids = Tensor::arange(sequence_length, (Int64, device)).expand(&[batch_size, sequence_length], true);
///
/// let (start_logits, end_logits, all_hidden_states, all_attentions) = no_grad(|| {
/// let model_output = no_grad(|| {
/// albert_model
/// .forward_t(Some(input_tensor),
/// Some(mask),
@ -861,12 +863,7 @@ impl AlbertForQuestionAnswering {
position_ids: Option<Tensor>,
input_embeds: Option<Tensor>,
train: bool,
) -> (
Tensor,
Tensor,
Option<Vec<Tensor>>,
Option<Vec<Vec<Tensor>>>,
) {
) -> AlbertQuestionAnsweringOutput {
let base_model_output = self
.albert
.forward_t(
@ -886,12 +883,12 @@ impl AlbertForQuestionAnswering {
let start_logits = start_logits.squeeze1(-1);
let end_logits = end_logits.squeeze1(-1);
(
AlbertQuestionAnsweringOutput {
start_logits,
end_logits,
base_model_output.all_hidden_states,
base_model_output.all_attentions,
)
all_hidden_states: base_model_output.all_hidden_states,
all_attentions: base_model_output.all_attentions,
}
}
}
@ -990,7 +987,7 @@ impl AlbertForMultipleChoice {
/// let token_type_ids = Tensor::zeros(&[batch_size, sequence_length], (Int64, device));
/// let position_ids = Tensor::arange(sequence_length, (Int64, device)).expand(&[batch_size, sequence_length], true);
///
/// let (output, all_hidden_states, all_attentions) = no_grad(|| {
/// let model_output = no_grad(|| {
/// albert_model
/// .forward_t(Some(input_tensor),
/// Some(mask),
@ -1008,7 +1005,7 @@ impl AlbertForMultipleChoice {
position_ids: Option<Tensor>,
input_embeds: Option<Tensor>,
train: bool,
) -> Result<(Tensor, Option<Vec<Tensor>>, Option<Vec<Vec<Tensor>>>), &'static str> {
) -> Result<AlbertSequenceClassificationOutput, &'static str> {
let (input_ids, input_embeds, num_choices) = match &input_ids {
Some(input_value) => match &input_embeds {
Some(_) => {
@ -1062,10 +1059,35 @@ impl AlbertForMultipleChoice {
.apply(&self.classifier)
.view((-1, num_choices));
Ok((
Ok(AlbertSequenceClassificationOutput {
logits,
base_model_output.all_hidden_states,
base_model_output.all_attentions,
))
all_hidden_states: base_model_output.all_hidden_states,
all_attentions: base_model_output.all_attentions,
})
}
}
pub struct AlbertMaskedLMOutput {
pub prediction_scores: Tensor,
pub all_hidden_states: Option<Vec<Tensor>>,
pub all_attentions: Option<Vec<Vec<Tensor>>>,
}
pub struct AlbertSequenceClassificationOutput {
pub logits: Tensor,
pub all_hidden_states: Option<Vec<Tensor>>,
pub all_attentions: Option<Vec<Vec<Tensor>>>,
}
pub struct AlbertTokenClassificationOutput {
pub logits: Tensor,
pub all_hidden_states: Option<Vec<Tensor>>,
pub all_attentions: Option<Vec<Vec<Tensor>>>,
}
pub struct AlbertQuestionAnsweringOutput {
pub start_logits: Tensor,
pub end_logits: Tensor,
pub all_hidden_states: Option<Vec<Tensor>>,
pub all_attentions: Option<Vec<Vec<Tensor>>>,
}
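A minimal sketch of consuming one of these output structs. The struct is mirrored locally with stand-in logits so the snippet is self-contained; in the crate, the values come from AlbertForQuestionAnswering::forward_t as shown above.

    use tch::Tensor;

    // Local mirror of AlbertQuestionAnsweringOutput so the sketch compiles on its own.
    struct AlbertQuestionAnsweringOutput {
        start_logits: Tensor,
        end_logits: Tensor,
    }

    fn main() {
        // Stand-in logits with the [batch_size, sequence_length] shape the model produces.
        let model_output = AlbertQuestionAnsweringOutput {
            start_logits: Tensor::of_slice(&[0.1f32, 2.0, 0.3, 0.1]).view((1, 4)),
            end_logits: Tensor::of_slice(&[0.1f32, 0.2, 0.3, 3.0]).view((1, 4)),
        };
        // Named fields replace positional tuple access (outputs.0, outputs.1).
        let start = model_output.start_logits.get(0).argmax(0, false).int64_value(&[]);
        let end = model_output.end_logits.get(0).argmax(0, false).int64_value(&[]);
        assert_eq!((start, end), (1, 3));
    }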

View File

@ -149,22 +149,17 @@ impl AlbertLayerGroup {
let mut hidden_state = hidden_states.copy();
let mut attention_weights: Option<Tensor>;
let mut layers = self.layers.iter();
loop {
match layers.next() {
Some(layer) => {
if let Some(hidden_states) = all_hidden_states.borrow_mut() {
hidden_states.push(hidden_state.as_ref().copy());
};
let temp = layer.forward_t(&hidden_state, &mask, train);
hidden_state = temp.0;
attention_weights = temp.1;
if let Some(attentions) = all_attentions.borrow_mut() {
attentions.push(attention_weights.as_ref().unwrap().copy());
};
}
None => break,
for layer in &self.layers {
if let Some(hidden_states) = all_hidden_states.borrow_mut() {
hidden_states.push(hidden_state.as_ref().copy());
};
let temp = layer.forward_t(&hidden_state, &mask, train);
hidden_state = temp.0;
attention_weights = temp.1;
if let Some(attentions) = all_attentions.borrow_mut() {
attentions.push(attention_weights.as_ref().unwrap().copy());
};
}
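The loop rewrite above also trades the explicit loop/match over an iterator, which clippy flags, for a plain for loop. A tiny self-contained sketch of that control-flow shape with placeholder types (not the crate's):

    // Mirrors the shape of the AlbertLayerGroup::forward_t loop: walk the layers
    // in order, optionally collecting the intermediate states along the way.
    fn run_layers(layers: &[i64], mut state: i64, collect: bool) -> (i64, Option<Vec<i64>>) {
        let mut all_states: Option<Vec<i64>> = if collect { Some(vec![]) } else { None };
        for layer in layers {
            if let Some(states) = all_states.as_mut() {
                states.push(state);
            }
            state += layer;
        }
        (state, all_states)
    }

    fn main() {
        let (state, all_states) = run_layers(&[1, 2, 3], 0, true);
        assert_eq!(state, 6);
        assert_eq!(all_states, Some(vec![0, 1, 3]));
    }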
@ -226,7 +221,7 @@ impl AlbertTransformer {
hidden_states: &Tensor,
mask: Option<Tensor>,
train: bool,
) -> (Tensor, Option<Vec<Tensor>>, Option<Vec<Vec<Tensor>>>) {
) -> AlbertTransformerOutput {
let mut hidden_state = hidden_states.apply(&self.embedding_hidden_mapping_in);
let mut all_hidden_states: Option<Vec<Tensor>> = if self.output_hidden_states {
@ -256,6 +251,16 @@ impl AlbertTransformer {
};
}
(hidden_state, all_hidden_states, all_attentions)
AlbertTransformerOutput {
hidden_state,
all_hidden_states,
all_attentions,
}
}
}
pub struct AlbertTransformerOutput {
pub hidden_state: Tensor,
pub all_hidden_states: Option<Vec<Tensor>>,
pub all_attentions: Option<Vec<Vec<Tensor>>>,
}

View File

@ -370,7 +370,7 @@ impl QuestionAnsweringOption {
}
Self::Albert(ref model) => {
let outputs = model.forward_t(input_ids, mask, None, None, input_embeds, train);
(outputs.0, outputs.1)
(outputs.start_logits, outputs.end_logits)
}
}
}

View File

@ -343,7 +343,7 @@ impl SequenceClassificationOption {
input_embeds,
train,
)
.0
.logits
}
}
}

View File

@ -450,7 +450,7 @@ impl TokenClassificationOption {
input_embeds,
train,
)
.0
.logits
}
}
}

View File

@ -374,7 +374,7 @@ impl ZeroShotClassificationOption {
input_embeds,
train,
)
.0
.logits
}
}
}

View File

@ -61,18 +61,26 @@ fn albert_masked_lm() -> anyhow::Result<()> {
let input_tensor = Tensor::stack(tokenized_input.as_slice(), 0).to(device);
// Forward pass
let (output, _, _) =
let model_output =
no_grad(|| albert_model.forward_t(Some(input_tensor), None, None, None, None, false));
// Print masked tokens
let index_1 = output.get(0).get(4).argmax(0, false);
let index_2 = output.get(1).get(6).argmax(0, false);
let index_1 = model_output
.prediction_scores
.get(0)
.get(4)
.argmax(0, false);
let index_2 = model_output
.prediction_scores
.get(1)
.get(6)
.argmax(0, false);
let word_1 = tokenizer.vocab().id_to_token(&index_1.int64_value(&[]));
let word_2 = tokenizer.vocab().id_to_token(&index_2.int64_value(&[]));
assert_eq!("▁them", word_1); // Outputs "_them" : "Looks like one [them] is missing (? this is identical with the original implementation)"
assert_eq!("▁grapes", word_2); // Outputs "grapes" : "It\'s like comparing [grapes] to apples"
assert!((output.double_value(&[0, 0, 0]) - 4.6143).abs() < 1e-4);
assert!((model_output.prediction_scores.double_value(&[0, 0, 0]) - 4.6143).abs() < 1e-4);
Ok(())
}
@ -127,17 +135,17 @@ fn albert_for_sequence_classification() -> anyhow::Result<()> {
let input_tensor = Tensor::stack(tokenized_input.as_slice(), 0).to(device);
// Forward pass
let (output, all_hidden_states, all_attentions) =
let model_output =
no_grad(|| albert_model.forward_t(Some(input_tensor), None, None, None, None, false));
assert_eq!(output.size(), &[2, 3]);
assert_eq!(model_output.logits.size(), &[2, 3]);
assert_eq!(
config.num_hidden_layers as usize,
all_hidden_states.unwrap().len()
model_output.all_hidden_states.unwrap().len()
);
assert_eq!(
config.num_hidden_layers as usize,
all_attentions.unwrap().len()
model_output.all_attentions.unwrap().len()
);
Ok(())
@ -191,20 +199,20 @@ fn albert_for_multiple_choice() -> anyhow::Result<()> {
.unsqueeze(0);
// Forward pass
let (output, all_hidden_states, all_attentions) = no_grad(|| {
let model_output = no_grad(|| {
albert_model
.forward_t(Some(input_tensor), None, None, None, None, false)
.unwrap()
});
assert_eq!(output.size(), &[1, 2]);
assert_eq!(model_output.logits.size(), &[1, 2]);
assert_eq!(
config.num_hidden_layers as usize,
all_hidden_states.unwrap().len()
model_output.all_hidden_states.unwrap().len()
);
assert_eq!(
config.num_hidden_layers as usize,
all_attentions.unwrap().len()
model_output.all_attentions.unwrap().len()
);
Ok(())
@ -262,17 +270,17 @@ fn albert_for_token_classification() -> anyhow::Result<()> {
let input_tensor = Tensor::stack(tokenized_input.as_slice(), 0).to(device);
// Forward pass
let (output, all_hidden_states, all_attentions) =
let model_output =
no_grad(|| bert_model.forward_t(Some(input_tensor), None, None, None, None, false));
assert_eq!(output.size(), &[2, 12, 4]);
assert_eq!(model_output.logits.size(), &[2, 12, 4]);
assert_eq!(
config.num_hidden_layers as usize,
all_hidden_states.unwrap().len()
model_output.all_hidden_states.unwrap().len()
);
assert_eq!(
config.num_hidden_layers as usize,
all_attentions.unwrap().len()
model_output.all_attentions.unwrap().len()
);
Ok(())
@ -324,18 +332,18 @@ fn albert_for_question_answering() -> anyhow::Result<()> {
let input_tensor = Tensor::stack(tokenized_input.as_slice(), 0).to(device);
// Forward pass
let (start_scores, end_scores, all_hidden_states, all_attentions) =
let model_output =
no_grad(|| albert_model.forward_t(Some(input_tensor), None, None, None, None, false));
assert_eq!(start_scores.size(), &[2, 12]);
assert_eq!(end_scores.size(), &[2, 12]);
assert_eq!(model_output.start_logits.size(), &[2, 12]);
assert_eq!(model_output.end_logits.size(), &[2, 12]);
assert_eq!(
config.num_hidden_layers as usize,
all_hidden_states.unwrap().len()
model_output.all_hidden_states.unwrap().len()
);
assert_eq!(
config.num_hidden_layers as usize,
all_attentions.unwrap().len()
model_output.all_attentions.unwrap().len()
);
Ok(())