Updated Albert (clippy warnings)

Guillaume B 2020-09-12 15:11:56 +02:00
parent eee7985dce
commit daa6dba2d2
9 changed files with 134 additions and 88 deletions
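The diffs below replace clippy-flagged tuple return types with named output structs across the ALBERT models and their callers. As a rough sketch of the pattern only (ModelOutput and forward are hypothetical names, not from this commit; tch::Tensor is assumed, as elsewhere in the crate):

    use tch::{Device, Kind, Tensor};

    // Hypothetical stand-in for the refactor: return a named struct instead of a
    // (Tensor, Option<Vec<Tensor>>, Option<Vec<Vec<Tensor>>>) tuple, which clippy
    // tends to flag (type_complexity) and which forces positional .0/.1 access.
    pub struct ModelOutput {
        pub logits: Tensor,
        pub all_hidden_states: Option<Vec<Tensor>>,
        pub all_attentions: Option<Vec<Vec<Tensor>>>,
    }

    fn forward() -> ModelOutput {
        ModelOutput {
            logits: Tensor::zeros(&[2, 3], (Kind::Float, Device::Cpu)),
            all_hidden_states: None,
            all_attentions: None,
        }
    }

    fn main() {
        let model_output = forward();
        // Callers read named fields rather than destructuring a wide tuple.
        assert_eq!(model_output.logits.size(), &[2, 3]);
    }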

View File

@ -1,6 +1,6 @@
[package]
name = "rust-bert"
version = "0.9.0"
version = "0.10.0"
authors = ["Guillaume Becquin <guillaume.becquin@gmail.com>"]
edition = "2018"
description = "Ready-to-use NLP pipelines and transformer-based models (BERT, DistilBERT, GPT2,...)"

View File

@ -70,12 +70,23 @@ fn main() -> anyhow::Result<()> {
let input_tensor = Tensor::stack(tokenized_input.as_slice(), 0).to(device);
// Forward pass
let (output, _, _) =
let model_output =
no_grad(|| albert_model.forward_t(Some(input_tensor), None, None, None, None, false));
println!("{:?}", output.double_value(&[0, 0, 0]));
println!(
"{:?}",
model_output.prediction_scores.double_value(&[0, 0, 0])
);
// Print masked tokens
let index_1 = output.get(0).get(4).argmax(0, false);
let index_2 = output.get(1).get(7).argmax(0, false);
let index_1 = model_output
.prediction_scores
.get(0)
.get(4)
.argmax(0, false);
let index_2 = model_output
.prediction_scores
.get(1)
.get(7)
.argmax(0, false);
let word_1 = tokenizer.vocab().id_to_token(&index_1.int64_value(&[]));
let word_2 = tokenizer.vocab().id_to_token(&index_2.int64_value(&[]));

View File

@ -209,7 +209,7 @@ impl AlbertModel {
/// let position_ids = Tensor::arange(sequence_length, (Int64, device))
/// .expand(&[batch_size, sequence_length], true);
///
/// let (output, pooled_output, all_hidden_states, all_attentions) = no_grad(|| {
/// let model_output = no_grad(|| {
/// albert_model
/// .forward_t(
/// Some(input_tensor),
@ -268,18 +268,20 @@ impl AlbertModel {
}
};
let (hidden_state, all_hidden_states, all_attentions) =
let transformer_output =
self.encoder
.forward_t(&embedding_output, Some(extended_attention_mask), train);
let pooled_output = self.pooler.forward(&hidden_state.select(1, 0));
let pooled_output = self
.pooler
.forward(&transformer_output.hidden_state.select(1, 0));
let pooled_output = (self.pooler_activation)(&pooled_output);
Ok(AlbertOutput {
hidden_state,
hidden_state: transformer_output.hidden_state,
pooled_output,
all_hidden_states,
all_attentions,
all_hidden_states: transformer_output.all_hidden_states,
all_attentions: transformer_output.all_attentions,
})
}
}
@ -429,7 +431,7 @@ impl AlbertForMaskedLM {
/// let position_ids = Tensor::arange(sequence_length, (Int64, device))
/// .expand(&[batch_size, sequence_length], true);
///
/// let (output, all_hidden_states, all_attentions) = no_grad(|| {
/// let masked_lm_output = no_grad(|| {
/// albert_model.forward_t(
/// Some(input_tensor),
/// Some(mask),
@ -448,7 +450,7 @@ impl AlbertForMaskedLM {
position_ids: Option<Tensor>,
input_embeds: Option<Tensor>,
train: bool,
) -> (Tensor, Option<Vec<Tensor>>, Option<Vec<Vec<Tensor>>>) {
) -> AlbertMaskedLMOutput {
let base_model_output = self
.albert
.forward_t(
@ -461,11 +463,11 @@ impl AlbertForMaskedLM {
)
.unwrap();
let prediction_scores = self.predictions.forward(&base_model_output.hidden_state);
(
AlbertMaskedLMOutput {
prediction_scores,
base_model_output.all_hidden_states,
base_model_output.all_attentions,
)
all_hidden_states: base_model_output.all_hidden_states,
all_attentions: base_model_output.all_attentions,
}
}
}
@ -571,7 +573,7 @@ impl AlbertForSequenceClassification {
/// let token_type_ids = Tensor::zeros(&[batch_size, sequence_length], (Int64, device));
/// let position_ids = Tensor::arange(sequence_length, (Int64, device)).expand(&[batch_size, sequence_length], true);
///
/// let (output, all_hidden_states, all_attentions) = no_grad(|| {
/// let classification_output = no_grad(|| {
/// albert_model
/// .forward_t(Some(input_tensor),
/// Some(mask),
@ -589,7 +591,7 @@ impl AlbertForSequenceClassification {
position_ids: Option<Tensor>,
input_embeds: Option<Tensor>,
train: bool,
) -> (Tensor, Option<Vec<Tensor>>, Option<Vec<Vec<Tensor>>>) {
) -> AlbertSequenceClassificationOutput {
let base_model_output = self
.albert
.forward_t(
@ -605,11 +607,11 @@ impl AlbertForSequenceClassification {
.pooled_output
.apply_t(&self.dropout, train)
.apply(&self.classifier);
(
AlbertSequenceClassificationOutput {
logits,
base_model_output.all_hidden_states,
base_model_output.all_attentions,
)
all_hidden_states: base_model_output.all_hidden_states,
all_attentions: base_model_output.all_attentions,
}
}
}
@ -712,7 +714,7 @@ impl AlbertForTokenClassification {
/// let token_type_ids = Tensor::zeros(&[batch_size, sequence_length], (Int64, device));
/// let position_ids = Tensor::arange(sequence_length, (Int64, device)).expand(&[batch_size, sequence_length], true);
///
/// let (output, all_hidden_states, all_attentions) = no_grad(|| {
/// let model_output = no_grad(|| {
/// albert_model
/// .forward_t(Some(input_tensor),
/// Some(mask),
@ -730,7 +732,7 @@ impl AlbertForTokenClassification {
position_ids: Option<Tensor>,
input_embeds: Option<Tensor>,
train: bool,
) -> (Tensor, Option<Vec<Tensor>>, Option<Vec<Vec<Tensor>>>) {
) -> AlbertTokenClassificationOutput {
let base_model_output = self
.albert
.forward_t(
@ -746,11 +748,11 @@ impl AlbertForTokenClassification {
.hidden_state
.apply_t(&self.dropout, train)
.apply(&self.classifier);
(
AlbertTokenClassificationOutput {
logits,
base_model_output.all_hidden_states,
base_model_output.all_attentions,
)
all_hidden_states: base_model_output.all_hidden_states,
all_attentions: base_model_output.all_attentions,
}
}
}
@ -843,7 +845,7 @@ impl AlbertForQuestionAnswering {
/// let token_type_ids = Tensor::zeros(&[batch_size, sequence_length], (Int64, device));
/// let position_ids = Tensor::arange(sequence_length, (Int64, device)).expand(&[batch_size, sequence_length], true);
///
/// let (start_logits, end_logits, all_hidden_states, all_attentions) = no_grad(|| {
/// let model_output = no_grad(|| {
/// albert_model
/// .forward_t(Some(input_tensor),
/// Some(mask),
@ -861,12 +863,7 @@ impl AlbertForQuestionAnswering {
position_ids: Option<Tensor>,
input_embeds: Option<Tensor>,
train: bool,
) -> (
Tensor,
Tensor,
Option<Vec<Tensor>>,
Option<Vec<Vec<Tensor>>>,
) {
) -> AlbertQuestionAnsweringOutput {
let base_model_output = self
.albert
.forward_t(
@ -886,12 +883,12 @@ impl AlbertForQuestionAnswering {
let start_logits = start_logits.squeeze1(-1);
let end_logits = end_logits.squeeze1(-1);
(
AlbertQuestionAnsweringOutput {
start_logits,
end_logits,
base_model_output.all_hidden_states,
base_model_output.all_attentions,
)
all_hidden_states: base_model_output.all_hidden_states,
all_attentions: base_model_output.all_attentions,
}
}
}
@ -990,7 +987,7 @@ impl AlbertForMultipleChoice {
/// let token_type_ids = Tensor::zeros(&[batch_size, sequence_length], (Int64, device));
/// let position_ids = Tensor::arange(sequence_length, (Int64, device)).expand(&[batch_size, sequence_length], true);
///
/// let (output, all_hidden_states, all_attentions) = no_grad(|| {
/// let model_output = no_grad(|| {
/// albert_model
/// .forward_t(Some(input_tensor),
/// Some(mask),
@ -1008,7 +1005,7 @@ impl AlbertForMultipleChoice {
position_ids: Option<Tensor>,
input_embeds: Option<Tensor>,
train: bool,
) -> Result<(Tensor, Option<Vec<Tensor>>, Option<Vec<Vec<Tensor>>>), &'static str> {
) -> Result<AlbertSequenceClassificationOutput, &'static str> {
let (input_ids, input_embeds, num_choices) = match &input_ids {
Some(input_value) => match &input_embeds {
Some(_) => {
@ -1062,10 +1059,35 @@ impl AlbertForMultipleChoice {
.apply(&self.classifier)
.view((-1, num_choices));
Ok((
Ok(AlbertSequenceClassificationOutput {
logits,
base_model_output.all_hidden_states,
base_model_output.all_attentions,
))
all_hidden_states: base_model_output.all_hidden_states,
all_attentions: base_model_output.all_attentions,
})
}
}
pub struct AlbertMaskedLMOutput {
pub prediction_scores: Tensor,
pub all_hidden_states: Option<Vec<Tensor>>,
pub all_attentions: Option<Vec<Vec<Tensor>>>,
}
pub struct AlbertSequenceClassificationOutput {
pub logits: Tensor,
pub all_hidden_states: Option<Vec<Tensor>>,
pub all_attentions: Option<Vec<Vec<Tensor>>>,
}
pub struct AlbertTokenClassificationOutput {
pub logits: Tensor,
pub all_hidden_states: Option<Vec<Tensor>>,
pub all_attentions: Option<Vec<Vec<Tensor>>>,
}
pub struct AlbertQuestionAnsweringOutput {
pub start_logits: Tensor,
pub end_logits: Tensor,
pub all_hidden_states: Option<Vec<Tensor>>,
pub all_attentions: Option<Vec<Vec<Tensor>>>,
}
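A minimal sketch of consuming one of these output structs. The struct is mirrored locally with stand-in logits so the snippet is self-contained; in the crate, the values come from AlbertForQuestionAnswering::forward_t as shown above.

    use tch::Tensor;

    // Local mirror of AlbertQuestionAnsweringOutput so the sketch compiles on its own.
    struct AlbertQuestionAnsweringOutput {
        start_logits: Tensor,
        end_logits: Tensor,
    }

    fn main() {
        // Stand-in logits with the [batch_size, sequence_length] shape the model produces.
        let model_output = AlbertQuestionAnsweringOutput {
            start_logits: Tensor::of_slice(&[0.1f32, 2.0, 0.3, 0.1]).view((1, 4)),
            end_logits: Tensor::of_slice(&[0.1f32, 0.2, 0.3, 3.0]).view((1, 4)),
        };
        // Named fields replace positional tuple access (outputs.0, outputs.1).
        let start = model_output.start_logits.get(0).argmax(0, false).int64_value(&[]);
        let end = model_output.end_logits.get(0).argmax(0, false).int64_value(&[]);
        assert_eq!((start, end), (1, 3));
    }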

View File

@ -149,22 +149,17 @@ impl AlbertLayerGroup {
let mut hidden_state = hidden_states.copy();
let mut attention_weights: Option<Tensor>;
let mut layers = self.layers.iter();
loop {
match layers.next() {
Some(layer) => {
if let Some(hidden_states) = all_hidden_states.borrow_mut() {
hidden_states.push(hidden_state.as_ref().copy());
};
let temp = layer.forward_t(&hidden_state, &mask, train);
hidden_state = temp.0;
attention_weights = temp.1;
if let Some(attentions) = all_attentions.borrow_mut() {
attentions.push(attention_weights.as_ref().unwrap().copy());
};
}
None => break,
for layer in &self.layers {
if let Some(hidden_states) = all_hidden_states.borrow_mut() {
hidden_states.push(hidden_state.as_ref().copy());
};
let temp = layer.forward_t(&hidden_state, &mask, train);
hidden_state = temp.0;
attention_weights = temp.1;
if let Some(attentions) = all_attentions.borrow_mut() {
attentions.push(attention_weights.as_ref().unwrap().copy());
};
}
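The loop rewrite above also trades the explicit loop/match over an iterator, which clippy flags, for a plain for loop. A tiny self-contained sketch of that control-flow shape with placeholder types (not the crate's):

    // Mirrors the shape of the AlbertLayerGroup::forward_t loop: walk the layers
    // in order, optionally collecting the intermediate states along the way.
    fn run_layers(layers: &[i64], mut state: i64, collect: bool) -> (i64, Option<Vec<i64>>) {
        let mut all_states: Option<Vec<i64>> = if collect { Some(vec![]) } else { None };
        for layer in layers {
            if let Some(states) = all_states.as_mut() {
                states.push(state);
            }
            state += layer;
        }
        (state, all_states)
    }

    fn main() {
        let (state, all_states) = run_layers(&[1, 2, 3], 0, true);
        assert_eq!(state, 6);
        assert_eq!(all_states, Some(vec![0, 1, 3]));
    }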
@ -226,7 +221,7 @@ impl AlbertTransformer {
hidden_states: &Tensor,
mask: Option<Tensor>,
train: bool,
) -> (Tensor, Option<Vec<Tensor>>, Option<Vec<Vec<Tensor>>>) {
) -> AlbertTransformerOutput {
let mut hidden_state = hidden_states.apply(&self.embedding_hidden_mapping_in);
let mut all_hidden_states: Option<Vec<Tensor>> = if self.output_hidden_states {
@ -256,6 +251,16 @@ impl AlbertTransformer {
};
}
(hidden_state, all_hidden_states, all_attentions)
AlbertTransformerOutput {
hidden_state,
all_hidden_states,
all_attentions,
}
}
}
pub struct AlbertTransformerOutput {
pub hidden_state: Tensor,
pub all_hidden_states: Option<Vec<Tensor>>,
pub all_attentions: Option<Vec<Vec<Tensor>>>,
}

View File

@ -370,7 +370,7 @@ impl QuestionAnsweringOption {
}
Self::Albert(ref model) => {
let outputs = model.forward_t(input_ids, mask, None, None, input_embeds, train);
(outputs.0, outputs.1)
(outputs.start_logits, outputs.end_logits)
}
}
}

View File

@ -343,7 +343,7 @@ impl SequenceClassificationOption {
input_embeds,
train,
)
.0
.logits
}
}
}

View File

@ -450,7 +450,7 @@ impl TokenClassificationOption {
input_embeds,
train,
)
.0
.logits
}
}
}

View File

@ -374,7 +374,7 @@ impl ZeroShotClassificationOption {
input_embeds,
train,
)
.0
.logits
}
}
}

View File

@ -61,18 +61,26 @@ fn albert_masked_lm() -> anyhow::Result<()> {
let input_tensor = Tensor::stack(tokenized_input.as_slice(), 0).to(device);
// Forward pass
let (output, _, _) =
let model_output =
no_grad(|| albert_model.forward_t(Some(input_tensor), None, None, None, None, false));
// Print masked tokens
let index_1 = output.get(0).get(4).argmax(0, false);
let index_2 = output.get(1).get(6).argmax(0, false);
let index_1 = model_output
.prediction_scores
.get(0)
.get(4)
.argmax(0, false);
let index_2 = model_output
.prediction_scores
.get(1)
.get(6)
.argmax(0, false);
let word_1 = tokenizer.vocab().id_to_token(&index_1.int64_value(&[]));
let word_2 = tokenizer.vocab().id_to_token(&index_2.int64_value(&[]));
assert_eq!("▁them", word_1); // Outputs "_them" : "Looks like one [them] is missing (? this is identical with the original implementation)"
assert_eq!("▁grapes", word_2); // Outputs "grapes" : "It\'s like comparing [grapes] to apples"
assert!((output.double_value(&[0, 0, 0]) - 4.6143).abs() < 1e-4);
assert!((model_output.prediction_scores.double_value(&[0, 0, 0]) - 4.6143).abs() < 1e-4);
Ok(())
}
@ -127,17 +135,17 @@ fn albert_for_sequence_classification() -> anyhow::Result<()> {
let input_tensor = Tensor::stack(tokenized_input.as_slice(), 0).to(device);
// Forward pass
let (output, all_hidden_states, all_attentions) =
let model_output =
no_grad(|| albert_model.forward_t(Some(input_tensor), None, None, None, None, false));
assert_eq!(output.size(), &[2, 3]);
assert_eq!(model_output.logits.size(), &[2, 3]);
assert_eq!(
config.num_hidden_layers as usize,
all_hidden_states.unwrap().len()
model_output.all_hidden_states.unwrap().len()
);
assert_eq!(
config.num_hidden_layers as usize,
all_attentions.unwrap().len()
model_output.all_attentions.unwrap().len()
);
Ok(())
@ -191,20 +199,20 @@ fn albert_for_multiple_choice() -> anyhow::Result<()> {
.unsqueeze(0);
// Forward pass
let (output, all_hidden_states, all_attentions) = no_grad(|| {
let model_output = no_grad(|| {
albert_model
.forward_t(Some(input_tensor), None, None, None, None, false)
.unwrap()
});
assert_eq!(output.size(), &[1, 2]);
assert_eq!(model_output.logits.size(), &[1, 2]);
assert_eq!(
config.num_hidden_layers as usize,
all_hidden_states.unwrap().len()
model_output.all_hidden_states.unwrap().len()
);
assert_eq!(
config.num_hidden_layers as usize,
all_attentions.unwrap().len()
model_output.all_attentions.unwrap().len()
);
Ok(())
@ -262,17 +270,17 @@ fn albert_for_token_classification() -> anyhow::Result<()> {
let input_tensor = Tensor::stack(tokenized_input.as_slice(), 0).to(device);
// Forward pass
let (output, all_hidden_states, all_attentions) =
let model_output =
no_grad(|| bert_model.forward_t(Some(input_tensor), None, None, None, None, false));
assert_eq!(output.size(), &[2, 12, 4]);
assert_eq!(model_output.logits.size(), &[2, 12, 4]);
assert_eq!(
config.num_hidden_layers as usize,
all_hidden_states.unwrap().len()
model_output.all_hidden_states.unwrap().len()
);
assert_eq!(
config.num_hidden_layers as usize,
all_attentions.unwrap().len()
model_output.all_attentions.unwrap().len()
);
Ok(())
@ -324,18 +332,18 @@ fn albert_for_question_answering() -> anyhow::Result<()> {
let input_tensor = Tensor::stack(tokenized_input.as_slice(), 0).to(device);
// Forward pass
let (start_scores, end_scores, all_hidden_states, all_attentions) =
let model_output =
no_grad(|| albert_model.forward_t(Some(input_tensor), None, None, None, None, false));
assert_eq!(start_scores.size(), &[2, 12]);
assert_eq!(end_scores.size(), &[2, 12]);
assert_eq!(model_output.start_logits.size(), &[2, 12]);
assert_eq!(model_output.end_logits.size(), &[2, 12]);
assert_eq!(
config.num_hidden_layers as usize,
all_hidden_states.unwrap().len()
model_output.all_hidden_states.unwrap().len()
);
assert_eq!(
config.num_hidden_layers as usize,
all_attentions.unwrap().len()
model_output.all_attentions.unwrap().len()
);
Ok(())