Merge pull request #58 from jerry73204/generic-path

Generic path argument
guillaume-be 2020-06-30 19:20:06 +02:00 committed by GitHub
commit a067faf574
22 changed files with 486 additions and 191 deletions
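
The change applied across all of the files below is the same: constructors that previously took `p: &nn::Path` now take a generic `p: P` bounded by `P: Borrow<nn::Path<'p>>` and immediately call `p.borrow()`. Because the standard library provides blanket impls `Borrow<T> for T` and `Borrow<T> for &T`, callers may pass either a borrowed path or the owned sub-path returned by `path / "name"`, which is why the `&(p / "name")` wrappers disappear on the rewritten lines. A minimal sketch of the pattern, using a hypothetical `ExampleHead` module rather than any type from this crate:

use std::borrow::Borrow;
use tch::nn;

// Hypothetical sub-module, used only to illustrate the signature change
// applied throughout this commit; it is not a type from rust-bert.
pub struct ExampleHead {
    dense: nn::Linear,
}

impl ExampleHead {
    // Before: pub fn new(p: &nn::Path, hidden_size: i64) -> ExampleHead
    pub fn new<'p, P>(p: P, hidden_size: i64) -> ExampleHead
    where
        P: Borrow<nn::Path<'p>>,
    {
        let p = p.borrow();
        // `&nn::Path / "name"` yields an owned sub-path, which in turn satisfies
        // the same `Borrow<nn::Path>` bound expected by `nn::linear`.
        let dense = nn::linear(p / "dense", hidden_size, hidden_size, Default::default());
        ExampleHead { dense }
    }
}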

View File

@ -17,7 +17,7 @@ use crate::common::activations::{_gelu, _gelu_new, _mish, _relu, _tanh};
use crate::common::dropout::Dropout;
use crate::Config;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::{borrow::Borrow, collections::HashMap};
use tch::nn::Module;
use tch::{nn, Kind, Tensor};
@ -140,11 +140,16 @@ impl AlbertModel {
/// let config = AlbertConfig::from_file(config_path);
/// let albert: AlbertModel = AlbertModel::new(&(&p.root() / "albert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertModel {
let embeddings = AlbertEmbeddings::new(&(p / "embeddings"), config);
let encoder = AlbertTransformer::new(&(p / "encoder"), config);
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertModel
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let embeddings = AlbertEmbeddings::new(p / "embeddings", config);
let encoder = AlbertTransformer::new(p / "encoder", config);
let pooler = nn::linear(
&(p / "pooler"),
p / "pooler",
config.hidden_size,
config.hidden_size,
Default::default(),
@ -288,7 +293,12 @@ pub struct AlbertMLMHead {
}
impl AlbertMLMHead {
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertMLMHead {
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertMLMHead
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let layer_norm_eps = match config.layer_norm_eps {
Some(value) => value,
None => 1e-12,
@ -298,18 +308,18 @@ impl AlbertMLMHead {
..Default::default()
};
let layer_norm = nn::layer_norm(
&(p / "LayerNorm"),
p / "LayerNorm",
vec![config.embedding_size],
layer_norm_config,
);
let dense = nn::linear(
&(p / "dense"),
p / "dense",
config.hidden_size,
config.embedding_size,
Default::default(),
);
let decoder = nn::linear(
&(p / "decoder"),
p / "decoder",
config.embedding_size,
config.vocab_size,
Default::default(),
@ -368,9 +378,14 @@ impl AlbertForMaskedLM {
/// let config = AlbertConfig::from_file(config_path);
/// let albert: AlbertForMaskedLM = AlbertForMaskedLM::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertForMaskedLM {
let albert = AlbertModel::new(&(p / "albert"), config);
let predictions = AlbertMLMHead::new(&(p / "predictions"), config);
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForMaskedLM
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let albert = AlbertModel::new(p / "albert", config);
let predictions = AlbertMLMHead::new(p / "predictions", config);
AlbertForMaskedLM {
albert,
@ -486,8 +501,13 @@ impl AlbertForSequenceClassification {
/// let albert: AlbertForSequenceClassification =
/// AlbertForSequenceClassification::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertForSequenceClassification {
let albert = AlbertModel::new(&(p / "albert"), config);
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForSequenceClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let albert = AlbertModel::new(p / "albert", config);
let classifier_dropout_prob = match config.classifier_dropout_prob {
Some(value) => value,
None => 0.1,
@ -499,7 +519,7 @@ impl AlbertForSequenceClassification {
.expect("num_labels not provided in configuration")
.len() as i64;
let classifier = nn::linear(
&(p / "classifier"),
p / "classifier",
config.hidden_size,
num_labels,
Default::default(),
@ -621,8 +641,13 @@ impl AlbertForTokenClassification {
/// let albert: AlbertForTokenClassification =
/// AlbertForTokenClassification::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertForTokenClassification {
let albert = AlbertModel::new(&(p / "albert"), config);
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForTokenClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let albert = AlbertModel::new(p / "albert", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let num_labels = config
.id2label
@ -630,7 +655,7 @@ impl AlbertForTokenClassification {
.expect("num_labels not provided in configuration")
.len() as i64;
let classifier = nn::linear(
&(p / "classifier"),
p / "classifier",
config.hidden_size,
num_labels,
Default::default(),
@ -750,11 +775,16 @@ impl AlbertForQuestionAnswering {
/// let config = AlbertConfig::from_file(config_path);
/// let albert: AlbertForQuestionAnswering = AlbertForQuestionAnswering::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertForQuestionAnswering {
let albert = AlbertModel::new(&(p / "albert"), config);
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForQuestionAnswering
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let albert = AlbertModel::new(p / "albert", config);
let num_labels = 2;
let qa_outputs = nn::linear(
&(p / "qa_outputs"),
p / "qa_outputs",
config.hidden_size,
num_labels,
Default::default(),
@ -880,12 +910,17 @@ impl AlbertForMultipleChoice {
/// let config = AlbertConfig::from_file(config_path);
/// let albert: AlbertForMultipleChoice = AlbertForMultipleChoice::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertForMultipleChoice {
let albert = AlbertModel::new(&(p / "albert"), config);
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForMultipleChoice
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let albert = AlbertModel::new(p / "albert", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let num_labels = 1;
let classifier = nn::linear(
&(p / "classifier"),
p / "classifier",
config.hidden_size,
num_labels,
Default::default(),
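
On the caller side nothing is forced to change: the documented call style `AlbertModel::new(&(&p.root() / "albert"), &config)` still satisfies the new bound, while the owned sub-path can now be passed directly. A hedged usage sketch, assuming the crate's public re-exports as used in the doc comments above:

use rust_bert::albert::{AlbertConfig, AlbertModel};
use rust_bert::Config;
use std::path::Path;
use tch::{nn, Device};

fn build_albert(config_path: &Path) -> AlbertModel {
    let p = nn::VarStore::new(Device::Cpu);
    let config = AlbertConfig::from_file(config_path);
    // The previous style, `AlbertModel::new(&(&p.root() / "albert"), &config)`,
    // keeps compiling because `&nn::Path` also satisfies the bound; the owned
    // sub-path can now be passed without the extra `&(..)` wrapper.
    AlbertModel::new(&p.root() / "albert", &config)
}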

View File

@ -13,6 +13,7 @@
use crate::albert::AlbertConfig;
use crate::common::dropout::Dropout;
use std::borrow::Borrow;
use tch::nn::{embedding, EmbeddingConfig};
use tch::{nn, Kind, Tensor};
@ -28,7 +29,12 @@ pub struct AlbertEmbeddings {
}
impl AlbertEmbeddings {
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertEmbeddings {
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertEmbeddings
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let embedding_config = EmbeddingConfig {
padding_idx: config.pad_token_id,
..Default::default()

View File

@ -15,7 +15,7 @@ use crate::albert::albert::Activation;
use crate::albert::attention::AlbertSelfAttention;
use crate::albert::AlbertConfig;
use crate::common::activations::{_gelu, _gelu_new, _mish, _relu};
use std::borrow::BorrowMut;
use std::borrow::{Borrow, BorrowMut};
use tch::{nn, Tensor};
pub struct AlbertLayer {
@ -27,7 +27,12 @@ pub struct AlbertLayer {
}
impl AlbertLayer {
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertLayer {
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertLayer
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let attention = AlbertSelfAttention::new(p / "attention", &config);
let layer_norm_eps = match config.layer_norm_eps {
@ -39,19 +44,19 @@ impl AlbertLayer {
..Default::default()
};
let full_layer_layer_norm = nn::layer_norm(
&(p / "full_layer_layer_norm"),
p / "full_layer_layer_norm",
vec![config.hidden_size],
layer_norm_config,
);
let ffn = nn::linear(
&(p / "ffn"),
p / "ffn",
config.hidden_size,
config.intermediate_size,
Default::default(),
);
let ffn_output = nn::linear(
&(p / "ffn_output"),
p / "ffn_output",
config.intermediate_size,
config.hidden_size,
Default::default(),
@ -97,8 +102,11 @@ pub struct AlbertLayerGroup {
}
impl AlbertLayerGroup {
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertLayerGroup {
let p = &(p / "albert_layers");
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertLayerGroup
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow() / "albert_layers";
let output_attentions = match config.output_attentions {
Some(value) => value,
@ -112,7 +120,7 @@ impl AlbertLayerGroup {
let mut layers: Vec<AlbertLayer> = vec![];
for layer_index in 0..config.inner_group_num {
layers.push(AlbertLayer::new(&(p / layer_index), config));
layers.push(AlbertLayer::new(&p / layer_index, config));
}
AlbertLayerGroup {
@ -174,8 +182,12 @@ pub struct AlbertTransformer {
}
impl AlbertTransformer {
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertTransformer {
let p_layers = &(p / "albert_layer_groups");
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertTransformer
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let p_layers = p / "albert_layer_groups";
let output_attentions = match config.output_attentions {
Some(value) => value,
@ -188,7 +200,7 @@ impl AlbertTransformer {
};
let embedding_hidden_mapping_in = nn::linear(
&(p / "embedding_hidden_mapping_in"),
p / "embedding_hidden_mapping_in",
config.embedding_size,
config.hidden_size,
Default::default(),
@ -196,7 +208,7 @@ impl AlbertTransformer {
let mut layers: Vec<AlbertLayerGroup> = vec![];
for layer_index in 0..config.inner_group_num {
layers.push(AlbertLayerGroup::new(&(p_layers / layer_index), config));
layers.push(AlbertLayerGroup::new(&p_layers / layer_index, config));
}
AlbertTransformer {
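
The layer-stack constructors (`AlbertLayerGroup`, `AlbertTransformer`, and the analogous BERT/DistilBERT encoders further down) follow a slightly different shape: the borrowed path is immediately joined with a fixed prefix, which yields an owned `nn::Path`, and the per-layer paths are then built with `&p / layer_index`. A small sketch of that pattern with a hypothetical `ExampleStack`:

use std::borrow::Borrow;
use tch::nn;

// Hypothetical layer stack illustrating the owned-prefix pattern above;
// the field is a plain `nn::Linear` rather than a real transformer layer.
pub struct ExampleStack {
    layers: Vec<nn::Linear>,
}

impl ExampleStack {
    pub fn new<'p, P>(p: P, n_layers: i64, dim: i64) -> ExampleStack
    where
        P: Borrow<nn::Path<'p>>,
    {
        // Joining the borrowed path with a fixed prefix produces an owned
        // `nn::Path`, so each per-layer sub-path is built with `&p / index`.
        let p = p.borrow() / "layer";
        let mut layers = Vec::new();
        for layer_index in 0..n_layers {
            layers.push(nn::linear(&p / layer_index, dim, dim, Default::default()));
        }
        ExampleStack { layers }
    }
}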

View File

@ -18,7 +18,7 @@ use crate::common::dropout::Dropout;
use crate::pipelines::generation::{Cache, LMHeadModel};
use crate::Config;
use serde::{Deserialize, Serialize};
use std::borrow::BorrowMut;
use std::borrow::{Borrow, BorrowMut};
use std::collections::HashMap;
use tch::kind::Kind::{Float, Int64};
use tch::nn::{embedding, EmbeddingConfig};
@ -248,7 +248,12 @@ impl BartModel {
/// let generation_mode = true;
/// let bart: BartModel = BartModel::new(&(&p.root() / "bart"), &config, generation_mode);
/// ```
pub fn new(p: &nn::Path, config: &BartConfig, generation_mode: bool) -> BartModel {
pub fn new<'p, P>(p: P, config: &BartConfig, generation_mode: bool) -> BartModel
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let pad_token_id = match config.pad_token_id {
Some(value) => value,
None => 1,
@ -454,7 +459,7 @@ impl BartForConditionalGeneration {
config: &BartConfig,
generation_mode: bool,
) -> BartForConditionalGeneration {
let base_model = BartModel::new(&(p / "model"), config, generation_mode);
let base_model = BartModel::new(p / "model", config, generation_mode);
BartForConditionalGeneration { base_model }
}
@ -578,16 +583,21 @@ pub struct BartClassificationHead {
}
impl BartClassificationHead {
pub fn new(p: &nn::Path, config: &BartConfig) -> BartClassificationHead {
pub fn new<'p, P>(p: P, config: &BartConfig) -> BartClassificationHead
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let dense = nn::linear(
&(p / "dense"),
p / "dense",
config.d_model,
config.d_model,
Default::default(),
);
let dropout = Dropout::new(config.classif_dropout);
let out_proj = nn::linear(
&(p / "out_proj"),
p / "out_proj",
config.d_model,
config.num_labels.unwrap(),
Default::default(),
@ -645,9 +655,14 @@ impl BartForSequenceClassification {
/// let bart: BartForSequenceClassification =
/// BartForSequenceClassification::new(&(&p.root() / "bart"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BartConfig) -> BartForSequenceClassification {
let base_model = BartModel::new(&(p / "model"), config, false);
let classification_head = BartClassificationHead::new(&(p / "classification_head"), config);
pub fn new<'p, P>(p: P, config: &BartConfig) -> BartForSequenceClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let base_model = BartModel::new(p / "model", config, false);
let classification_head = BartClassificationHead::new(p / "classification_head", config);
let eos_token_id = match config.eos_token_id {
Some(value) => value,
None => 3,

View File

@ -14,6 +14,7 @@
use crate::bert::bert::{Activation, BertConfig};
use crate::common::activations::{_gelu, _mish, _relu};
use crate::common::dropout::Dropout;
use std::borrow::Borrow;
use tch::kind::Kind::Float;
use tch::{nn, Tensor};
@ -141,7 +142,12 @@ pub struct BertSelfOutput {
}
impl BertSelfOutput {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertSelfOutput {
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertSelfOutput
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let linear = nn::linear(
p / "dense",
config.hidden_size,
@ -179,9 +185,14 @@ pub struct BertAttention {
}
impl BertAttention {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertAttention {
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertAttention
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let _self = BertSelfAttention::new(p / "self", config);
let output = BertSelfOutput::new(&(p / "output"), config);
let output = BertSelfOutput::new(p / "output", config);
BertAttention { _self, output }
}
@ -212,7 +223,12 @@ pub struct BertIntermediate {
}
impl BertIntermediate {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertIntermediate {
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertIntermediate
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let lin = nn::linear(
p / "dense",
config.hidden_size,
@ -239,7 +255,12 @@ pub struct BertOutput {
}
impl BertOutput {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertOutput {
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertOutput
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let lin = nn::linear(
p / "dense",
config.intermediate_size,

View File

@ -18,6 +18,7 @@ use crate::common::dropout::Dropout;
use crate::common::linear::{linear_no_bias, LinearNoBias};
use crate::Config;
use serde::{Deserialize, Serialize};
use std::borrow::Borrow;
use std::collections::HashMap;
use tch::kind::Kind::Float;
use tch::nn::Init;
@ -146,14 +147,19 @@ impl<T: BertEmbedding> BertModel<T> {
/// let config = BertConfig::from_file(config_path);
/// let bert: BertModel<BertEmbeddings> = BertModel::new(&(&p.root() / "bert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> BertModel<T> {
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertModel<T>
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let is_decoder = match config.is_decoder {
Some(value) => value,
None => false,
};
let embeddings = T::new(&(p / "embeddings"), config);
let encoder = BertEncoder::new(&(p / "encoder"), config);
let pooler = BertPooler::new(&(p / "pooler"), config);
let embeddings = T::new(p / "embeddings", config);
let encoder = BertEncoder::new(p / "encoder", config);
let pooler = BertPooler::new(p / "pooler", config);
BertModel {
embeddings,
@ -337,7 +343,12 @@ pub struct BertPredictionHeadTransform {
}
impl BertPredictionHeadTransform {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertPredictionHeadTransform {
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertPredictionHeadTransform
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let dense = nn::linear(
p / "dense",
config.hidden_size,
@ -375,11 +386,14 @@ pub struct BertLMPredictionHead {
}
impl BertLMPredictionHead {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertLMPredictionHead {
let p = &(p / "predictions");
let transform = BertPredictionHeadTransform::new(&(p / "transform"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertLMPredictionHead
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow() / "predictions";
let transform = BertPredictionHeadTransform::new(&p / "transform", config);
let decoder = linear_no_bias(
&(p / "decoder"),
&p / "decoder",
config.hidden_size,
config.vocab_size,
Default::default(),
@ -430,9 +444,14 @@ impl BertForMaskedLM {
/// let config = BertConfig::from_file(config_path);
/// let bert = BertForMaskedLM::new(&(&p.root() / "bert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> BertForMaskedLM {
let bert = BertModel::new(&(p / "bert"), config);
let cls = BertLMPredictionHead::new(&(p / "cls"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertForMaskedLM
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let bert = BertModel::new(p / "bert", config);
let cls = BertLMPredictionHead::new(p / "cls", config);
BertForMaskedLM { bert, cls }
}
@ -552,8 +571,13 @@ impl BertForSequenceClassification {
/// let config = BertConfig::from_file(config_path);
/// let bert = BertForSequenceClassification::new(&(&p.root() / "bert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> BertForSequenceClassification {
let bert = BertModel::new(&(p / "bert"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertForSequenceClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let bert = BertModel::new(p / "bert", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let num_labels = config
.id2label
@ -687,8 +711,13 @@ impl BertForMultipleChoice {
/// let config = BertConfig::from_file(config_path);
/// let bert = BertForMultipleChoice::new(&(&p.root() / "bert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> BertForMultipleChoice {
let bert = BertModel::new(&(p / "bert"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertForMultipleChoice
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let bert = BertModel::new(p / "bert", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let classifier = nn::linear(p / "classifier", config.hidden_size, 1, Default::default());
@ -825,8 +854,13 @@ impl BertForTokenClassification {
/// let config = BertConfig::from_file(config_path);
/// let bert = BertForTokenClassification::new(&(&p.root() / "bert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> BertForTokenClassification {
let bert = BertModel::new(&(p / "bert"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertForTokenClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let bert = BertModel::new(p / "bert", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let num_labels = config
.id2label
@ -959,8 +993,13 @@ impl BertForQuestionAnswering {
/// let config = BertConfig::from_file(config_path);
/// let bert = BertForQuestionAnswering::new(&(&p.root() / "bert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> BertForQuestionAnswering {
let bert = BertModel::new(&(p / "bert"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertForQuestionAnswering
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let bert = BertModel::new(p / "bert", config);
let num_labels = 2;
let qa_outputs = nn::linear(
p / "qa_outputs",

View File

@ -13,13 +13,16 @@
use crate::bert::bert::BertConfig;
use crate::common::dropout::Dropout;
use std::borrow::Borrow;
use tch::nn::{embedding, EmbeddingConfig};
use tch::{nn, Kind, Tensor};
/// # BertEmbedding trait (for use in BertModel or RoBERTaModel)
/// Defines an interface for the embedding layers in BERT-based models
pub trait BertEmbedding {
fn new(p: &nn::Path, config: &BertConfig) -> Self;
fn new<'p, P>(p: P, config: &BertConfig) -> Self
where
P: Borrow<nn::Path<'p>>;
fn forward_t(
&self,
@ -64,7 +67,12 @@ impl BertEmbedding for BertEmbeddings {
/// let config = BertConfig::from_file(config_path);
/// let bert_embeddings = BertEmbeddings::new(&(&p.root() / "bert_embeddings"), &config);
/// ```
fn new(p: &nn::Path, config: &BertConfig) -> BertEmbeddings {
fn new<'p, P>(p: P, config: &BertConfig) -> BertEmbeddings
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let embedding_config = EmbeddingConfig {
padding_idx: 0,
..Default::default()
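
Because `BertEmbedding::new` is a trait method, the generic parameter and its `Borrow<nn::Path<'p>>` bound have to be declared on the trait itself and repeated in every implementation (here and in the RoBERTa embeddings further down). A hedged sketch of that trait-level shape, with hypothetical types:

use std::borrow::Borrow;
use tch::nn;

// Hypothetical trait and embedding type; the vocabulary size below is an
// arbitrary illustrative constant.
pub trait ExampleEmbedding {
    fn new<'p, P>(p: P, dim: i64) -> Self
    where
        P: Borrow<nn::Path<'p>>;
}

pub struct ExampleEmbeddings {
    word_embeddings: nn::Embedding,
}

impl ExampleEmbedding for ExampleEmbeddings {
    fn new<'p, P>(p: P, dim: i64) -> Self
    where
        P: Borrow<nn::Path<'p>>,
    {
        let p = p.borrow();
        let word_embeddings = nn::embedding(p / "word_embeddings", 250, dim, Default::default());
        ExampleEmbeddings { word_embeddings }
    }
}

A generic method like this rules out trait objects, so the trait is consumed through static dispatch, as `BertModel<T: BertEmbedding>` already does.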

View File

@ -13,7 +13,7 @@
use crate::bert::attention::{BertAttention, BertIntermediate, BertOutput};
use crate::bert::bert::BertConfig;
use std::borrow::BorrowMut;
use std::borrow::{Borrow, BorrowMut};
use tch::{nn, Tensor};
pub struct BertLayer {
@ -25,14 +25,19 @@ pub struct BertLayer {
}
impl BertLayer {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertLayer {
let attention = BertAttention::new(&(p / "attention"), &config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertLayer
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let attention = BertAttention::new(p / "attention", &config);
let (is_decoder, cross_attention) = match config.is_decoder {
Some(value) => {
if value == true {
(
value,
Some(BertAttention::new(&(p / "cross_attention"), &config)),
Some(BertAttention::new(p / "cross_attention", &config)),
)
} else {
(value, None)
@ -41,8 +46,8 @@ impl BertLayer {
None => (false, None),
};
let intermediate = BertIntermediate::new(&(p / "intermediate"), &config);
let output = BertOutput::new(&(p / "output"), &config);
let intermediate = BertIntermediate::new(p / "intermediate", &config);
let output = BertOutput::new(p / "output", &config);
BertLayer {
attention,
@ -96,8 +101,11 @@ pub struct BertEncoder {
}
impl BertEncoder {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertEncoder {
let p = &(p / "layer");
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertEncoder
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow() / "layer";
let output_attentions = if let Some(value) = config.output_attentions {
value
} else {
@ -111,7 +119,7 @@ impl BertEncoder {
let mut layers: Vec<BertLayer> = vec![];
for layer_index in 0..config.num_hidden_layers {
layers.push(BertLayer::new(&(p / layer_index), config));
layers.push(BertLayer::new(&p / layer_index, config));
}
BertEncoder {
@ -176,9 +184,14 @@ pub struct BertPooler {
}
impl BertPooler {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertPooler {
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertPooler
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let lin = nn::linear(
&(p / "dense"),
p / "dense",
config.hidden_size,
config.hidden_size,
Default::default(),

View File

@ -18,7 +18,7 @@ use crate::distilbert::embeddings::DistilBertEmbedding;
use crate::distilbert::transformer::Transformer;
use crate::Config;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::{borrow::Borrow, collections::HashMap};
/// # DistilBERT Pretrained model weight files
pub struct DistilBertModelResources;
@ -156,10 +156,13 @@ impl DistilBertModel {
/// let config = DistilBertConfig::from_file(config_path);
/// let distil_bert: DistilBertModel = DistilBertModel::new(&(&p.root() / "distilbert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> DistilBertModel {
let p = &(p / "distilbert");
let embeddings = DistilBertEmbedding::new(&(p / "embeddings"), config);
let transformer = Transformer::new(&(p / "transformer"), config);
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> DistilBertModel
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow() / "distilbert";
let embeddings = DistilBertEmbedding::new(&p / "embeddings", config);
let transformer = Transformer::new(&p / "transformer", config);
DistilBertModel {
embeddings,
transformer,
@ -268,8 +271,13 @@ impl DistilBertModelClassifier {
/// let distil_bert: DistilBertModelClassifier =
/// DistilBertModelClassifier::new(&(&p.root() / "distilbert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> DistilBertModelClassifier {
let distil_bert_model = DistilBertModel::new(&p, config);
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> DistilBertModelClassifier
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let distil_bert_model = DistilBertModel::new(p, config);
let num_labels = config
.id2label
@ -278,17 +286,12 @@ impl DistilBertModelClassifier {
.len() as i64;
let pre_classifier = nn::linear(
&(p / "pre_classifier"),
p / "pre_classifier",
config.dim,
config.dim,
Default::default(),
);
let classifier = nn::linear(
&(p / "classifier"),
config.dim,
num_labels,
Default::default(),
);
let classifier = nn::linear(p / "classifier", config.dim, num_labels, Default::default());
let dropout = Dropout::new(config.seq_classif_dropout);
DistilBertModelClassifier {
@ -403,10 +406,15 @@ impl DistilBertModelMaskedLM {
/// let config = DistilBertConfig::from_file(config_path);
/// let distil_bert = DistilBertModelMaskedLM::new(&(&p.root() / "distilbert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> DistilBertModelMaskedLM {
let distil_bert_model = DistilBertModel::new(&p, config);
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> DistilBertModelMaskedLM
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let distil_bert_model = DistilBertModel::new(p, config);
let vocab_transform = nn::linear(
&(p / "vocab_transform"),
p / "vocab_transform",
config.dim,
config.dim,
Default::default(),
@ -418,7 +426,7 @@ impl DistilBertModelMaskedLM {
let vocab_layer_norm =
nn::layer_norm(p / "vocab_layer_norm", vec![config.dim], layer_norm_config);
let vocab_projector = nn::linear(
&(p / "vocab_projector"),
p / "vocab_projector",
config.dim,
config.vocab_size,
Default::default(),
@ -532,9 +540,14 @@ impl DistilBertForQuestionAnswering {
/// let config = DistilBertConfig::from_file(config_path);
/// let distil_bert = DistilBertForQuestionAnswering::new(&(&p.root() / "distilbert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> DistilBertForQuestionAnswering {
let distil_bert_model = DistilBertModel::new(&p, config);
let qa_outputs = nn::linear(&(p / "qa_outputs"), config.dim, 2, Default::default());
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> DistilBertForQuestionAnswering
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let distil_bert_model = DistilBertModel::new(p, config);
let qa_outputs = nn::linear(p / "qa_outputs", config.dim, 2, Default::default());
let dropout = Dropout::new(config.qa_dropout);
DistilBertForQuestionAnswering {
@ -645,8 +658,13 @@ impl DistilBertForTokenClassification {
/// let config = DistilBertConfig::from_file(config_path);
/// let distil_bert = DistilBertForTokenClassification::new(&(&p.root() / "distilbert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> DistilBertForTokenClassification {
let distil_bert_model = DistilBertModel::new(&p, config);
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> DistilBertForTokenClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let distil_bert_model = DistilBertModel::new(p, config);
let num_labels = config
.id2label
@ -654,12 +672,7 @@ impl DistilBertForTokenClassification {
.expect("id2label must be provided for classifiers")
.len() as i64;
let classifier = nn::linear(
&(p / "classifier"),
config.dim,
num_labels,
Default::default(),
);
let classifier = nn::linear(p / "classifier", config.dim, num_labels, Default::default());
let dropout = Dropout::new(config.seq_classif_dropout);
DistilBertForTokenClassification {

View File

@ -12,6 +12,7 @@
use crate::common::dropout::Dropout;
use crate::distilbert::distilbert::DistilBertConfig;
use std::borrow::Borrow;
use tch::kind::Kind::Float;
use tch::nn::{embedding, EmbeddingConfig, ModuleT};
use tch::{nn, Device, Kind, Tensor};
@ -63,7 +64,12 @@ pub struct DistilBertEmbedding {
}
impl DistilBertEmbedding {
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> DistilBertEmbedding {
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> DistilBertEmbedding
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let embedding_config = EmbeddingConfig {
padding_idx: 0,
..Default::default()

View File

@ -14,7 +14,7 @@ use crate::common::activations::{_gelu, _relu};
use crate::common::dropout::Dropout;
use crate::distilbert::attention::MultiHeadSelfAttention;
use crate::distilbert::distilbert::{Activation, DistilBertConfig};
use std::borrow::BorrowMut;
use std::borrow::{Borrow, BorrowMut};
use tch::nn::LayerNorm;
use tch::{nn, Tensor};
@ -67,7 +67,12 @@ pub struct TransformerBlock {
}
impl TransformerBlock {
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> TransformerBlock {
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> TransformerBlock
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let attention = MultiHeadSelfAttention::new(p / "attention", &config);
let layer_norm_config = nn::LayerNormConfig {
eps: 1e-12,
@ -109,8 +114,11 @@ pub struct Transformer {
}
impl Transformer {
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> Transformer {
let p = &(p / "layer");
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> Transformer
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow() / "layer";
let output_attentions = match config.output_attentions {
Some(value) => value,
None => false,
@ -122,7 +130,7 @@ impl Transformer {
let mut layers: Vec<TransformerBlock> = vec![];
for layer_index in 0..config.n_layers {
layers.push(TransformerBlock::new(&(p / layer_index), config));
layers.push(TransformerBlock::new(&p / layer_index, config));
}
Transformer {

View File

@ -19,7 +19,7 @@ use crate::common::dropout::Dropout;
use crate::electra::embeddings::ElectraEmbeddings;
use crate::Config;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::{borrow::Borrow, collections::HashMap};
use tch::{nn, Kind, Tensor};
/// # Electra Pretrained model weight files
@ -132,11 +132,16 @@ impl ElectraModel {
/// let config = ElectraConfig::from_file(config_path);
/// let electra_model: ElectraModel = ElectraModel::new(&(&p.root() / "electra"), &config);
/// ```
pub fn new(p: &nn::Path, config: &ElectraConfig) -> ElectraModel {
let embeddings = ElectraEmbeddings::new(&(p / "embeddings"), config);
pub fn new<'p, P>(p: P, config: &ElectraConfig) -> ElectraModel
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let embeddings = ElectraEmbeddings::new(p / "embeddings", config);
let embeddings_project = if config.embedding_size != config.hidden_size {
Some(nn::linear(
&(p / "embeddings_project"),
p / "embeddings_project",
config.embedding_size,
config.hidden_size,
Default::default(),
@ -162,7 +167,7 @@ impl ElectraModel {
id2label: config.id2label.clone(),
label2id: config.label2id.clone(),
};
let encoder = BertEncoder::new(&(p / "encoder"), &bert_config);
let encoder = BertEncoder::new(p / "encoder", &bert_config);
ElectraModel {
embeddings,
embeddings_project,
@ -322,15 +327,20 @@ impl ElectraDiscriminatorHead {
/// let config = ElectraConfig::from_file(config_path);
/// let discriminator_head = ElectraDiscriminatorHead::new(&(&p.root() / "electra"), &config);
/// ```
pub fn new(p: &nn::Path, config: &ElectraConfig) -> ElectraDiscriminatorHead {
pub fn new<'p, P>(p: P, config: &ElectraConfig) -> ElectraDiscriminatorHead
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let dense = nn::linear(
&(p / "dense"),
p / "dense",
config.hidden_size,
config.hidden_size,
Default::default(),
);
let dense_prediction = nn::linear(
&(p / "dense_prediction"),
p / "dense_prediction",
config.hidden_size,
1,
Default::default(),
@ -422,14 +432,19 @@ impl ElectraGeneratorHead {
/// let config = ElectraConfig::from_file(config_path);
/// let generator_head = ElectraGeneratorHead::new(&(&p.root() / "electra"), &config);
/// ```
pub fn new(p: &nn::Path, config: &ElectraConfig) -> ElectraGeneratorHead {
pub fn new<'p, P>(p: P, config: &ElectraConfig) -> ElectraGeneratorHead
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let layer_norm = nn::layer_norm(
p / "LayerNorm",
vec![config.embedding_size],
Default::default(),
);
let dense = nn::linear(
&(p / "dense"),
p / "dense",
config.hidden_size,
config.embedding_size,
Default::default(),
@ -516,11 +531,16 @@ impl ElectraForMaskedLM {
/// let config = ElectraConfig::from_file(config_path);
/// let electra_model: ElectraForMaskedLM = ElectraForMaskedLM::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &ElectraConfig) -> ElectraForMaskedLM {
let electra = ElectraModel::new(&(p / "electra"), config);
let generator_head = ElectraGeneratorHead::new(&(p / "generator_predictions"), config);
pub fn new<'p, P>(p: P, config: &ElectraConfig) -> ElectraForMaskedLM
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let electra = ElectraModel::new(p / "electra", config);
let generator_head = ElectraGeneratorHead::new(p / "generator_predictions", config);
let lm_head = nn::linear(
&(p / "generator_lm_head"),
p / "generator_lm_head",
config.embedding_size,
config.vocab_size,
Default::default(),
@ -640,10 +660,15 @@ impl ElectraDiscriminator {
/// let config = ElectraConfig::from_file(config_path);
/// let electra_model: ElectraDiscriminator = ElectraDiscriminator::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &ElectraConfig) -> ElectraDiscriminator {
let electra = ElectraModel::new(&(p / "electra"), config);
pub fn new<'p, P>(p: P, config: &ElectraConfig) -> ElectraDiscriminator
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let electra = ElectraModel::new(p / "electra", config);
let discriminator_head =
ElectraDiscriminatorHead::new(&(p / "discriminator_predictions"), config);
ElectraDiscriminatorHead::new(p / "discriminator_predictions", config);
ElectraDiscriminator {
electra,
@ -757,8 +782,13 @@ impl ElectraForTokenClassification {
/// let electra_model: ElectraForTokenClassification =
/// ElectraForTokenClassification::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &ElectraConfig) -> ElectraForTokenClassification {
let electra = ElectraModel::new(&(p / "electra"), config);
pub fn new<'p, P>(p: P, config: &ElectraConfig) -> ElectraForTokenClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let electra = ElectraModel::new(p / "electra", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let num_labels = config
.id2label
@ -766,7 +796,7 @@ impl ElectraForTokenClassification {
.expect("id2label must be provided for classifiers")
.len() as i64;
let classifier = nn::linear(
&(p / "classifier"),
p / "classifier",
config.hidden_size,
num_labels,
Default::default(),

View File

@ -14,6 +14,7 @@
use crate::common::dropout::Dropout;
use crate::electra::electra::ElectraConfig;
use std::borrow::Borrow;
use tch::nn::{embedding, EmbeddingConfig};
use tch::{nn, Kind, Tensor};
@ -28,7 +29,12 @@ pub struct ElectraEmbeddings {
}
impl ElectraEmbeddings {
pub fn new(p: &nn::Path, config: &ElectraConfig) -> ElectraEmbeddings {
pub fn new<'p, P>(p: P, config: &ElectraConfig) -> ElectraEmbeddings
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let embedding_config = EmbeddingConfig {
padding_idx: config.pad_token_id,
..Default::default()

View File

@ -14,6 +14,7 @@
use crate::common::dropout::Dropout;
use crate::gpt2::gpt2::Gpt2Config;
use std::borrow::Borrow;
use tch::kind::Kind::Float;
use tch::nn::{Init, Module};
use tch::{nn, Tensor};
@ -25,7 +26,12 @@ pub struct GPTConv1D {
}
impl GPTConv1D {
pub fn new(p: &nn::Path, nf: i64, nx: i64) -> GPTConv1D {
pub fn new<'p, P>(p: P, nf: i64, nx: i64) -> GPTConv1D
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let weight = p.var(
"weight",
&[nx, nf],
@ -59,12 +65,17 @@ pub struct Attention {
}
impl Attention {
pub fn new(p: &nn::Path, config: &Gpt2Config, scale: bool) -> Attention {
pub fn new<'p, P>(p: P, config: &Gpt2Config, scale: bool) -> Attention
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let bias = Tensor::ones(&[config.n_ctx, config.n_ctx], (Float, p.device()))
.tril(0)
.view((1, 1, config.n_ctx, config.n_ctx));
let c_attn = GPTConv1D::new(&(p / "c_attn"), config.n_embd * 3, config.n_embd);
let c_proj = GPTConv1D::new(&(p / "c_proj"), config.n_embd, config.n_embd);
let c_attn = GPTConv1D::new(p / "c_attn", config.n_embd * 3, config.n_embd);
let c_proj = GPTConv1D::new(p / "c_proj", config.n_embd, config.n_embd);
let attn_pdrop = match config.attn_pdrop {
Some(value) => value,

View File

@ -18,7 +18,7 @@ use crate::gpt2::transformer::Block;
use crate::pipelines::generation::{Cache, LMHeadModel};
use crate::Config;
use serde::{Deserialize, Serialize};
use std::borrow::BorrowMut;
use std::borrow::{Borrow, BorrowMut};
use tch::kind::Kind::Int64;
use tch::nn::embedding;
use tch::{nn, Tensor};
@ -247,16 +247,20 @@ impl Gpt2Model {
/// let config = Gpt2Config::from_file(config_path);
/// let gpt2: Gpt2Model = Gpt2Model::new(&(&p.root() / "gpt2"), &config);
/// ```
pub fn new(p: &nn::Path, config: &Gpt2Config) -> Gpt2Model {
let p = &(p / "transformer");
pub fn new<'p, P>(p: P, config: &Gpt2Config) -> Gpt2Model
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow() / "transformer";
let wte = embedding(
&(p / "wte"),
&p / "wte",
config.vocab_size,
config.n_embd,
Default::default(),
);
let wpe = embedding(
&(p / "wpe"),
&p / "wpe",
config.n_positions,
config.n_embd,
Default::default(),
@ -271,11 +275,11 @@ impl Gpt2Model {
eps: config.layer_norm_epsilon,
..Default::default()
};
let ln_f = nn::layer_norm(p / "ln_f", vec![config.n_embd], layer_norm_config);
let ln_f = nn::layer_norm(&p / "ln_f", vec![config.n_embd], layer_norm_config);
let mut h: Vec<Block> = vec![];
let h_path = &(p / "h");
let h_path = &p / "h";
for layer_index in 0..config.n_layer {
h.push(Block::new(&(h_path / layer_index), config, true));
h.push(Block::new(&h_path / layer_index, config, true));
}
let output_attentions = match config.output_attentions {
Some(value) => value,
@ -531,10 +535,15 @@ impl GPT2LMHeadModel {
/// let config = Gpt2Config::from_file(config_path);
/// let gpt2: GPT2LMHeadModel = GPT2LMHeadModel::new(&(&p.root() / "gpt2"), &config);
/// ```
pub fn new(p: &nn::Path, config: &Gpt2Config) -> GPT2LMHeadModel {
let transformer = Gpt2Model::new(&p, config);
pub fn new<'p, P>(p: P, config: &Gpt2Config) -> GPT2LMHeadModel
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let transformer = Gpt2Model::new(p, config);
let lm_head = linear_no_bias(
&(p / "lm_head"),
p / "lm_head",
config.n_embd,
config.vocab_size,
Default::default(),
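
When a wrapper forwards its entire path to a nested model, as `GPT2LMHeadModel` does with `Gpt2Model::new(p, config)` above (and `DistilBertModelClassifier` earlier), the borrowed `&nn::Path` itself satisfies the bound, so no clone or re-wrapping is needed. A minimal sketch with hypothetical `Inner`/`Outer` types:

use std::borrow::Borrow;
use tch::nn;

// Hypothetical Inner/Outer pair mirroring GPT2LMHeadModel -> Gpt2Model above.
pub struct Inner {
    proj: nn::Linear,
}

impl Inner {
    pub fn new<'p, P>(p: P, dim: i64) -> Inner
    where
        P: Borrow<nn::Path<'p>>,
    {
        let p = p.borrow();
        Inner {
            proj: nn::linear(p / "proj", dim, dim, Default::default()),
        }
    }
}

pub struct Outer {
    inner: Inner,
    head: nn::Linear,
}

impl Outer {
    pub fn new<'p, P>(p: P, dim: i64) -> Outer
    where
        P: Borrow<nn::Path<'p>>,
    {
        let p = p.borrow();
        // `p` is already a `&nn::Path`; the reference itself satisfies the
        // `Borrow<nn::Path>` bound of the nested constructor.
        let inner = Inner::new(p, dim);
        let head = nn::linear(p / "head", dim, dim, Default::default());
        Outer { inner, head }
    }
}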

View File

@ -16,6 +16,7 @@ use crate::common::activations::{_gelu_new, _relu, _swish};
use crate::common::dropout::Dropout;
use crate::gpt2::attention::{Attention, GPTConv1D};
use crate::gpt2::gpt2::{Gpt2Config, GptActivation};
use std::borrow::Borrow;
use tch::{nn, Tensor};
pub struct MLP {
@ -26,9 +27,14 @@ pub struct MLP {
}
impl MLP {
pub fn new(p: &nn::Path, config: &Gpt2Config) -> MLP {
let c_fc = GPTConv1D::new(&(p / "c_fc"), config.n_embd * 4, config.n_embd);
let c_proj = GPTConv1D::new(&(p / "c_proj"), config.n_embd, config.n_embd * 4);
pub fn new<'p, P>(p: P, config: &Gpt2Config) -> MLP
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let c_fc = GPTConv1D::new(p / "c_fc", config.n_embd * 4, config.n_embd);
let c_proj = GPTConv1D::new(p / "c_proj", config.n_embd, config.n_embd * 4);
let activation = Box::new(match &config.afn {
Some(activation_enum) => match activation_enum {
GptActivation::gelu => _gelu_new,
@ -64,15 +70,20 @@ pub struct Block {
}
impl Block {
pub fn new(p: &nn::Path, config: &Gpt2Config, scale: bool) -> Block {
pub fn new<'p, P>(p: P, config: &Gpt2Config, scale: bool) -> Block
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let layer_norm_config = nn::LayerNormConfig {
eps: config.layer_norm_epsilon,
..Default::default()
};
let ln_1 = nn::layer_norm(p / "ln_1", vec![config.n_embd], layer_norm_config);
let ln_2 = nn::layer_norm(p / "ln_2", vec![config.n_embd], layer_norm_config);
let attn = Attention::new(&(p / "attn"), config, scale);
let mlp = MLP::new(&(p / "mlp"), config);
let attn = Attention::new(p / "attn", config, scale);
let mlp = MLP::new(p / "mlp", config);
Block {
ln_1,

View File

@ -264,7 +264,7 @@ impl MarianForConditionalGeneration {
config: &BartConfig,
generation_mode: bool,
) -> MarianForConditionalGeneration {
let base_model = BartModel::new(&(p / "model"), config, generation_mode);
let base_model = BartModel::new(p / "model", config, generation_mode);
let final_logits_bias = p.var(
"final_logits_bias",
&[1, config.vocab_size],

View File

@ -17,7 +17,7 @@ use crate::common::linear::{linear_no_bias, LinearNoBias};
use crate::gpt2::Gpt2Config;
use crate::openai_gpt::transformer::Block;
use crate::pipelines::generation::{Cache, LMHeadModel};
use std::borrow::BorrowMut;
use std::borrow::{Borrow, BorrowMut};
use tch::kind::Kind::Int64;
use tch::nn::embedding;
use tch::{nn, Tensor};
@ -106,15 +106,20 @@ impl OpenAiGptModel {
/// let config = Gpt2Config::from_file(config_path);
/// let gpt2: OpenAiGptModel = OpenAiGptModel::new(&(&p.root() / "gpt"), &config);
/// ```
pub fn new(p: &nn::Path, config: &Gpt2Config) -> OpenAiGptModel {
pub fn new<'p, P>(p: P, config: &Gpt2Config) -> OpenAiGptModel
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let tokens_embed = embedding(
&(p / "tokens_embed"),
p / "tokens_embed",
config.vocab_size,
config.n_embd,
Default::default(),
);
let positions_embed = embedding(
&(p / "positions_embed"),
p / "positions_embed",
config.n_positions,
config.n_embd,
Default::default(),
@ -126,9 +131,9 @@ impl OpenAiGptModel {
};
let drop = Dropout::new(embd_pdrop);
let mut h: Vec<Block> = vec![];
let h_path = &(p / "h");
let h_path = p / "h";
for layer_index in 0..config.n_layer {
h.push(Block::new(&(h_path / layer_index), config, true));
h.push(Block::new(&h_path / layer_index, config, true));
}
let output_attentions = match config.output_attentions {
Some(value) => value,
@ -317,10 +322,15 @@ impl OpenAIGPTLMHeadModel {
/// let config = Gpt2Config::from_file(config_path);
/// let gpt2: OpenAIGPTLMHeadModel = OpenAIGPTLMHeadModel::new(&(&p.root() / "gpt"), &config);
/// ```
pub fn new(p: &nn::Path, config: &Gpt2Config) -> OpenAIGPTLMHeadModel {
let transformer = OpenAiGptModel::new(&p, config);
pub fn new<'p, P>(p: P, config: &Gpt2Config) -> OpenAIGPTLMHeadModel
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let transformer = OpenAiGptModel::new(p, config);
let lm_head = linear_no_bias(
&(p / "lm_head"),
p / "lm_head",
config.n_embd,
config.vocab_size,
Default::default(),

View File

@ -15,6 +15,7 @@
use crate::gpt2::attention::Attention;
use crate::gpt2::transformer::MLP;
use crate::gpt2::Gpt2Config;
use std::borrow::Borrow;
use tch::{nn, Tensor};
pub struct Block {
@ -25,15 +26,20 @@ pub struct Block {
}
impl Block {
pub fn new(p: &nn::Path, config: &Gpt2Config, scale: bool) -> Block {
pub fn new<'p, P>(p: P, config: &Gpt2Config, scale: bool) -> Block
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let layer_norm_config = nn::LayerNormConfig {
eps: config.layer_norm_epsilon,
..Default::default()
};
let ln_1 = nn::layer_norm(p / "ln_1", vec![config.n_embd], layer_norm_config);
let ln_2 = nn::layer_norm(p / "ln_2", vec![config.n_embd], layer_norm_config);
let attn = Attention::new(&(p / "attn"), config, scale);
let mlp = MLP::new(&(p / "mlp"), config);
let attn = Attention::new(p / "attn", config, scale);
let mlp = MLP::new(p / "mlp", config);
Block {
ln_1,

View File

@ -1661,7 +1661,7 @@ pub(crate) mod private_generation_utils {
assert!(
eos_token_ids.is_some() & pad_token_id.is_some(),
"EOS and Padding tokens need to be defined if the number of generated \
beams is greater than the target number of beams"
beams is greater than the target number of beams"
);
next_batch_beam.append(
&mut (0..num_beams)

View File

@ -13,6 +13,7 @@
use crate::bert::{BertConfig, BertEmbedding};
use crate::common::dropout::Dropout;
use std::borrow::Borrow;
use tch::nn::{embedding, EmbeddingConfig};
use tch::{nn, Kind, Tensor};
@ -69,7 +70,12 @@ impl BertEmbedding for RobertaEmbeddings {
/// let config = BertConfig::from_file(config_path);
/// let robert_embeddings = RobertaEmbeddings::new(&(&p.root() / "bert_embeddings"), &config);
/// ```
fn new(p: &nn::Path, config: &BertConfig) -> RobertaEmbeddings {
fn new<'p, P>(p: P, config: &BertConfig) -> RobertaEmbeddings
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let embedding_config = EmbeddingConfig {
padding_idx: 1,
..Default::default()

View File

@ -16,6 +16,7 @@ use crate::common::activations::_gelu;
use crate::common::dropout::Dropout;
use crate::common::linear::{linear_no_bias, LinearNoBias};
use crate::roberta::embeddings::RobertaEmbeddings;
use std::borrow::Borrow;
use tch::nn::Init;
use tch::{nn, Tensor};
@ -71,7 +72,11 @@ pub struct RobertaLMHead {
}
impl RobertaLMHead {
pub fn new(p: &nn::Path, config: &BertConfig) -> RobertaLMHead {
pub fn new<'p, P>(p: P, config: &BertConfig) -> RobertaLMHead
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let dense = nn::linear(
p / "dense",
config.hidden_size,
@ -88,7 +93,7 @@ impl RobertaLMHead {
layer_norm_config,
);
let decoder = linear_no_bias(
&(p / "decoder"),
p / "decoder",
config.hidden_size,
config.vocab_size,
Default::default(),
@ -144,9 +149,14 @@ impl RobertaForMaskedLM {
/// let config = BertConfig::from_file(config_path);
/// let roberta = RobertaForMaskedLM::new(&(&p.root() / "roberta"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> RobertaForMaskedLM {
let roberta = BertModel::<RobertaEmbeddings>::new(&(p / "roberta"), config);
let lm_head = RobertaLMHead::new(&(p / "lm_head"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> RobertaForMaskedLM
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let roberta = BertModel::<RobertaEmbeddings>::new(p / "roberta", config);
let lm_head = RobertaLMHead::new(p / "lm_head", config);
RobertaForMaskedLM { roberta, lm_head }
}
@ -242,7 +252,11 @@ pub struct RobertaClassificationHead {
}
impl RobertaClassificationHead {
pub fn new(p: &nn::Path, config: &BertConfig) -> RobertaClassificationHead {
pub fn new<'p, P>(p: P, config: &BertConfig) -> RobertaClassificationHead
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let dense = nn::linear(
p / "dense",
config.hidden_size,
@ -313,9 +327,13 @@ impl RobertaForSequenceClassification {
/// let config = BertConfig::from_file(config_path);
/// let roberta = RobertaForSequenceClassification::new(&(&p.root() / "roberta"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> RobertaForSequenceClassification {
let roberta = BertModel::<RobertaEmbeddings>::new(&(p / "roberta"), config);
let classifier = RobertaClassificationHead::new(&(p / "classifier"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> RobertaForSequenceClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let roberta = BertModel::<RobertaEmbeddings>::new(p / "roberta", config);
let classifier = RobertaClassificationHead::new(p / "classifier", config);
RobertaForSequenceClassification {
roberta,
@ -437,8 +455,12 @@ impl RobertaForMultipleChoice {
/// let config = BertConfig::from_file(config_path);
/// let roberta = RobertaForMultipleChoice::new(&(&p.root() / "roberta"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> RobertaForMultipleChoice {
let roberta = BertModel::<RobertaEmbeddings>::new(&(p / "roberta"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> RobertaForMultipleChoice
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let roberta = BertModel::<RobertaEmbeddings>::new(p / "roberta", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let classifier = nn::linear(p / "classifier", config.hidden_size, 1, Default::default());
@ -578,8 +600,12 @@ impl RobertaForTokenClassification {
/// let config = BertConfig::from_file(config_path);
/// let roberta = RobertaForTokenClassification::new(&(&p.root() / "roberta"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> RobertaForTokenClassification {
let roberta = BertModel::<RobertaEmbeddings>::new(&(p / "roberta"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> RobertaForTokenClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let roberta = BertModel::<RobertaEmbeddings>::new(p / "roberta", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let num_labels = config
.id2label
@ -715,8 +741,12 @@ impl RobertaForQuestionAnswering {
/// let config = BertConfig::from_file(config_path);
/// let roberta = RobertaForQuestionAnswering::new(&(&p.root() / "roberta"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> RobertaForQuestionAnswering {
let roberta = BertModel::<RobertaEmbeddings>::new(&(p / "roberta"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> RobertaForQuestionAnswering
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let roberta = BertModel::<RobertaEmbeddings>::new(p / "roberta", config);
let num_labels = 2;
let qa_outputs = nn::linear(
p / "qa_outputs",