Merge pull request #58 from jerry73204/generic-path

Generic path argument
guillaume-be 2020-06-30 19:20:06 +02:00 committed by GitHub
commit a067faf574
22 changed files with 486 additions and 191 deletions
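
The change applied across all of the files below is the same: constructors that previously took `p: &nn::Path` now take a generic `p: P` bounded by `P: Borrow<nn::Path<'p>>` and immediately call `p.borrow()`. Because the standard library provides blanket impls `Borrow<T> for T` and `Borrow<T> for &T`, callers may pass either a borrowed path or the owned sub-path returned by `path / "name"`, which is why the `&(p / "name")` wrappers disappear on the rewritten lines. A minimal sketch of the pattern, using a hypothetical `ExampleHead` module rather than any type from this crate:

use std::borrow::Borrow;
use tch::nn;

// Hypothetical sub-module, used only to illustrate the signature change
// applied throughout this commit; it is not a type from rust-bert.
pub struct ExampleHead {
    dense: nn::Linear,
}

impl ExampleHead {
    // Before: pub fn new(p: &nn::Path, hidden_size: i64) -> ExampleHead
    pub fn new<'p, P>(p: P, hidden_size: i64) -> ExampleHead
    where
        P: Borrow<nn::Path<'p>>,
    {
        let p = p.borrow();
        // `&nn::Path / "name"` yields an owned sub-path, which in turn satisfies
        // the same `Borrow<nn::Path>` bound expected by `nn::linear`.
        let dense = nn::linear(p / "dense", hidden_size, hidden_size, Default::default());
        ExampleHead { dense }
    }
}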

View File

@ -17,7 +17,7 @@ use crate::common::activations::{_gelu, _gelu_new, _mish, _relu, _tanh};
use crate::common::dropout::Dropout;
use crate::Config;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::{borrow::Borrow, collections::HashMap};
use tch::nn::Module;
use tch::{nn, Kind, Tensor};
@ -140,11 +140,16 @@ impl AlbertModel {
/// let config = AlbertConfig::from_file(config_path);
/// let albert: AlbertModel = AlbertModel::new(&(&p.root() / "albert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertModel {
let embeddings = AlbertEmbeddings::new(&(p / "embeddings"), config);
let encoder = AlbertTransformer::new(&(p / "encoder"), config);
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertModel
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let embeddings = AlbertEmbeddings::new(p / "embeddings", config);
let encoder = AlbertTransformer::new(p / "encoder", config);
let pooler = nn::linear(
&(p / "pooler"),
p / "pooler",
config.hidden_size,
config.hidden_size,
Default::default(),
@ -288,7 +293,12 @@ pub struct AlbertMLMHead {
}
impl AlbertMLMHead {
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertMLMHead {
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertMLMHead
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let layer_norm_eps = match config.layer_norm_eps {
Some(value) => value,
None => 1e-12,
@ -298,18 +308,18 @@ impl AlbertMLMHead {
..Default::default()
};
let layer_norm = nn::layer_norm(
&(p / "LayerNorm"),
p / "LayerNorm",
vec![config.embedding_size],
layer_norm_config,
);
let dense = nn::linear(
&(p / "dense"),
p / "dense",
config.hidden_size,
config.embedding_size,
Default::default(),
);
let decoder = nn::linear(
&(p / "decoder"),
p / "decoder",
config.embedding_size,
config.vocab_size,
Default::default(),
@ -368,9 +378,14 @@ impl AlbertForMaskedLM {
/// let config = AlbertConfig::from_file(config_path);
/// let albert: AlbertForMaskedLM = AlbertForMaskedLM::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertForMaskedLM {
let albert = AlbertModel::new(&(p / "albert"), config);
let predictions = AlbertMLMHead::new(&(p / "predictions"), config);
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForMaskedLM
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let albert = AlbertModel::new(p / "albert", config);
let predictions = AlbertMLMHead::new(p / "predictions", config);
AlbertForMaskedLM {
albert,
@ -486,8 +501,13 @@ impl AlbertForSequenceClassification {
/// let albert: AlbertForSequenceClassification =
/// AlbertForSequenceClassification::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertForSequenceClassification {
let albert = AlbertModel::new(&(p / "albert"), config);
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForSequenceClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let albert = AlbertModel::new(p / "albert", config);
let classifier_dropout_prob = match config.classifier_dropout_prob {
Some(value) => value,
None => 0.1,
@ -499,7 +519,7 @@ impl AlbertForSequenceClassification {
.expect("num_labels not provided in configuration")
.len() as i64;
let classifier = nn::linear(
&(p / "classifier"),
p / "classifier",
config.hidden_size,
num_labels,
Default::default(),
@ -621,8 +641,13 @@ impl AlbertForTokenClassification {
/// let albert: AlbertForTokenClassification =
/// AlbertForTokenClassification::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertForTokenClassification {
let albert = AlbertModel::new(&(p / "albert"), config);
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForTokenClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let albert = AlbertModel::new(p / "albert", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let num_labels = config
.id2label
@ -630,7 +655,7 @@ impl AlbertForTokenClassification {
.expect("num_labels not provided in configuration")
.len() as i64;
let classifier = nn::linear(
&(p / "classifier"),
p / "classifier",
config.hidden_size,
num_labels,
Default::default(),
@ -750,11 +775,16 @@ impl AlbertForQuestionAnswering {
/// let config = AlbertConfig::from_file(config_path);
/// let albert: AlbertForQuestionAnswering = AlbertForQuestionAnswering::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertForQuestionAnswering {
let albert = AlbertModel::new(&(p / "albert"), config);
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForQuestionAnswering
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let albert = AlbertModel::new(p / "albert", config);
let num_labels = 2;
let qa_outputs = nn::linear(
&(p / "qa_outputs"),
p / "qa_outputs",
config.hidden_size,
num_labels,
Default::default(),
@ -880,12 +910,17 @@ impl AlbertForMultipleChoice {
/// let config = AlbertConfig::from_file(config_path);
/// let albert: AlbertForMultipleChoice = AlbertForMultipleChoice::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertForMultipleChoice {
let albert = AlbertModel::new(&(p / "albert"), config);
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertForMultipleChoice
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let albert = AlbertModel::new(p / "albert", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let num_labels = 1;
let classifier = nn::linear(
&(p / "classifier"),
p / "classifier",
config.hidden_size,
num_labels,
Default::default(),
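
On the caller side nothing is forced to change: the documented call style `AlbertModel::new(&(&p.root() / "albert"), &config)` still satisfies the new bound, while the owned sub-path can now be passed directly. A hedged usage sketch, assuming the crate's public re-exports as used in the doc comments above:

use rust_bert::albert::{AlbertConfig, AlbertModel};
use rust_bert::Config;
use std::path::Path;
use tch::{nn, Device};

fn build_albert(config_path: &Path) -> AlbertModel {
    let p = nn::VarStore::new(Device::Cpu);
    let config = AlbertConfig::from_file(config_path);
    // The previous style, `AlbertModel::new(&(&p.root() / "albert"), &config)`,
    // keeps compiling because `&nn::Path` also satisfies the bound; the owned
    // sub-path can now be passed without the extra `&(..)` wrapper.
    AlbertModel::new(&p.root() / "albert", &config)
}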

View File

@ -13,6 +13,7 @@
use crate::albert::AlbertConfig;
use crate::common::dropout::Dropout;
use std::borrow::Borrow;
use tch::nn::{embedding, EmbeddingConfig};
use tch::{nn, Kind, Tensor};
@ -28,7 +29,12 @@ pub struct AlbertEmbeddings {
}
impl AlbertEmbeddings {
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertEmbeddings {
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertEmbeddings
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let embedding_config = EmbeddingConfig {
padding_idx: config.pad_token_id,
..Default::default()

View File

@ -15,7 +15,7 @@ use crate::albert::albert::Activation;
use crate::albert::attention::AlbertSelfAttention;
use crate::albert::AlbertConfig;
use crate::common::activations::{_gelu, _gelu_new, _mish, _relu};
use std::borrow::BorrowMut;
use std::borrow::{Borrow, BorrowMut};
use tch::{nn, Tensor};
pub struct AlbertLayer {
@ -27,7 +27,12 @@ pub struct AlbertLayer {
}
impl AlbertLayer {
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertLayer {
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertLayer
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let attention = AlbertSelfAttention::new(p / "attention", &config);
let layer_norm_eps = match config.layer_norm_eps {
@ -39,19 +44,19 @@ impl AlbertLayer {
..Default::default()
};
let full_layer_layer_norm = nn::layer_norm(
&(p / "full_layer_layer_norm"),
p / "full_layer_layer_norm",
vec![config.hidden_size],
layer_norm_config,
);
let ffn = nn::linear(
&(p / "ffn"),
p / "ffn",
config.hidden_size,
config.intermediate_size,
Default::default(),
);
let ffn_output = nn::linear(
&(p / "ffn_output"),
p / "ffn_output",
config.intermediate_size,
config.hidden_size,
Default::default(),
@ -97,8 +102,11 @@ pub struct AlbertLayerGroup {
}
impl AlbertLayerGroup {
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertLayerGroup {
let p = &(p / "albert_layers");
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertLayerGroup
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow() / "albert_layers";
let output_attentions = match config.output_attentions {
Some(value) => value,
@ -112,7 +120,7 @@ impl AlbertLayerGroup {
let mut layers: Vec<AlbertLayer> = vec![];
for layer_index in 0..config.inner_group_num {
layers.push(AlbertLayer::new(&(p / layer_index), config));
layers.push(AlbertLayer::new(&p / layer_index, config));
}
AlbertLayerGroup {
@ -174,8 +182,12 @@ pub struct AlbertTransformer {
}
impl AlbertTransformer {
pub fn new(p: &nn::Path, config: &AlbertConfig) -> AlbertTransformer {
let p_layers = &(p / "albert_layer_groups");
pub fn new<'p, P>(p: P, config: &AlbertConfig) -> AlbertTransformer
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let p_layers = p / "albert_layer_groups";
let output_attentions = match config.output_attentions {
Some(value) => value,
@ -188,7 +200,7 @@ impl AlbertTransformer {
};
let embedding_hidden_mapping_in = nn::linear(
&(p / "embedding_hidden_mapping_in"),
p / "embedding_hidden_mapping_in",
config.embedding_size,
config.hidden_size,
Default::default(),
@ -196,7 +208,7 @@ impl AlbertTransformer {
let mut layers: Vec<AlbertLayerGroup> = vec![];
for layer_index in 0..config.inner_group_num {
layers.push(AlbertLayerGroup::new(&(p_layers / layer_index), config));
layers.push(AlbertLayerGroup::new(&p_layers / layer_index, config));
}
AlbertTransformer {
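
The layer-stack constructors (`AlbertLayerGroup`, `AlbertTransformer`, and the analogous BERT/DistilBERT encoders further down) follow a slightly different shape: the borrowed path is immediately joined with a fixed prefix, which yields an owned `nn::Path`, and the per-layer paths are then built with `&p / layer_index`. A small sketch of that pattern with a hypothetical `ExampleStack`:

use std::borrow::Borrow;
use tch::nn;

// Hypothetical layer stack illustrating the owned-prefix pattern above;
// the field is a plain `nn::Linear` rather than a real transformer layer.
pub struct ExampleStack {
    layers: Vec<nn::Linear>,
}

impl ExampleStack {
    pub fn new<'p, P>(p: P, n_layers: i64, dim: i64) -> ExampleStack
    where
        P: Borrow<nn::Path<'p>>,
    {
        // Joining the borrowed path with a fixed prefix produces an owned
        // `nn::Path`, so each per-layer sub-path is built with `&p / index`.
        let p = p.borrow() / "layer";
        let mut layers = Vec::new();
        for layer_index in 0..n_layers {
            layers.push(nn::linear(&p / layer_index, dim, dim, Default::default()));
        }
        ExampleStack { layers }
    }
}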

View File

@ -18,7 +18,7 @@ use crate::common::dropout::Dropout;
use crate::pipelines::generation::{Cache, LMHeadModel};
use crate::Config;
use serde::{Deserialize, Serialize};
use std::borrow::BorrowMut;
use std::borrow::{Borrow, BorrowMut};
use std::collections::HashMap;
use tch::kind::Kind::{Float, Int64};
use tch::nn::{embedding, EmbeddingConfig};
@ -248,7 +248,12 @@ impl BartModel {
/// let generation_mode = true;
/// let bart: BartModel = BartModel::new(&(&p.root() / "bart"), &config, generation_mode);
/// ```
pub fn new(p: &nn::Path, config: &BartConfig, generation_mode: bool) -> BartModel {
pub fn new<'p, P>(p: P, config: &BartConfig, generation_mode: bool) -> BartModel
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let pad_token_id = match config.pad_token_id {
Some(value) => value,
None => 1,
@ -454,7 +459,7 @@ impl BartForConditionalGeneration {
config: &BartConfig,
generation_mode: bool,
) -> BartForConditionalGeneration {
let base_model = BartModel::new(&(p / "model"), config, generation_mode);
let base_model = BartModel::new(p / "model", config, generation_mode);
BartForConditionalGeneration { base_model }
}
@ -578,16 +583,21 @@ pub struct BartClassificationHead {
}
impl BartClassificationHead {
pub fn new(p: &nn::Path, config: &BartConfig) -> BartClassificationHead {
pub fn new<'p, P>(p: P, config: &BartConfig) -> BartClassificationHead
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let dense = nn::linear(
&(p / "dense"),
p / "dense",
config.d_model,
config.d_model,
Default::default(),
);
let dropout = Dropout::new(config.classif_dropout);
let out_proj = nn::linear(
&(p / "out_proj"),
p / "out_proj",
config.d_model,
config.num_labels.unwrap(),
Default::default(),
@ -645,9 +655,14 @@ impl BartForSequenceClassification {
/// let bart: BartForSequenceClassification =
/// BartForSequenceClassification::new(&(&p.root() / "bart"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BartConfig) -> BartForSequenceClassification {
let base_model = BartModel::new(&(p / "model"), config, false);
let classification_head = BartClassificationHead::new(&(p / "classification_head"), config);
pub fn new<'p, P>(p: P, config: &BartConfig) -> BartForSequenceClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let base_model = BartModel::new(p / "model", config, false);
let classification_head = BartClassificationHead::new(p / "classification_head", config);
let eos_token_id = match config.eos_token_id {
Some(value) => value,
None => 3,

View File

@ -14,6 +14,7 @@
use crate::bert::bert::{Activation, BertConfig};
use crate::common::activations::{_gelu, _mish, _relu};
use crate::common::dropout::Dropout;
use std::borrow::Borrow;
use tch::kind::Kind::Float;
use tch::{nn, Tensor};
@ -141,7 +142,12 @@ pub struct BertSelfOutput {
}
impl BertSelfOutput {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertSelfOutput {
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertSelfOutput
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let linear = nn::linear(
p / "dense",
config.hidden_size,
@ -179,9 +185,14 @@ pub struct BertAttention {
}
impl BertAttention {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertAttention {
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertAttention
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let _self = BertSelfAttention::new(p / "self", config);
let output = BertSelfOutput::new(&(p / "output"), config);
let output = BertSelfOutput::new(p / "output", config);
BertAttention { _self, output }
}
@ -212,7 +223,12 @@ pub struct BertIntermediate {
}
impl BertIntermediate {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertIntermediate {
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertIntermediate
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let lin = nn::linear(
p / "dense",
config.hidden_size,
@ -239,7 +255,12 @@ pub struct BertOutput {
}
impl BertOutput {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertOutput {
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertOutput
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let lin = nn::linear(
p / "dense",
config.intermediate_size,

View File

@ -18,6 +18,7 @@ use crate::common::dropout::Dropout;
use crate::common::linear::{linear_no_bias, LinearNoBias};
use crate::Config;
use serde::{Deserialize, Serialize};
use std::borrow::Borrow;
use std::collections::HashMap;
use tch::kind::Kind::Float;
use tch::nn::Init;
@ -146,14 +147,19 @@ impl<T: BertEmbedding> BertModel<T> {
/// let config = BertConfig::from_file(config_path);
/// let bert: BertModel<BertEmbeddings> = BertModel::new(&(&p.root() / "bert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> BertModel<T> {
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertModel<T>
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let is_decoder = match config.is_decoder {
Some(value) => value,
None => false,
};
let embeddings = T::new(&(p / "embeddings"), config);
let encoder = BertEncoder::new(&(p / "encoder"), config);
let pooler = BertPooler::new(&(p / "pooler"), config);
let embeddings = T::new(p / "embeddings", config);
let encoder = BertEncoder::new(p / "encoder", config);
let pooler = BertPooler::new(p / "pooler", config);
BertModel {
embeddings,
@ -337,7 +343,12 @@ pub struct BertPredictionHeadTransform {
}
impl BertPredictionHeadTransform {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertPredictionHeadTransform {
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertPredictionHeadTransform
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let dense = nn::linear(
p / "dense",
config.hidden_size,
@ -375,11 +386,14 @@ pub struct BertLMPredictionHead {
}
impl BertLMPredictionHead {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertLMPredictionHead {
let p = &(p / "predictions");
let transform = BertPredictionHeadTransform::new(&(p / "transform"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertLMPredictionHead
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow() / "predictions";
let transform = BertPredictionHeadTransform::new(&p / "transform", config);
let decoder = linear_no_bias(
&(p / "decoder"),
&p / "decoder",
config.hidden_size,
config.vocab_size,
Default::default(),
@ -430,9 +444,14 @@ impl BertForMaskedLM {
/// let config = BertConfig::from_file(config_path);
/// let bert = BertForMaskedLM::new(&(&p.root() / "bert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> BertForMaskedLM {
let bert = BertModel::new(&(p / "bert"), config);
let cls = BertLMPredictionHead::new(&(p / "cls"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertForMaskedLM
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let bert = BertModel::new(p / "bert", config);
let cls = BertLMPredictionHead::new(p / "cls", config);
BertForMaskedLM { bert, cls }
}
@ -552,8 +571,13 @@ impl BertForSequenceClassification {
/// let config = BertConfig::from_file(config_path);
/// let bert = BertForSequenceClassification::new(&(&p.root() / "bert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> BertForSequenceClassification {
let bert = BertModel::new(&(p / "bert"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertForSequenceClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let bert = BertModel::new(p / "bert", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let num_labels = config
.id2label
@ -687,8 +711,13 @@ impl BertForMultipleChoice {
/// let config = BertConfig::from_file(config_path);
/// let bert = BertForMultipleChoice::new(&(&p.root() / "bert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> BertForMultipleChoice {
let bert = BertModel::new(&(p / "bert"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertForMultipleChoice
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let bert = BertModel::new(p / "bert", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let classifier = nn::linear(p / "classifier", config.hidden_size, 1, Default::default());
@ -825,8 +854,13 @@ impl BertForTokenClassification {
/// let config = BertConfig::from_file(config_path);
/// let bert = BertForTokenClassification::new(&(&p.root() / "bert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> BertForTokenClassification {
let bert = BertModel::new(&(p / "bert"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertForTokenClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let bert = BertModel::new(p / "bert", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let num_labels = config
.id2label
@ -959,8 +993,13 @@ impl BertForQuestionAnswering {
/// let config = BertConfig::from_file(config_path);
/// let bert = BertForQuestionAnswering::new(&(&p.root() / "bert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> BertForQuestionAnswering {
let bert = BertModel::new(&(p / "bert"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertForQuestionAnswering
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let bert = BertModel::new(p / "bert", config);
let num_labels = 2;
let qa_outputs = nn::linear(
p / "qa_outputs",

View File

@ -13,13 +13,16 @@
use crate::bert::bert::BertConfig;
use crate::common::dropout::Dropout;
use std::borrow::Borrow;
use tch::nn::{embedding, EmbeddingConfig};
use tch::{nn, Kind, Tensor};
/// # BertEmbedding trait (for use in BertModel or RoBERTaModel)
/// Defines an interface for the embedding layers in BERT-based models
pub trait BertEmbedding {
fn new(p: &nn::Path, config: &BertConfig) -> Self;
fn new<'p, P>(p: P, config: &BertConfig) -> Self
where
P: Borrow<nn::Path<'p>>;
fn forward_t(
&self,
@ -64,7 +67,12 @@ impl BertEmbedding for BertEmbeddings {
/// let config = BertConfig::from_file(config_path);
/// let bert_embeddings = BertEmbeddings::new(&(&p.root() / "bert_embeddings"), &config);
/// ```
fn new(p: &nn::Path, config: &BertConfig) -> BertEmbeddings {
fn new<'p, P>(p: P, config: &BertConfig) -> BertEmbeddings
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let embedding_config = EmbeddingConfig {
padding_idx: 0,
..Default::default()
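
Because `BertEmbedding::new` is a trait method, the generic parameter and its `Borrow<nn::Path<'p>>` bound have to be declared on the trait itself and repeated in every implementation (here and in the RoBERTa embeddings further down). A hedged sketch of that trait-level shape, with hypothetical types:

use std::borrow::Borrow;
use tch::nn;

// Hypothetical trait and embedding type; the vocabulary size below is an
// arbitrary illustrative constant.
pub trait ExampleEmbedding {
    fn new<'p, P>(p: P, dim: i64) -> Self
    where
        P: Borrow<nn::Path<'p>>;
}

pub struct ExampleEmbeddings {
    word_embeddings: nn::Embedding,
}

impl ExampleEmbedding for ExampleEmbeddings {
    fn new<'p, P>(p: P, dim: i64) -> Self
    where
        P: Borrow<nn::Path<'p>>,
    {
        let p = p.borrow();
        let word_embeddings = nn::embedding(p / "word_embeddings", 250, dim, Default::default());
        ExampleEmbeddings { word_embeddings }
    }
}

A generic method like this rules out trait objects, so the trait is consumed through static dispatch, as `BertModel<T: BertEmbedding>` already does.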

View File

@ -13,7 +13,7 @@
use crate::bert::attention::{BertAttention, BertIntermediate, BertOutput};
use crate::bert::bert::BertConfig;
use std::borrow::BorrowMut;
use std::borrow::{Borrow, BorrowMut};
use tch::{nn, Tensor};
pub struct BertLayer {
@ -25,14 +25,19 @@ pub struct BertLayer {
}
impl BertLayer {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertLayer {
let attention = BertAttention::new(&(p / "attention"), &config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertLayer
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let attention = BertAttention::new(p / "attention", &config);
let (is_decoder, cross_attention) = match config.is_decoder {
Some(value) => {
if value == true {
(
value,
Some(BertAttention::new(&(p / "cross_attention"), &config)),
Some(BertAttention::new(p / "cross_attention", &config)),
)
} else {
(value, None)
@ -41,8 +46,8 @@ impl BertLayer {
None => (false, None),
};
let intermediate = BertIntermediate::new(&(p / "intermediate"), &config);
let output = BertOutput::new(&(p / "output"), &config);
let intermediate = BertIntermediate::new(p / "intermediate", &config);
let output = BertOutput::new(p / "output", &config);
BertLayer {
attention,
@ -96,8 +101,11 @@ pub struct BertEncoder {
}
impl BertEncoder {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertEncoder {
let p = &(p / "layer");
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertEncoder
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow() / "layer";
let output_attentions = if let Some(value) = config.output_attentions {
value
} else {
@ -111,7 +119,7 @@ impl BertEncoder {
let mut layers: Vec<BertLayer> = vec![];
for layer_index in 0..config.num_hidden_layers {
layers.push(BertLayer::new(&(p / layer_index), config));
layers.push(BertLayer::new(&p / layer_index, config));
}
BertEncoder {
@ -176,9 +184,14 @@ pub struct BertPooler {
}
impl BertPooler {
pub fn new(p: &nn::Path, config: &BertConfig) -> BertPooler {
pub fn new<'p, P>(p: P, config: &BertConfig) -> BertPooler
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let lin = nn::linear(
&(p / "dense"),
p / "dense",
config.hidden_size,
config.hidden_size,
Default::default(),

View File

@ -18,7 +18,7 @@ use crate::distilbert::embeddings::DistilBertEmbedding;
use crate::distilbert::transformer::Transformer;
use crate::Config;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::{borrow::Borrow, collections::HashMap};
/// # DistilBERT Pretrained model weight files
pub struct DistilBertModelResources;
@ -156,10 +156,13 @@ impl DistilBertModel {
/// let config = DistilBertConfig::from_file(config_path);
/// let distil_bert: DistilBertModel = DistilBertModel::new(&(&p.root() / "distilbert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> DistilBertModel {
let p = &(p / "distilbert");
let embeddings = DistilBertEmbedding::new(&(p / "embeddings"), config);
let transformer = Transformer::new(&(p / "transformer"), config);
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> DistilBertModel
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow() / "distilbert";
let embeddings = DistilBertEmbedding::new(&p / "embeddings", config);
let transformer = Transformer::new(&p / "transformer", config);
DistilBertModel {
embeddings,
transformer,
@ -268,8 +271,13 @@ impl DistilBertModelClassifier {
/// let distil_bert: DistilBertModelClassifier =
/// DistilBertModelClassifier::new(&(&p.root() / "distilbert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> DistilBertModelClassifier {
let distil_bert_model = DistilBertModel::new(&p, config);
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> DistilBertModelClassifier
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let distil_bert_model = DistilBertModel::new(p, config);
let num_labels = config
.id2label
@ -278,17 +286,12 @@ impl DistilBertModelClassifier {
.len() as i64;
let pre_classifier = nn::linear(
&(p / "pre_classifier"),
p / "pre_classifier",
config.dim,
config.dim,
Default::default(),
);
let classifier = nn::linear(
&(p / "classifier"),
config.dim,
num_labels,
Default::default(),
);
let classifier = nn::linear(p / "classifier", config.dim, num_labels, Default::default());
let dropout = Dropout::new(config.seq_classif_dropout);
DistilBertModelClassifier {
@ -403,10 +406,15 @@ impl DistilBertModelMaskedLM {
/// let config = DistilBertConfig::from_file(config_path);
/// let distil_bert = DistilBertModelMaskedLM::new(&(&p.root() / "distilbert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> DistilBertModelMaskedLM {
let distil_bert_model = DistilBertModel::new(&p, config);
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> DistilBertModelMaskedLM
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let distil_bert_model = DistilBertModel::new(p, config);
let vocab_transform = nn::linear(
&(p / "vocab_transform"),
p / "vocab_transform",
config.dim,
config.dim,
Default::default(),
@ -418,7 +426,7 @@ impl DistilBertModelMaskedLM {
let vocab_layer_norm =
nn::layer_norm(p / "vocab_layer_norm", vec![config.dim], layer_norm_config);
let vocab_projector = nn::linear(
&(p / "vocab_projector"),
p / "vocab_projector",
config.dim,
config.vocab_size,
Default::default(),
@ -532,9 +540,14 @@ impl DistilBertForQuestionAnswering {
/// let config = DistilBertConfig::from_file(config_path);
/// let distil_bert = DistilBertForQuestionAnswering::new(&(&p.root() / "distilbert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> DistilBertForQuestionAnswering {
let distil_bert_model = DistilBertModel::new(&p, config);
let qa_outputs = nn::linear(&(p / "qa_outputs"), config.dim, 2, Default::default());
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> DistilBertForQuestionAnswering
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let distil_bert_model = DistilBertModel::new(p, config);
let qa_outputs = nn::linear(p / "qa_outputs", config.dim, 2, Default::default());
let dropout = Dropout::new(config.qa_dropout);
DistilBertForQuestionAnswering {
@ -645,8 +658,13 @@ impl DistilBertForTokenClassification {
/// let config = DistilBertConfig::from_file(config_path);
/// let distil_bert = DistilBertForTokenClassification::new(&(&p.root() / "distilbert"), &config);
/// ```
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> DistilBertForTokenClassification {
let distil_bert_model = DistilBertModel::new(&p, config);
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> DistilBertForTokenClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let distil_bert_model = DistilBertModel::new(p, config);
let num_labels = config
.id2label
@ -654,12 +672,7 @@ impl DistilBertForTokenClassification {
.expect("id2label must be provided for classifiers")
.len() as i64;
let classifier = nn::linear(
&(p / "classifier"),
config.dim,
num_labels,
Default::default(),
);
let classifier = nn::linear(p / "classifier", config.dim, num_labels, Default::default());
let dropout = Dropout::new(config.seq_classif_dropout);
DistilBertForTokenClassification {

View File

@ -12,6 +12,7 @@
use crate::common::dropout::Dropout;
use crate::distilbert::distilbert::DistilBertConfig;
use std::borrow::Borrow;
use tch::kind::Kind::Float;
use tch::nn::{embedding, EmbeddingConfig, ModuleT};
use tch::{nn, Device, Kind, Tensor};
@ -63,7 +64,12 @@ pub struct DistilBertEmbedding {
}
impl DistilBertEmbedding {
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> DistilBertEmbedding {
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> DistilBertEmbedding
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let embedding_config = EmbeddingConfig {
padding_idx: 0,
..Default::default()

View File

@ -14,7 +14,7 @@ use crate::common::activations::{_gelu, _relu};
use crate::common::dropout::Dropout;
use crate::distilbert::attention::MultiHeadSelfAttention;
use crate::distilbert::distilbert::{Activation, DistilBertConfig};
use std::borrow::BorrowMut;
use std::borrow::{Borrow, BorrowMut};
use tch::nn::LayerNorm;
use tch::{nn, Tensor};
@ -67,7 +67,12 @@ pub struct TransformerBlock {
}
impl TransformerBlock {
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> TransformerBlock {
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> TransformerBlock
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let attention = MultiHeadSelfAttention::new(p / "attention", &config);
let layer_norm_config = nn::LayerNormConfig {
eps: 1e-12,
@ -109,8 +114,11 @@ pub struct Transformer {
}
impl Transformer {
pub fn new(p: &nn::Path, config: &DistilBertConfig) -> Transformer {
let p = &(p / "layer");
pub fn new<'p, P>(p: P, config: &DistilBertConfig) -> Transformer
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow() / "layer";
let output_attentions = match config.output_attentions {
Some(value) => value,
None => false,
@ -122,7 +130,7 @@ impl Transformer {
let mut layers: Vec<TransformerBlock> = vec![];
for layer_index in 0..config.n_layers {
layers.push(TransformerBlock::new(&(p / layer_index), config));
layers.push(TransformerBlock::new(&p / layer_index, config));
}
Transformer {

View File

@ -19,7 +19,7 @@ use crate::common::dropout::Dropout;
use crate::electra::embeddings::ElectraEmbeddings;
use crate::Config;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::{borrow::Borrow, collections::HashMap};
use tch::{nn, Kind, Tensor};
/// # Electra Pretrained model weight files
@ -132,11 +132,16 @@ impl ElectraModel {
/// let config = ElectraConfig::from_file(config_path);
/// let electra_model: ElectraModel = ElectraModel::new(&(&p.root() / "electra"), &config);
/// ```
pub fn new(p: &nn::Path, config: &ElectraConfig) -> ElectraModel {
let embeddings = ElectraEmbeddings::new(&(p / "embeddings"), config);
pub fn new<'p, P>(p: P, config: &ElectraConfig) -> ElectraModel
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let embeddings = ElectraEmbeddings::new(p / "embeddings", config);
let embeddings_project = if config.embedding_size != config.hidden_size {
Some(nn::linear(
&(p / "embeddings_project"),
p / "embeddings_project",
config.embedding_size,
config.hidden_size,
Default::default(),
@ -162,7 +167,7 @@ impl ElectraModel {
id2label: config.id2label.clone(),
label2id: config.label2id.clone(),
};
let encoder = BertEncoder::new(&(p / "encoder"), &bert_config);
let encoder = BertEncoder::new(p / "encoder", &bert_config);
ElectraModel {
embeddings,
embeddings_project,
@ -322,15 +327,20 @@ impl ElectraDiscriminatorHead {
/// let config = ElectraConfig::from_file(config_path);
/// let discriminator_head = ElectraDiscriminatorHead::new(&(&p.root() / "electra"), &config);
/// ```
pub fn new(p: &nn::Path, config: &ElectraConfig) -> ElectraDiscriminatorHead {
pub fn new<'p, P>(p: P, config: &ElectraConfig) -> ElectraDiscriminatorHead
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let dense = nn::linear(
&(p / "dense"),
p / "dense",
config.hidden_size,
config.hidden_size,
Default::default(),
);
let dense_prediction = nn::linear(
&(p / "dense_prediction"),
p / "dense_prediction",
config.hidden_size,
1,
Default::default(),
@ -422,14 +432,19 @@ impl ElectraGeneratorHead {
/// let config = ElectraConfig::from_file(config_path);
/// let generator_head = ElectraGeneratorHead::new(&(&p.root() / "electra"), &config);
/// ```
pub fn new(p: &nn::Path, config: &ElectraConfig) -> ElectraGeneratorHead {
pub fn new<'p, P>(p: P, config: &ElectraConfig) -> ElectraGeneratorHead
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let layer_norm = nn::layer_norm(
p / "LayerNorm",
vec![config.embedding_size],
Default::default(),
);
let dense = nn::linear(
&(p / "dense"),
p / "dense",
config.hidden_size,
config.embedding_size,
Default::default(),
@ -516,11 +531,16 @@ impl ElectraForMaskedLM {
/// let config = ElectraConfig::from_file(config_path);
/// let electra_model: ElectraForMaskedLM = ElectraForMaskedLM::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &ElectraConfig) -> ElectraForMaskedLM {
let electra = ElectraModel::new(&(p / "electra"), config);
let generator_head = ElectraGeneratorHead::new(&(p / "generator_predictions"), config);
pub fn new<'p, P>(p: P, config: &ElectraConfig) -> ElectraForMaskedLM
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let electra = ElectraModel::new(p / "electra", config);
let generator_head = ElectraGeneratorHead::new(p / "generator_predictions", config);
let lm_head = nn::linear(
&(p / "generator_lm_head"),
p / "generator_lm_head",
config.embedding_size,
config.vocab_size,
Default::default(),
@ -640,10 +660,15 @@ impl ElectraDiscriminator {
/// let config = ElectraConfig::from_file(config_path);
/// let electra_model: ElectraDiscriminator = ElectraDiscriminator::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &ElectraConfig) -> ElectraDiscriminator {
let electra = ElectraModel::new(&(p / "electra"), config);
pub fn new<'p, P>(p: P, config: &ElectraConfig) -> ElectraDiscriminator
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let electra = ElectraModel::new(p / "electra", config);
let discriminator_head =
ElectraDiscriminatorHead::new(&(p / "discriminator_predictions"), config);
ElectraDiscriminatorHead::new(p / "discriminator_predictions", config);
ElectraDiscriminator {
electra,
@ -757,8 +782,13 @@ impl ElectraForTokenClassification {
/// let electra_model: ElectraForTokenClassification =
/// ElectraForTokenClassification::new(&p.root(), &config);
/// ```
pub fn new(p: &nn::Path, config: &ElectraConfig) -> ElectraForTokenClassification {
let electra = ElectraModel::new(&(p / "electra"), config);
pub fn new<'p, P>(p: P, config: &ElectraConfig) -> ElectraForTokenClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let electra = ElectraModel::new(p / "electra", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let num_labels = config
.id2label
@ -766,7 +796,7 @@ impl ElectraForTokenClassification {
.expect("id2label must be provided for classifiers")
.len() as i64;
let classifier = nn::linear(
&(p / "classifier"),
p / "classifier",
config.hidden_size,
num_labels,
Default::default(),

View File

@ -14,6 +14,7 @@
use crate::common::dropout::Dropout;
use crate::electra::electra::ElectraConfig;
use std::borrow::Borrow;
use tch::nn::{embedding, EmbeddingConfig};
use tch::{nn, Kind, Tensor};
@ -28,7 +29,12 @@ pub struct ElectraEmbeddings {
}
impl ElectraEmbeddings {
pub fn new(p: &nn::Path, config: &ElectraConfig) -> ElectraEmbeddings {
pub fn new<'p, P>(p: P, config: &ElectraConfig) -> ElectraEmbeddings
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let embedding_config = EmbeddingConfig {
padding_idx: config.pad_token_id,
..Default::default()

View File

@ -14,6 +14,7 @@
use crate::common::dropout::Dropout;
use crate::gpt2::gpt2::Gpt2Config;
use std::borrow::Borrow;
use tch::kind::Kind::Float;
use tch::nn::{Init, Module};
use tch::{nn, Tensor};
@ -25,7 +26,12 @@ pub struct GPTConv1D {
}
impl GPTConv1D {
pub fn new(p: &nn::Path, nf: i64, nx: i64) -> GPTConv1D {
pub fn new<'p, P>(p: P, nf: i64, nx: i64) -> GPTConv1D
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let weight = p.var(
"weight",
&[nx, nf],
@ -59,12 +65,17 @@ pub struct Attention {
}
impl Attention {
pub fn new(p: &nn::Path, config: &Gpt2Config, scale: bool) -> Attention {
pub fn new<'p, P>(p: P, config: &Gpt2Config, scale: bool) -> Attention
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let bias = Tensor::ones(&[config.n_ctx, config.n_ctx], (Float, p.device()))
.tril(0)
.view((1, 1, config.n_ctx, config.n_ctx));
let c_attn = GPTConv1D::new(&(p / "c_attn"), config.n_embd * 3, config.n_embd);
let c_proj = GPTConv1D::new(&(p / "c_proj"), config.n_embd, config.n_embd);
let c_attn = GPTConv1D::new(p / "c_attn", config.n_embd * 3, config.n_embd);
let c_proj = GPTConv1D::new(p / "c_proj", config.n_embd, config.n_embd);
let attn_pdrop = match config.attn_pdrop {
Some(value) => value,

View File

@ -18,7 +18,7 @@ use crate::gpt2::transformer::Block;
use crate::pipelines::generation::{Cache, LMHeadModel};
use crate::Config;
use serde::{Deserialize, Serialize};
use std::borrow::BorrowMut;
use std::borrow::{Borrow, BorrowMut};
use tch::kind::Kind::Int64;
use tch::nn::embedding;
use tch::{nn, Tensor};
@ -247,16 +247,20 @@ impl Gpt2Model {
/// let config = Gpt2Config::from_file(config_path);
/// let gpt2: Gpt2Model = Gpt2Model::new(&(&p.root() / "gpt2"), &config);
/// ```
pub fn new(p: &nn::Path, config: &Gpt2Config) -> Gpt2Model {
let p = &(p / "transformer");
pub fn new<'p, P>(p: P, config: &Gpt2Config) -> Gpt2Model
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow() / "transformer";
let wte = embedding(
&(p / "wte"),
&p / "wte",
config.vocab_size,
config.n_embd,
Default::default(),
);
let wpe = embedding(
&(p / "wpe"),
&p / "wpe",
config.n_positions,
config.n_embd,
Default::default(),
@ -271,11 +275,11 @@ impl Gpt2Model {
eps: config.layer_norm_epsilon,
..Default::default()
};
let ln_f = nn::layer_norm(p / "ln_f", vec![config.n_embd], layer_norm_config);
let ln_f = nn::layer_norm(&p / "ln_f", vec![config.n_embd], layer_norm_config);
let mut h: Vec<Block> = vec![];
let h_path = &(p / "h");
let h_path = &p / "h";
for layer_index in 0..config.n_layer {
h.push(Block::new(&(h_path / layer_index), config, true));
h.push(Block::new(&h_path / layer_index, config, true));
}
let output_attentions = match config.output_attentions {
Some(value) => value,
@ -531,10 +535,15 @@ impl GPT2LMHeadModel {
/// let config = Gpt2Config::from_file(config_path);
/// let gpt2: GPT2LMHeadModel = GPT2LMHeadModel::new(&(&p.root() / "gpt2"), &config);
/// ```
pub fn new(p: &nn::Path, config: &Gpt2Config) -> GPT2LMHeadModel {
let transformer = Gpt2Model::new(&p, config);
pub fn new<'p, P>(p: P, config: &Gpt2Config) -> GPT2LMHeadModel
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let transformer = Gpt2Model::new(p, config);
let lm_head = linear_no_bias(
&(p / "lm_head"),
p / "lm_head",
config.n_embd,
config.vocab_size,
Default::default(),
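
When a wrapper forwards its entire path to a nested model, as `GPT2LMHeadModel` does with `Gpt2Model::new(p, config)` above (and `DistilBertModelClassifier` earlier), the borrowed `&nn::Path` itself satisfies the bound, so no clone or re-wrapping is needed. A minimal sketch with hypothetical `Inner`/`Outer` types:

use std::borrow::Borrow;
use tch::nn;

// Hypothetical Inner/Outer pair mirroring GPT2LMHeadModel -> Gpt2Model above.
pub struct Inner {
    proj: nn::Linear,
}

impl Inner {
    pub fn new<'p, P>(p: P, dim: i64) -> Inner
    where
        P: Borrow<nn::Path<'p>>,
    {
        let p = p.borrow();
        Inner {
            proj: nn::linear(p / "proj", dim, dim, Default::default()),
        }
    }
}

pub struct Outer {
    inner: Inner,
    head: nn::Linear,
}

impl Outer {
    pub fn new<'p, P>(p: P, dim: i64) -> Outer
    where
        P: Borrow<nn::Path<'p>>,
    {
        let p = p.borrow();
        // `p` is already a `&nn::Path`; the reference itself satisfies the
        // `Borrow<nn::Path>` bound of the nested constructor.
        let inner = Inner::new(p, dim);
        let head = nn::linear(p / "head", dim, dim, Default::default());
        Outer { inner, head }
    }
}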

View File

@ -16,6 +16,7 @@ use crate::common::activations::{_gelu_new, _relu, _swish};
use crate::common::dropout::Dropout;
use crate::gpt2::attention::{Attention, GPTConv1D};
use crate::gpt2::gpt2::{Gpt2Config, GptActivation};
use std::borrow::Borrow;
use tch::{nn, Tensor};
pub struct MLP {
@ -26,9 +27,14 @@ pub struct MLP {
}
impl MLP {
pub fn new(p: &nn::Path, config: &Gpt2Config) -> MLP {
let c_fc = GPTConv1D::new(&(p / "c_fc"), config.n_embd * 4, config.n_embd);
let c_proj = GPTConv1D::new(&(p / "c_proj"), config.n_embd, config.n_embd * 4);
pub fn new<'p, P>(p: P, config: &Gpt2Config) -> MLP
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let c_fc = GPTConv1D::new(p / "c_fc", config.n_embd * 4, config.n_embd);
let c_proj = GPTConv1D::new(p / "c_proj", config.n_embd, config.n_embd * 4);
let activation = Box::new(match &config.afn {
Some(activation_enum) => match activation_enum {
GptActivation::gelu => _gelu_new,
@ -64,15 +70,20 @@ pub struct Block {
}
impl Block {
pub fn new(p: &nn::Path, config: &Gpt2Config, scale: bool) -> Block {
pub fn new<'p, P>(p: P, config: &Gpt2Config, scale: bool) -> Block
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let layer_norm_config = nn::LayerNormConfig {
eps: config.layer_norm_epsilon,
..Default::default()
};
let ln_1 = nn::layer_norm(p / "ln_1", vec![config.n_embd], layer_norm_config);
let ln_2 = nn::layer_norm(p / "ln_2", vec![config.n_embd], layer_norm_config);
let attn = Attention::new(&(p / "attn"), config, scale);
let mlp = MLP::new(&(p / "mlp"), config);
let attn = Attention::new(p / "attn", config, scale);
let mlp = MLP::new(p / "mlp", config);
Block {
ln_1,

View File

@ -264,7 +264,7 @@ impl MarianForConditionalGeneration {
config: &BartConfig,
generation_mode: bool,
) -> MarianForConditionalGeneration {
let base_model = BartModel::new(&(p / "model"), config, generation_mode);
let base_model = BartModel::new(p / "model", config, generation_mode);
let final_logits_bias = p.var(
"final_logits_bias",
&[1, config.vocab_size],

View File

@ -17,7 +17,7 @@ use crate::common::linear::{linear_no_bias, LinearNoBias};
use crate::gpt2::Gpt2Config;
use crate::openai_gpt::transformer::Block;
use crate::pipelines::generation::{Cache, LMHeadModel};
use std::borrow::BorrowMut;
use std::borrow::{Borrow, BorrowMut};
use tch::kind::Kind::Int64;
use tch::nn::embedding;
use tch::{nn, Tensor};
@ -106,15 +106,20 @@ impl OpenAiGptModel {
/// let config = Gpt2Config::from_file(config_path);
/// let gpt2: OpenAiGptModel = OpenAiGptModel::new(&(&p.root() / "gpt"), &config);
/// ```
pub fn new(p: &nn::Path, config: &Gpt2Config) -> OpenAiGptModel {
pub fn new<'p, P>(p: P, config: &Gpt2Config) -> OpenAiGptModel
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let tokens_embed = embedding(
&(p / "tokens_embed"),
p / "tokens_embed",
config.vocab_size,
config.n_embd,
Default::default(),
);
let positions_embed = embedding(
&(p / "positions_embed"),
p / "positions_embed",
config.n_positions,
config.n_embd,
Default::default(),
@ -126,9 +131,9 @@ impl OpenAiGptModel {
};
let drop = Dropout::new(embd_pdrop);
let mut h: Vec<Block> = vec![];
let h_path = &(p / "h");
let h_path = p / "h";
for layer_index in 0..config.n_layer {
h.push(Block::new(&(h_path / layer_index), config, true));
h.push(Block::new(&h_path / layer_index, config, true));
}
let output_attentions = match config.output_attentions {
Some(value) => value,
@ -317,10 +322,15 @@ impl OpenAIGPTLMHeadModel {
/// let config = Gpt2Config::from_file(config_path);
/// let gpt2: OpenAIGPTLMHeadModel = OpenAIGPTLMHeadModel::new(&(&p.root() / "gpt"), &config);
/// ```
pub fn new(p: &nn::Path, config: &Gpt2Config) -> OpenAIGPTLMHeadModel {
let transformer = OpenAiGptModel::new(&p, config);
pub fn new<'p, P>(p: P, config: &Gpt2Config) -> OpenAIGPTLMHeadModel
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let transformer = OpenAiGptModel::new(p, config);
let lm_head = linear_no_bias(
&(p / "lm_head"),
p / "lm_head",
config.n_embd,
config.vocab_size,
Default::default(),

View File

@ -15,6 +15,7 @@
use crate::gpt2::attention::Attention;
use crate::gpt2::transformer::MLP;
use crate::gpt2::Gpt2Config;
use std::borrow::Borrow;
use tch::{nn, Tensor};
pub struct Block {
@ -25,15 +26,20 @@ pub struct Block {
}
impl Block {
pub fn new(p: &nn::Path, config: &Gpt2Config, scale: bool) -> Block {
pub fn new<'p, P>(p: P, config: &Gpt2Config, scale: bool) -> Block
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let layer_norm_config = nn::LayerNormConfig {
eps: config.layer_norm_epsilon,
..Default::default()
};
let ln_1 = nn::layer_norm(p / "ln_1", vec![config.n_embd], layer_norm_config);
let ln_2 = nn::layer_norm(p / "ln_2", vec![config.n_embd], layer_norm_config);
let attn = Attention::new(&(p / "attn"), config, scale);
let mlp = MLP::new(&(p / "mlp"), config);
let attn = Attention::new(p / "attn", config, scale);
let mlp = MLP::new(p / "mlp", config);
Block {
ln_1,

View File

@ -1661,7 +1661,7 @@ pub(crate) mod private_generation_utils {
assert!(
eos_token_ids.is_some() & pad_token_id.is_some(),
"EOS and Padding tokens need to be defined if the number of generated \
beams is greater than the target number of beams"
beams is greater than the target number of beams"
);
next_batch_beam.append(
&mut (0..num_beams)

View File

@ -13,6 +13,7 @@
use crate::bert::{BertConfig, BertEmbedding};
use crate::common::dropout::Dropout;
use std::borrow::Borrow;
use tch::nn::{embedding, EmbeddingConfig};
use tch::{nn, Kind, Tensor};
@ -69,7 +70,12 @@ impl BertEmbedding for RobertaEmbeddings {
/// let config = BertConfig::from_file(config_path);
/// let robert_embeddings = RobertaEmbeddings::new(&(&p.root() / "bert_embeddings"), &config);
/// ```
fn new(p: &nn::Path, config: &BertConfig) -> RobertaEmbeddings {
fn new<'p, P>(p: P, config: &BertConfig) -> RobertaEmbeddings
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let embedding_config = EmbeddingConfig {
padding_idx: 1,
..Default::default()

View File

@ -16,6 +16,7 @@ use crate::common::activations::_gelu;
use crate::common::dropout::Dropout;
use crate::common::linear::{linear_no_bias, LinearNoBias};
use crate::roberta::embeddings::RobertaEmbeddings;
use std::borrow::Borrow;
use tch::nn::Init;
use tch::{nn, Tensor};
@ -71,7 +72,11 @@ pub struct RobertaLMHead {
}
impl RobertaLMHead {
pub fn new(p: &nn::Path, config: &BertConfig) -> RobertaLMHead {
pub fn new<'p, P>(p: P, config: &BertConfig) -> RobertaLMHead
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let dense = nn::linear(
p / "dense",
config.hidden_size,
@ -88,7 +93,7 @@ impl RobertaLMHead {
layer_norm_config,
);
let decoder = linear_no_bias(
&(p / "decoder"),
p / "decoder",
config.hidden_size,
config.vocab_size,
Default::default(),
@ -144,9 +149,14 @@ impl RobertaForMaskedLM {
/// let config = BertConfig::from_file(config_path);
/// let roberta = RobertaForMaskedLM::new(&(&p.root() / "roberta"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> RobertaForMaskedLM {
let roberta = BertModel::<RobertaEmbeddings>::new(&(p / "roberta"), config);
let lm_head = RobertaLMHead::new(&(p / "lm_head"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> RobertaForMaskedLM
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let roberta = BertModel::<RobertaEmbeddings>::new(p / "roberta", config);
let lm_head = RobertaLMHead::new(p / "lm_head", config);
RobertaForMaskedLM { roberta, lm_head }
}
@ -242,7 +252,11 @@ pub struct RobertaClassificationHead {
}
impl RobertaClassificationHead {
pub fn new(p: &nn::Path, config: &BertConfig) -> RobertaClassificationHead {
pub fn new<'p, P>(p: P, config: &BertConfig) -> RobertaClassificationHead
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let dense = nn::linear(
p / "dense",
config.hidden_size,
@ -313,9 +327,13 @@ impl RobertaForSequenceClassification {
/// let config = BertConfig::from_file(config_path);
/// let roberta = RobertaForSequenceClassification::new(&(&p.root() / "roberta"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> RobertaForSequenceClassification {
let roberta = BertModel::<RobertaEmbeddings>::new(&(p / "roberta"), config);
let classifier = RobertaClassificationHead::new(&(p / "classifier"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> RobertaForSequenceClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let roberta = BertModel::<RobertaEmbeddings>::new(p / "roberta", config);
let classifier = RobertaClassificationHead::new(p / "classifier", config);
RobertaForSequenceClassification {
roberta,
@ -437,8 +455,12 @@ impl RobertaForMultipleChoice {
/// let config = BertConfig::from_file(config_path);
/// let roberta = RobertaForMultipleChoice::new(&(&p.root() / "roberta"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> RobertaForMultipleChoice {
let roberta = BertModel::<RobertaEmbeddings>::new(&(p / "roberta"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> RobertaForMultipleChoice
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let roberta = BertModel::<RobertaEmbeddings>::new(p / "roberta", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let classifier = nn::linear(p / "classifier", config.hidden_size, 1, Default::default());
@ -578,8 +600,12 @@ impl RobertaForTokenClassification {
/// let config = BertConfig::from_file(config_path);
/// let roberta = RobertaForTokenClassification::new(&(&p.root() / "roberta"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> RobertaForTokenClassification {
let roberta = BertModel::<RobertaEmbeddings>::new(&(p / "roberta"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> RobertaForTokenClassification
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let roberta = BertModel::<RobertaEmbeddings>::new(p / "roberta", config);
let dropout = Dropout::new(config.hidden_dropout_prob);
let num_labels = config
.id2label
@ -715,8 +741,12 @@ impl RobertaForQuestionAnswering {
/// let config = BertConfig::from_file(config_path);
/// let roberta = RobertaForQuestionAnswering::new(&(&p.root() / "roberta"), &config);
/// ```
pub fn new(p: &nn::Path, config: &BertConfig) -> RobertaForQuestionAnswering {
let roberta = BertModel::<RobertaEmbeddings>::new(&(p / "roberta"), config);
pub fn new<'p, P>(p: P, config: &BertConfig) -> RobertaForQuestionAnswering
where
P: Borrow<nn::Path<'p>>,
{
let p = p.borrow();
let roberta = BertModel::<RobertaEmbeddings>::new(p / "roberta", config);
let num_labels = 2;
let qa_outputs = nn::linear(
p / "qa_outputs",