rust-bert/examples/translation_mbart.rs

// Copyright 2019-present, the HuggingFace Inc. team, The Google AI Language Team and Facebook, Inc.
// Copyright 2019 Guillaume Becquin
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

extern crate anyhow;
use rust_bert::mbart::{
    MBartConfigResources, MBartModelResources, MBartSourceLanguages, MBartTargetLanguages,
    MBartVocabResources,
};
use rust_bert::pipelines::common::{ModelResource, ModelType};
use rust_bert::pipelines::translation::{Language, TranslationConfig, TranslationModel};
use rust_bert::resources::RemoteResource;
use tch::Device;
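
// Translates a single English sentence into French, Spanish, and Hindi using
// the MBart50 many-to-many multilingual translation model.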
fn main() -> anyhow::Result<()> {
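    // Pretrained weights, configuration, and SentencePiece vocabulary are
    // downloaded from the model hub on first run and cached locally.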
    let model_resource = RemoteResource::from_pretrained(MBartModelResources::MBART50_MANY_TO_MANY);
    let config_resource =
        RemoteResource::from_pretrained(MBartConfigResources::MBART50_MANY_TO_MANY);
    let vocab_resource = RemoteResource::from_pretrained(MBartVocabResources::MBART50_MANY_TO_MANY);
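
    // MBart50 many-to-many can translate directly between any pair of the
    // 50 languages it was fine-tuned on.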
    let source_languages = MBartSourceLanguages::MBART50_MANY_TO_MANY;
    let target_languages = MBartTargetLanguages::MBART50_MANY_TO_MANY;

    let translation_config = TranslationConfig::new(
        ModelType::MBart,
        ModelResource::Torch(Box::new(model_resource)),
        config_resource,
        vocab_resource,
        None, // no merges resource is required for the MBart tokenizer
        source_languages,
        target_languages,
        Device::cuda_if_available(),
    );
    let model = TranslationModel::new(translation_config)?;

    let source_sentence = "This sentence will be translated into multiple languages.";

    // Translate the same sentence into three target languages, collecting the results.
    let mut outputs = Vec::new();
    outputs.extend(model.translate(&[source_sentence], Language::English, Language::French)?);
    outputs.extend(model.translate(&[source_sentence], Language::English, Language::Spanish)?);
    outputs.extend(model.translate(&[source_sentence], Language::English, Language::Hindi)?);
    for sentence in outputs {
        println!("{sentence}");
    }
    Ok(())
}