rust-bert/examples/generation_gpt2_hf_tokenizers.rs
guillaume-be 1f4d344668
Change generate return type to Result (#437)
* - Changed the return type of generate method to be `Result`, removed fallible unwraps

* Fix doctests
2023-12-04 17:58:21 +00:00

62 lines
2.2 KiB
Rust

// Copyright 2019-present, the HuggingFace Inc. team, The Google AI Language Team and Facebook, Inc.
// Copyright 2019 Guillaume Becquin
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
extern crate anyhow;
use rust_bert::pipelines::common::{ModelType, TokenizerOption};
use rust_bert::pipelines::text_generation::{TextGenerationConfig, TextGenerationModel};
use rust_bert::resources::{RemoteResource, ResourceProvider};
use std::fs::File;
use std::io::Write;
use tempfile::TempDir;
/// Example: GPT-2 text generation driven by a Hugging Face `tokenizer.json`
/// instead of the default rust-tokenizers vocab/merges files.
fn main() -> anyhow::Result<()> {
    // Greedy decoding configuration for a GPT-2 generation pipeline:
    // single beam, no sampling, at most 30 generated tokens.
    let config = TextGenerationConfig {
        model_type: ModelType::GPT2,
        max_length: Some(30),
        do_sample: false,
        num_beams: 1,
        temperature: 1.0,
        num_return_sequences: 1,
        ..Default::default()
    };

    // The HF tokenizer loader needs a special-token map on disk; write a
    // minimal one into a temp dir (cleaned up automatically on drop).
    let scratch_dir = TempDir::new()?;
    let special_token_map = scratch_dir.path().join("special_token_map.json");
    {
        // Scope the handle so the file is flushed and closed before it is read back.
        let mut map_file = File::create(&special_token_map)?;
        writeln!(
            map_file,
            r#"{{"bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "unk_token": "<|endoftext|>"}}"#
        )?;
    }

    // Fetch the pretrained GPT-2 `tokenizer.json` from the Hugging Face hub
    // (cached locally after the first download).
    let tokenizer_file = RemoteResource::from_pretrained((
        "gpt2/tokenizer",
        "https://huggingface.co/gpt2/resolve/main/tokenizer.json",
    ))
    .get_local_path()?;
    let tokenizer = TokenizerOption::from_hf_tokenizer_file(tokenizer_file, special_token_map)?;

    // Build the generation model around the externally supplied tokenizer.
    let model = TextGenerationModel::new_with_tokenizer(config, tokenizer)?;

    // Generate a continuation for a single prompt and print each returned sequence.
    let prompt = "The dog";
    for generated in model.generate(&[prompt], None)? {
        println!("{generated:?}");
    }
    Ok(())
}