rust-bert/examples/keyword_extraction.rs
guillaume-be 5d2b107e99
Keyword/Keyphrase extraction (#295)
* stop word tokenizer implementation

* Addition of all-mini-lm-l6-v2

* initial implementation of keyword scorer

* Cosine Similarity keyword extraction

* Added lower case parsing from tokenizer config for sentence embeddings

* Initial draft of pipeline complete

* Addition of Maximal Marginal relevance scorer

* Addition of Max Sum scorer

* Lowercase and ngrams handling

* Improved n-gram handling

* Skip n-grams containing stopwords

* Fixed short sentence input and added documentation

* Updated documentation and defaults, added example

* Addition of tests for keyword extraction

* Updated changelog

* Fixed Clippy warnings
2022-11-13 08:51:10 +00:00

41 lines
1.7 KiB
Rust

extern crate anyhow;
use rust_bert::pipelines::keywords_extraction::{
KeywordExtractionConfig, KeywordExtractionModel, KeywordScorerType,
};
use rust_bert::pipelines::sentence_embeddings::{
SentenceEmbeddingsConfig, SentenceEmbeddingsModelType,
};
/// Example entry point: runs keyword extraction over a short passage about Rust
/// and prints the text and score of every keyword returned.
///
/// The pipeline is configured with the `AllMiniLmL6V2` sentence-embeddings model,
/// the `MaxSum` scorer, an n-gram range of `(1, 1)` and a request for 5 keywords;
/// every other setting comes from `KeywordExtractionConfig::default()`.
///
/// # Errors
///
/// Propagates any error raised while constructing the model or running `predict`.
fn main() -> anyhow::Result<()> {
    // Sentence-embeddings backbone handed to the keyword extraction pipeline.
    let embeddings_config =
        SentenceEmbeddingsConfig::from(SentenceEmbeddingsModelType::AllMiniLmL6V2);

    let model = KeywordExtractionModel::new(KeywordExtractionConfig {
        sentence_embeddings_config: embeddings_config,
        scorer_type: KeywordScorerType::MaxSum,
        ngram_range: (1, 1),
        num_keywords: 5,
        ..Default::default()
    })?;

    // Credits: Wikimedia https://en.wikipedia.org/wiki/Rust_(programming_language)
    let passage = "Rust is a multi-paradigm, general-purpose programming language. \
        Rust emphasizes performance, type safety, and concurrency. Rust enforces memory safety—that is, \
        that all references point to valid memory—without requiring the use of a garbage collector or \
        reference counting present in other memory-safe languages. To simultaneously enforce \
        memory safety and prevent concurrent data races, Rust's borrow checker tracks the object lifetime \
        and variable scope of all references in a program during compilation. Rust is popular for \
        systems programming but also offers high-level features including functional programming constructs.";

    // `predict` yields one keyword list per input document; flatten them so each
    // keyword is printed on its own line, in the order the model returned them.
    let per_document_keywords = model.predict(&[passage])?;
    for extracted in per_document_keywords.into_iter().flatten() {
        println!("{:?}, {:?}", extracted.text, extracted.score);
    }

    Ok(())
}