mirror of
https://github.com/guillaume-be/rust-bert.git
synced 2024-10-05 16:47:24 +03:00
5d2b107e99
* stop word tokenizer implementation * - Addition of all-mini-lm-l6-v2 * initial implementation of keyword scorer * Cosine Similarity keyword extraction * Added lower case parsing from tokenizer config for sentence embeddings * Initial draft of pipeline complete * Addition of Maximal Marginal relevance scorer * Addition of Max Sum scorer * Lowercase and ngrams handling * Improved n-gram handling * Skip n-grams containing stopwords * Fixed short sentence input and added documentation * Updated documentation and defaults, added example * Addition of tests for keywords extractions * Updated changelog * Fixed Clippy warnings
41 lines
1.7 KiB
Rust
41 lines
1.7 KiB
Rust
extern crate anyhow;
|
|
|
|
use rust_bert::pipelines::keywords_extraction::{
|
|
KeywordExtractionConfig, KeywordExtractionModel, KeywordScorerType,
|
|
};
|
|
use rust_bert::pipelines::sentence_embeddings::{
|
|
SentenceEmbeddingsConfig, SentenceEmbeddingsModelType,
|
|
};
|
|
|
|
fn main() -> anyhow::Result<()> {
|
|
let keyword_extraction_config = KeywordExtractionConfig {
|
|
sentence_embeddings_config: SentenceEmbeddingsConfig::from(
|
|
SentenceEmbeddingsModelType::AllMiniLmL6V2,
|
|
),
|
|
scorer_type: KeywordScorerType::MaxSum,
|
|
ngram_range: (1, 1),
|
|
num_keywords: 5,
|
|
..Default::default()
|
|
};
|
|
|
|
let keyword_extraction_model = KeywordExtractionModel::new(keyword_extraction_config)?;
|
|
|
|
let input = "Rust is a multi-paradigm, general-purpose programming language. \
|
|
Rust emphasizes performance, type safety, and concurrency. Rust enforces memory safety—that is, \
|
|
that all references point to valid memory—without requiring the use of a garbage collector or \
|
|
reference counting present in other memory-safe languages. To simultaneously enforce \
|
|
memory safety and prevent concurrent data races, Rust's borrow checker tracks the object lifetime \
|
|
and variable scope of all references in a program during compilation. Rust is popular for \
|
|
systems programming but also offers high-level features including functional programming constructs.";
|
|
|
|
// Credits: Wikimedia https://en.wikipedia.org/wiki/Rust_(programming_language)
|
|
|
|
let keywords = keyword_extraction_model.predict(&[input])?;
|
|
for keyword_list in keywords {
|
|
for keyword in keyword_list {
|
|
println!("{:?}, {:?}", keyword.text, keyword.score);
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|