add support for markdown files to semantic search

This commit is contained in:
KCaverly 2023-08-01 10:30:34 -04:00
parent 9a50b43eaa
commit e221f23018
2 changed files with 17 additions and 0 deletions

View File

@ -21,6 +21,7 @@ const CODE_CONTEXT_TEMPLATE: &str =
"The below code snippet is from file '<path>'\n\n```<language>\n<item>\n```";
const ENTIRE_FILE_TEMPLATE: &str =
"The below snippet is from file '<path>'\n\n```<language>\n<item>\n```";
const MARKDOWN_CONTEXT_TEMPLATE: &str = "The below file contents is from file '<path>'\n\n<item>";
pub const PARSEABLE_ENTIRE_FILE_TYPES: &[&str] =
&["TOML", "YAML", "CSS", "HEEX", "ERB", "SVELTE", "HTML"];
@ -70,6 +71,19 @@ impl CodeContextRetriever {
}])
}
fn parse_markdown_file(&self, relative_path: &Path, content: &str) -> Result<Vec<Document>> {
let document_span = MARKDOWN_CONTEXT_TEMPLATE
.replace("<path>", relative_path.to_string_lossy().as_ref())
.replace("<item>", &content);
Ok(vec![Document {
range: 0..content.len(),
content: document_span,
embedding: Vec::new(),
name: "Markdown".to_string(),
}])
}
fn get_matches_in_file(
&mut self,
content: &str,
@ -136,6 +150,8 @@ impl CodeContextRetriever {
if PARSEABLE_ENTIRE_FILE_TYPES.contains(&language_name.as_ref()) {
return self.parse_entire_file(relative_path, language_name, &content);
} else if &language_name.to_string() == &"Markdown".to_string() {
return self.parse_markdown_file(relative_path, &content);
}
let mut documents = self.parse_file(content, language)?;

View File

@ -613,6 +613,7 @@ impl SemanticIndex {
.await
{
if !PARSEABLE_ENTIRE_FILE_TYPES.contains(&language.name().as_ref())
&& &language.name().as_ref() != &"Markdown"
&& language
.grammar()
.and_then(|grammar| grammar.embedding_config.as_ref())