improve: tokenizing function names and camel-case strings (#34)

This commit is contained in:
Johannes Kirschbauer 2023-04-27 18:38:22 +02:00 committed by GitHub
parent c707bd74a7
commit 24cacef89e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 27 additions and 17 deletions

View File

@ -31,9 +31,33 @@ export function NixFunctions(props: FunctionsProps) {
boost += id.includes(term) ? 10 : 0;
return boost;
},
boost: {
id: 10,
name: 8,
category: 6,
example: 0.5,
fn_type: 3,
description: 1,
},
},
tokenize: (text: string): string[] => {
const tokens = text.split(/\W|(?=[A-Z])/);
/**
 * Custom tokenizer that keeps camelCase identifiers searchable both as a
 * whole and by their individual parts.
 *
 * The text is first cut on every non-word character. Words containing an
 * uppercase letter are emitted twice: once unchanged and once broken up in
 * front of each uppercase letter. Words without uppercase letters are
 * emitted unchanged.
 *
 * @param text - raw text to tokenize
 * @param fieldName - accepted to match the tokenizer signature; not used here
 * @returns whole mixed-case words, then their split parts, then the
 *          remaining words without uppercase letters
 */
tokenize: (text: string, fieldName): string[] => {
  // Break the input on every non-word character first.
  const words = text.split(/\W/);
  const hasUppercase = (word: string): boolean => /[A-Z]/.test(word);

  const mixedCase = words.filter(hasUppercase);
  const plain = words.filter((word) => !hasUppercase(word));

  return [
    // Words with uppercase letters are kept whole: mapAttrs -> mapAttrs
    ...mixedCase,
    // ...and additionally split before each uppercase letter:
    // mapAttrs -> [map, Attrs]
    ...mixedCase.flatMap((word) => word.split(/(?=[A-Z])/)),
    // Words without uppercase letters pass through untouched: map -> map
    ...plain,
  ];
},
});

View File

@ -62,16 +62,7 @@ export function BasicList(props: BasicListProps) {
const handleSearch = (term: string) => {
setTerm(term);
search(term, {
boost: {
id: 10,
name: 8,
category: 6,
example: 0.5,
fn_type: 3,
description: 1,
},
});
search(term);
setPage(1);
};

View File

@ -1,9 +1,4 @@
import { DocItem, MetaData } from "../models/nix";
// import MiniSearch from 'minisearch'
// export const byMinisearch = (term: string, miniSearch: MiniSearch<DocItem> ) => (data: MetaData): MetaData => {
// return miniSearch.search(term);
// }
export const byQuery =
(rawTerm: string) =>