1
0
Fork 0
mirror of https://github.com/codex-team/codex.docs.git synced 2025-07-20 21:59:41 +02:00
codex.docs/src/backend/controllers/search.ts

164 lines
3.7 KiB
TypeScript
Raw Normal View History

2022-08-11 18:28:15 +03:00
import Page from '../models/page';
import Pages from '../controllers/pages';
/**
 * Naive full-text search over the pages returned by `Pages.getAll()`.
 *
 * Every page is reduced to a sorted list of unique lowercase words taken from
 * its `header`, `paragraph` and `list` blocks. A query is split into words the
 * same way; a query word is considered found on a page when at least one page
 * word starts with it.
 */
class Search {
  /**
   * A page is returned only when strictly more than this share (in percent)
   * of the query words was found on it
   */
  private static readonly minSuccessRatio = 75;

  /**
   * Maximum number of pages returned by a single query
   */
  private static readonly maxResults = 10;

  /**
   * Prepare words database
   *
   * @todo - it should be done in the background
   * @returns one `{ id, words }` entry per page, `words` being unique and sorted
   */
  public async index() {
    const pages = await Pages.getAll();

    return pages.map(page => ({
      id: page._id,
      words: Search.collectPageWords(page)
    }));
  }

  /**
   * Find pages matching the search string and suggest completions for the
   * last typed word
   *
   * @param searchString - raw user input
   * @returns `suggestions` — indexed words starting with the last query word;
   *          `pages` — up to 10 matching pages, best match first, each
   *          extended with a `shortBody` preview placeholder
   */
  public async query(searchString: string) {
    /**
     * Normalise the query exactly like the indexed text, so punctuation and
     * repeated spaces never produce empty or unmatched words
     */
    const searchWords = Search.tokenize(searchString);

    /**
     * Pages are fetched once and kept next to their words, so results can be
     * returned without looking the pages up by id again
     */
    const pages = await Pages.getAll();
    const indexedPages = pages.map(page => ({
      page,
      words: Search.collectPageWords(page)
    }));

    /**
     * Search itself
     */
    const foundPages = indexedPages
      .map(({ page, words }) => {
        const foundWordsCount = searchWords.filter(
          searchWord => words.some(pageWord => pageWord.startsWith(searchWord))
        ).length;

        /**
         * An empty query matches nothing (and must not divide by zero)
         */
        const successRatio = searchWords.length === 0
          ? 0
          : foundWordsCount / searchWords.length * 100;

        return {
          page,
          successRatio
        };
      })
      .filter(({ successRatio }) => successRatio > Search.minSuccessRatio)
      .sort((a, b) => b.successRatio - a.successRatio)
      .slice(0, Search.maxResults);

    /**
     * @todo build a real preview with highlighted search words;
     *       for now every page gets the same placeholder
     */
    const returnPages = foundPages.map(({ page }) => ({
      ...page,
      shortBody: '...'
    }));

    /**
     * Suggestions are all known words that continue the last query word
     */
    const uniqWords = [...new Set(indexedPages.flatMap(({ words }) => words))].sort();
    const lastWord = searchWords[searchWords.length - 1];
    const suggestions: string[] = lastWord === undefined
      ? []
      : uniqWords.filter(word => word.startsWith(lastWord));

    return {
      suggestions,
      pages: returnPages
    };
  }

  /**
   * Thin wrapper around {@link Search.query}
   *
   * @param searchString - raw user input
   */
  private async search(searchString: string) {
    return this.query(searchString);
  }

  /**
   * Collect unique sorted words from all indexable blocks of a page
   *
   * @param page - page whose `body.blocks` should be indexed; a page without
   *               blocks yields an empty list
   */
  private static collectPageWords(page: { body?: { blocks?: unknown } }): string[] {
    const blocks = page.body && Array.isArray(page.body.blocks) ? page.body.blocks : [];
    const words = new Set<string>();

    for (const block of blocks) {
      const blockText = Search.stripMarkup(Search.extractBlockText(block));

      for (const word of Search.tokenize(blockText)) {
        words.add(word);
      }
    }

    return [...words].sort();
  }

  /**
   * Get raw text of a block; only `header`, `paragraph` and `list` blocks
   * are indexed, any other (or malformed) block gives an empty string
   *
   * @param block - single item of `page.body.blocks`
   */
  private static extractBlockText(
    block: { type?: unknown; data?: { text?: unknown; items?: unknown } } | null | undefined
  ): string {
    if (!block || !block.data) {
      return '';
    }

    switch (block.type) {
      case 'header':
      case 'paragraph':
        return typeof block.data.text === 'string' ? block.data.text : '';

      case 'list':
        /**
         * Only plain string items are indexed; anything else would be
         * stringified as "[object Object]" and pollute the words list
         */
        return Array.isArray(block.data.items)
          ? block.data.items.filter(item => typeof item === 'string').join(' ')
          : '';

      default:
        return '';
    }
  }

  /**
   * Replace inline HTML tags and character entities with spaces, so that
   * tag names, attributes and entity names are not indexed as words
   *
   * @param html - block text, presumably containing inline markup
   */
  private static stripMarkup(html: string): string {
    return html
      .replace(/<[^>]*>/g, ' ')
      .replace(/&(?:[a-z]+|#\d+|#x[0-9a-f]+);/gi, ' ');
  }

  /**
   * Split text into lowercase words consisting of latin/cyrillic letters and
   * digits; every other character is treated as a separator
   *
   * @param text - any text (block content or a search string)
   * @returns words in order of appearance, without empty strings
   */
  private static tokenize(text: string): string[] {
    return text
      // lowercase all words
      .toLowerCase()
      // keep only letters (+cyrillic, including "ё") and numbers
      .replace(/[^a-zа-яё0-9]+/g, ' ')
      // split to words by spaces
      .split(' ')
      // leading/trailing separators produce empty strings — drop them
      .filter(word => word.length > 0);
  }
}
export default Search;