2022-08-11 18:28:15 +03:00
|
|
|
|
import Page from '../models/page';
|
|
|
|
|
import Pages from '../controllers/pages';
|
|
|
|
|
|
|
|
|
|
/**
 * Naive full-text search over the text content of pages.
 *
 * Nothing is cached: every call loads all pages through `Pages.getAll()` and
 * rebuilds the word lists from scratch.
 */
class Search {
  /** A page is kept only when more than this percentage of the search words matched. */
  private static readonly MIN_SUCCESS_RATIO = 75;

  /** Maximum number of pages returned by a single query. */
  private static readonly MAX_RESULTS = 10;

  /**
   * Any run of characters that is not a latin letter, a cyrillic letter or a digit.
   * 'ё' is listed explicitly because it lies outside of the 'а-я' range.
   * The pattern is applied to lowercased text only.
   */
  private static readonly WORD_SEPARATOR = /[^a-zа-яё0-9]+/;

  /**
   * Prepare words database
   *
   * @todo - it should be done in the background
   * @returns one entry per page: the page id and the sorted list of unique words found in it
   */
  public async index() {
    const pages = await Pages.getAll();

    return pages.map(page => ({
      id: page._id,
      words: Search.collectWords(page.body?.blocks),
    }));
  }

  /**
   * Find pages matching the search string and suggest completions for its last word.
   *
   * A search word matches a page when some word of the page starts with it.
   * Pages are returned most relevant first; pages with equal relevance keep
   * the order in which `Pages.getAll()` returned them.
   *
   * @param searchString - raw user input; it is split into words the same way page content is
   * @returns `suggestions` - indexed words starting with the last search word,
   *          `pages` - matched pages, each extended with a `shortBody` field
   */
  public async query(searchString: string) {
    // Load pages once and keep each page next to its words,
    // so no lookup by id is needed after ranking
    const pages = await Pages.getAll();
    const indexedPages = pages.map(page => ({
      page,
      words: Search.collectWords(page.body?.blocks),
    }));

    const searchWords = Search.tokenize(searchString);

    /**
     * Search itself
     */
    const foundPages = indexedPages
      .map(({ page, words }) => ({
        page,
        successRatio: Search.successRatio(searchWords, words),
      }))
      .filter(({ successRatio }) => successRatio > Search.MIN_SUCCESS_RATIO)
      .sort((a, b) => b.successRatio - a.successRatio)
      .slice(0, Search.MAX_RESULTS);

    /**
     * @todo - build a real excerpt with highlighted search words; for now it is a placeholder
     */
    const returnPages = foundPages.map(({ page }) => ({
      ...page,
      shortBody: '...',
    }));

    const uniqWords = [ ...new Set(indexedPages.flatMap(({ words }) => words)) ].sort();

    // undefined when the search string contains no words at all
    const lastWord = searchWords[searchWords.length - 1];

    return {
      suggestions: lastWord === undefined ? [] : uniqWords.filter(word => word.startsWith(lastWord)),
      pages: returnPages,
    };
  }

  /**
   * Thin wrapper around {@link Search.query}
   *
   * @param searchString - raw user input
   * @returns the result of `query` as is
   */
  private async search(searchString: string) {
    const pages = await this.query(searchString);

    return pages;
  }

  /**
   * Split text into lowercased words made of latin/cyrillic letters and digits.
   *
   * @todo get text from inline code elements and remove html tags
   * @param text - arbitrary text
   * @returns words in order of appearance, never containing empty strings
   */
  private static tokenize(text: string): string[] {
    return text
      .toLowerCase()
      .split(Search.WORD_SEPARATOR)
      // the split yields '' at the edges when the text starts or ends with a separator
      .filter(word => word.length > 0);
  }

  /**
   * Extract searchable text from a single content block.
   *
   * Only 'header', 'paragraph' and 'list' blocks are indexed.
   *
   * @param block - block of a page body; its shape is validated at runtime
   * @returns text of the block, or an empty string for other block types and malformed blocks
   */
  private static blockText(block: unknown): string {
    if (typeof block !== 'object' || block === null) {
      return '';
    }

    const { type, data } = block as { type?: unknown; data?: unknown };

    if (typeof data !== 'object' || data === null) {
      return '';
    }

    const { text, items } = data as { text?: unknown; items?: unknown };

    switch (type) {
      case 'header':
      case 'paragraph':
        return typeof text === 'string' ? text : '';

      case 'list':
        // non-string items are skipped instead of being stringified into the index
        return Array.isArray(items)
          ? items.filter((item): item is string => typeof item === 'string').join(' ')
          : '';

      default:
        return '';
    }
  }

  /**
   * Collect unique words of all indexable blocks.
   *
   * @param blocks - blocks of a page body; anything that is not an array yields no words
   * @returns sorted list of unique words
   */
  private static collectWords(blocks: unknown): string[] {
    if (!Array.isArray(blocks)) {
      return [];
    }

    const words = blocks.flatMap(block => Search.tokenize(Search.blockText(block)));

    return [ ...new Set(words) ].sort();
  }

  /**
   * Percentage of search words that are a prefix of at least one page word.
   *
   * @param searchWords - words of the query
   * @param pageWords - words of the page
   * @returns number from 0 to 100; 0 when there are no search words
   */
  private static successRatio(searchWords: readonly string[], pageWords: readonly string[]): number {
    if (searchWords.length === 0) {
      return 0;
    }

    const foundWords = searchWords.filter(
      word => pageWords.some(pageWord => pageWord.startsWith(word))
    );

    return foundWords.length / searchWords.length * 100;
  }
}
|
|
|
|
|
|
|
|
|
|
export default Search;
|