1
0
Fork 0
mirror of https://github.com/codex-team/codex.docs.git synced 2025-07-21 06:09:41 +02:00

update logic

This commit is contained in:
Taly 2022-08-26 15:08:36 +03:00
parent 03370dd066
commit 0cb69545db
5 changed files with 258 additions and 180 deletions

View file

@ -1,27 +1,199 @@
import Page from '../models/page'; import PageData from '../models/page';
import Pages from '../controllers/pages'; import Pages from '../controllers/pages';
import urlify from '../utils/urlify';
import Page from '../models/page';
class Search { class Search {
/** private words: { [key: string]: {[key: string]: number} } = Object.create(null);
* Prepare words database private pages: PageData[] = [];
*/
public async index() {
/**
* Prepare pages content for the search
* @todo - it should be done in the background
*/
const pages = await Pages.getAll();
const pagesWords = pages.map(page => {
const pageWords: string[] = [];
page.body.blocks.forEach((block: any) => { public async init() {
let blockContent = ''; this.pages = await this.getPages();
const validBlocks = ['header', 'paragraph', 'list']; /**
if (!validBlocks.includes(block.type)) { * Process all pages
*/
for await (const page of this.pages) {
// if (page._id && !this.pages[page._id]) {
// this.pages[page._id] = [];
// }
/**
* Read content blocks from page
*/
for await (const block of page.body.blocks) {
const blockRatio = this.getBlockRatio(block);
const blockContent = this.getCleanTextFromBlock(block);
const blockWords: string[] = this.splitTextToWords(blockContent);
// if (page._id) {
// this.pages[page._id].push(...blockWords);
// }
/**
* Process list of words in a block
*/
for await (const word of blockWords) {
if (!this.words[word]) {
this.words[word] = Object.create(null);
}
if (page._id) {
if (!this.words[word][page._id]) {
this.words[word][page._id] = 0;
}
/**
* Add page id to the list of pages with this word
*/
this.words[word][page._id] += blockRatio;
}
}
}
}
}
public async query(searchString: string) {
try {
await this.init();
} catch (error) {
console.log(error);
throw error;
}
const searchWords = searchString
.trim()
.toLowerCase()
.replace(/\s+/g, ' ')
.split(' ');
const goodPages = (await this.getPagesByWords(searchWords))
.slice(0, 10);
const returnPages: {[key: string]: string|number, ratio: number}[] = [];
goodPages.forEach(({ pageId, ratio }) => {
const page = this.pages.filter(page => page._id === pageId).pop();
if (!page) {
return; return;
} }
let section = '';
page.body.blocks.forEach((block: any) => {
let koef = 0;
let blockContent = this.getCleanTextFromBlock(block);
let shortBody = blockContent;
if (block.type === 'header') {
section = blockContent;
}
searchWords.forEach(word => {
if (blockContent.toLowerCase().indexOf(word) !== -1) {
koef += 1;
}
})
shortBody = this.highlightSubstring(shortBody, searchWords);
if (koef > 0) {
returnPages.push({
...page,
shortBody,
anchor: urlify(section),
section,
ratio: ratio * koef,
})
}
});
});
// // --------- START test ---------
// //
// const uniqWords = [...new Set(pagesWords.flatMap(page => page.words))].sort();
// //
// // uniqWords.forEach(word => {
// // console.log(word);
// // })
//
// // --------- END test ---------
// console.log('RESULT')
// returnPages.forEach(page => {
// console.log(page);
// });
//
// return {
// suggestions: uniqWords.filter(word => word.indexOf(searchWords.slice(-1)[0]) === 0),
// pages: returnPages
// }
return {
suggestions: [],
pages: returnPages
.sort((a, b) => b.ratio - a.ratio)
.slice(0, 15)
}
}
/**
 * Load the list of pages the search works with
 *
 * @returns the value resolved by Pages.getAll()
 */
private async getPages() {
  const allPages = await Pages.getAll();

  return allPages;
}
/**
 * Collect the pages that contain at least one of the passed words
 * and rank them by the summed weight of the matched words
 *
 * @param words - words to look up in the words index (exact match)
 * @returns list of { pageId, ratio } sorted by ratio, highest first
 */
private async getPagesByWords(words: string[]) {
  /**
   * Summed weight per page id
   */
  const weightByPage: {[key: string]: number} = {};

  /**
   * Walk through the index in its own key order and take only requested words
   */
  for (const indexedWord of Object.keys(this.words)) {
    if (!words.includes(indexedWord)) {
      continue;
    }

    const pagesWithWord = this.words[indexedWord];

    for (const pageId of Object.keys(pagesWithWord)) {
      weightByPage[pageId] = (weightByPage[pageId] || 0) + pagesWithWord[pageId];
    }
  }

  /**
   * Turn the map into a list and put the heaviest pages first
   */
  const rankedPages = Object.keys(weightByPage)
    .map(pageId => ({
      pageId,
      ratio: weightByPage[pageId]
    }));

  rankedPages.sort((first, second) => second.ratio - first.ratio);

  return rankedPages;
}
/**
 * Drop duplicates from the list and return the rest in default sort order
 *
 * @param elements - list of strings, may contain duplicates
 * @returns new sorted array with unique values; the passed array is not modified
 */
private getUnique(elements: string[]) {
  const distinct = Array.from(new Set(elements));

  distinct.sort();

  return distinct;
}
/**
 * Get the weight that words of a block add to the page in the words index
 *
 * @param block - content block, only its `type` field is read
 * @returns 6 for 'header', 2 for 'paragraph', 1 for 'list', 0 for any other type
 */
private getBlockRatio(block: any) {
  const ratioByType = new Map<string, number>([
    ['header', 6],
    ['paragraph', 2],
    ['list', 1],
  ]);

  return ratioByType.get(block.type) ?? 0;
}
private getCleanTextFromBlock(block: any): string {
let blockContent = '';
switch (block.type) { switch (block.type) {
case 'header': case 'header':
blockContent = block.data.text; blockContent = block.data.text;
@ -36,15 +208,31 @@ class Search {
break; break;
default: default:
return; return blockContent;
} }
const blockWords: string[] = blockContent blockContent = this.removeHTMLTags(blockContent);
.replace(/<[^>]*>?/gm, '') blockContent = this.removeHTMLSpecialCharacters(blockContent);
return blockContent;
}
/**
 * Cut everything that looks like an HTML tag out of the text
 *
 * @param text - string that may contain markup
 * @returns the text without tags; an opening '<' that is never closed is removed together with the rest of the text
 */
private removeHTMLTags(text: string) {
  const tagPattern = /<[^>]*>?/gm;
  const withoutTags = text.replace(tagPattern, '');

  return withoutTags;
}
/**
 * Strip HTML character references (e.g. `&amp;`, `&#39;`, `&#x27;`) from the text
 *
 * Only well-formed references are touched: '&', then a name or a numeric code, then ';'.
 * A plain ampersand is left as is. Previously any '&' was treated as the beginning
 * of a reference, so everything up to the next ';' (or up to the end of the text)
 * was thrown away together with it.
 *
 * `&nbsp;` is replaced with a regular space instead of being removed,
 * otherwise the words around it are glued into a single one.
 *
 * @param text - string that may contain HTML character references
 * @returns the text without character references
 */
private removeHTMLSpecialCharacters(text: string) {
  return text
    // non-breaking space separates words, so keep a separator in its place
    .replace(/&nbsp;/gi, ' ')
    // named (&amp;), decimal (&#39;) and hexadecimal (&#x27;) references
    .replace(/&(?:[a-z][a-z0-9]*|#\d+|#x[0-9a-f]+);/gi, '');
}
private splitTextToWords(text: string): string[] {
return text
// lowercase all words // lowercase all words
.toLowerCase() .toLowerCase()
// remove punctuation
.replace(/[.,;:]/gi, '')
// left only letters (+cyrillic) and numbers // left only letters (+cyrillic) and numbers
.replace(/[^a-zа-я0-9]/gi, ' ') .replace(/[^a-zа-я0-9]/gi, ' ')
@ -52,143 +240,24 @@ class Search {
.replace(/\s+/g, ' ') .replace(/\s+/g, ' ')
// split to words by spaces // split to words by spaces
.split(' '); .split(' ')
pageWords.push(...blockWords); // ignore words shorter than 3 chars
}); .filter(word => word.length >= 3);
const uniqueWords = [...new Set(pageWords)].sort();
return {
id: page._id,
words: uniqueWords
};
});
return pagesWords;
} }
public async query(searchString: string) {
const pages = await Pages.getAll();
const pagesWords = await this.index();
/** /**
* Search itself * Highlight substring in string with a span wrapper
*/ */
const searchWords = searchString.toLowerCase().split(' '); private highlightSubstring(text: string, words: string|string[]) {
const goodPages = pagesWords.map(({ id, words}) => { if (typeof words === 'string') {
const foundWords = searchWords.filter( words = [words];
word => {
return words.filter(
testWord => {
return testWord.indexOf(word) === 0
}
).length > 0;
}
);
const successRatio = foundWords.length / searchWords.length * 100;
return {
id,
successRatio
}
});
const foundPages = goodPages
.filter(({ successRatio }) => successRatio > 75)
.sort((a, b) => b.successRatio - a.successRatio)
.slice(0, 10);
const returnPages = pages.filter(page => foundPages.some(({ id }) => id === page._id))
.map(page => {
let shortBody = '';
let flag = false;
let section = '';
let ratio = 0;
page.body.blocks.forEach((block: any) => {
if (flag) return;
let blockContent = '';
switch (block.type) {
case 'header':
blockContent = block.data.text;
ratio = 1;
section = blockContent;
break;
case 'paragraph':
blockContent = block.data.text
ratio = 0.5;
break;
case 'list':
blockContent = block.data.items.join(' ');
ratio = 0.5;
break;
default:
return;
} }
blockContent = blockContent const wordRegExp = new RegExp(words.join('|'), "ig");
.replace(/<[^>]*>?/gm, ''); const CLASS_STYLE = 'search-word';
// .toLowerCase();
searchWords.forEach(word => { return text.replace(wordRegExp, `<span class="${CLASS_STYLE}">$&</span>`);
// blockContent = blockContent.replace(word, `<span class="search-word">${word}</span>`);
if (flag) return;
if (blockContent.toLowerCase().indexOf(word) !== -1) {
shortBody = this.highlightSubstring(blockContent, word);
flag = true;
}
})
});
return {
...page,
shortBody,
anchor: section.replace(/\s+/g, '-').toLowerCase(),
section,
};
});
// --------- START test ---------
//
const uniqWords = [...new Set(pagesWords.flatMap(page => page.words))].sort();
//
// uniqWords.forEach(word => {
// console.log(word);
// })
// --------- END test ---------
return {
suggestions: uniqWords.filter(word => word.indexOf(searchWords.slice(-1)[0]) === 0),
pages: returnPages
}
}
private async search(searchString: string) {
const pages = await this.query(searchString);
return pages;
}
private highlightSubstring(text: string, word: string) {
const wordRegExp = new RegExp(word, "ig");
return text.replace(wordRegExp, '<span class="search-word">$&</span>');
} }
} }

View file

@ -16,6 +16,7 @@ router.get('/search', async (req: Request, res: Response) => {
const startTime = performance.now(); const startTime = performance.now();
const search = new Search(); const search = new Search();
const searchResponse = await search.query(searchString); const searchResponse = await search.query(searchString);
/** End measuring search time */ /** End measuring search time */
@ -30,8 +31,6 @@ router.get('/search', async (req: Request, res: Response) => {
_id: page._id, _id: page._id,
title: page.title, title: page.title,
uri: page.uri, uri: page.uri,
// body: page.body,
// parent: page.parent,
section: page.section, section: page.section,
anchor: page.anchor, anchor: page.anchor,
shortBody: page.shortBody, shortBody: page.shortBody,

View file

@ -23,7 +23,7 @@
{% include 'components/button.twig' with {label: 'Edit', icon: 'pencil', size: 'small', url: '/page/edit/' ~ page._id, class: 'page__header-button'} %} {% include 'components/button.twig' with {label: 'Edit', icon: 'pencil', size: 'small', url: '/page/edit/' ~ page._id, class: 'page__header-button'} %}
{% endif %} {% endif %}
</header> </header>
<h1 class="page__title"> <h1 class="page__title" id="{{ page.title | urlify }}">
{{ page.title }} {{ page.title }}
</h1> </h1>
{% if (config.carbon and config.carbon.placement and config.carbon.serve) %} {% if (config.carbon and config.carbon.placement and config.carbon.serve) %}

View file

@ -13,7 +13,7 @@ export default class Search {
this.isVisible = false; this.isVisible = false;
this.PLACEHOLDER = 'Find in documents...'; this.PLACEHOLDER = 'Search docs';
this.TOGGLER_SHORTCUT = 'CMD+SHIFT+F'; this.TOGGLER_SHORTCUT = 'CMD+SHIFT+F';
this.shortcut = null; this.shortcut = null;
@ -38,7 +38,7 @@ export default class Search {
searchResultItemSection: 'search-result-item__section', searchResultItemSection: 'search-result-item__section',
searchResultItemDescription: 'search-result-item__description', searchResultItemDescription: 'search-result-item__description',
blur: 'blurred', blurred: 'blurred',
noscroll: 'noscroll' noscroll: 'noscroll'
}; };
} }
@ -50,9 +50,15 @@ export default class Search {
this.createDebouncedSearch(); this.createDebouncedSearch();
this.enableShortcutListening(); this.enableShortcutListening();
// ! force open search overlay /**
* Only for development needs:
*
* - force open overlay
* - type a search string
* - fire search
*/
// const testString = 'api method';
// this.toggleSearchOverlay(true); // this.toggleSearchOverlay(true);
// const testString = 'api';
// this.nodes.searchInput.value = testString; // this.nodes.searchInput.value = testString;
// this.debouncedSearch(testString); // this.debouncedSearch(testString);
} }
@ -157,13 +163,14 @@ export default class Search {
const result = document.createElement('a'); const result = document.createElement('a');
result.classList.add(this.CSS.searchResultItem); result.classList.add(this.CSS.searchResultItem);
result.setAttribute('href', url); result.setAttribute('href', url);
result.addEventListener('click', this.toggleSearchOverlay.bind(this, false));
const title = document.createElement('div'); const title = document.createElement('div');
title.classList.add(this.CSS.searchResultItemTitle); title.classList.add(this.CSS.searchResultItemTitle);
title.innerHTML = page.title; title.innerHTML = page.title;
result.appendChild(title); result.appendChild(title);
if (page.section !== page.title) { if (page.section && page.section !== page.title) {
const section = document.createElement('span'); const section = document.createElement('span');
section.classList.add(this.CSS.searchResultItemSection); section.classList.add(this.CSS.searchResultItemSection);
section.innerHTML = `${page.section}`; section.innerHTML = `${page.section}`;

View file

@ -5,7 +5,7 @@
left: 0; left: 0;
width: 100%; width: 100%;
height: 100%; height: 100%;
background: rgba(0, 0, 0, 0.5); background: rgba(0, 0, 0, 0.7);
display: none; display: none;
overflow-y: scroll; overflow-y: scroll;
@ -20,11 +20,14 @@
&-wrapper { &-wrapper {
background: #fff; background: #fff;
margin: 30vh auto 10vh; margin: 30vh auto 20vh;
max-width: var(--layout-width-main-col); max-width: var(--layout-width-main-col);
width: 100%; width: 100%;
@apply --squircle; border-radius: 8px;
box-shadow: 0px 0px 15px rgba(0, 0, 0, 0.1);
overflow: hidden;
} }
&-input { &-input {