1
0
Fork 0
mirror of https://github.com/codex-team/codex.docs.git synced 2025-07-27 00:59:42 +02:00

search implementation

This commit is contained in:
Taly 2022-08-11 18:28:15 +03:00
parent f05eb15b72
commit 79592f0a1d
12 changed files with 440 additions and 28 deletions

View file

@ -0,0 +1,128 @@
import Page from '../models/page';
import Pages from '../controllers/pages';
/**
 * Shape of the value resolved by Search.query()
 */
type SearchResponse = {
/**
 * Indexed words that start with the last word of the search string
 */
completions: string[];
/**
 * Pages selected as matching the search string
 */
pages: Page[];
}
class Search {
  /**
   * Block types whose text is included in the search index
   */
  private static readonly indexedBlockTypes: readonly string[] = ['header', 'paragraph'];

  /**
   * Maximum number of pages returned by a single query
   */
  private static readonly maxPages = 10;

  /**
   * A page is returned only when more than this share (in percent)
   * of the search words is found among its words
   */
  private static readonly minSuccessRatio = 50;

  /**
   * Prepare words database
   *
   * @todo - it should be done in the background
   * @returns for every page, its id and the sorted list of unique words found in its text blocks
   */
  public async index(): Promise<{ id: Page['_id']; words: string[] }[]> {
    const pages = await Pages.getAll();

    return pages.map(page => ({
      id: page._id,
      words: Search.collectPageWords(page),
    }));
  }

  /**
   * Find pages and word completions for the passed search string
   *
   * A search word matches a page when at least one page word starts with it.
   * Pages are returned ordered by the share of matched search words (best first).
   *
   * @param searchString - raw text typed by the user
   * @returns completions for the last search word and the best matching pages
   */
  public async query(searchString: string): Promise<SearchResponse> {
    /**
     * The query is normalized the same way as the indexed text,
     * so letter case, punctuation and repeated spaces do not affect matching
     */
    const searchWords = Search.extractWords(searchString);

    /**
     * Nothing to search for: without this guard the ratio below would be 0 / 0
     */
    if (searchWords.length === 0) {
      return {
        completions: [],
        pages: [],
      };
    }

    /**
     * Pages are loaded once and indexed in place, which also keeps each page next to its words
     */
    const pages = await Pages.getAll();
    const indexedPages = pages.map(page => ({
      page,
      words: Search.collectPageWords(page),
    }));

    /**
     * Search itself
     */
    const foundPages = indexedPages
      .map(({ page, words }) => {
        const foundWords = searchWords.filter(
          searchWord => words.some(word => word.startsWith(searchWord))
        );

        return {
          page,
          successRatio: foundWords.length / searchWords.length * 100,
        };
      })
      .filter(({ successRatio }) => successRatio > Search.minSuccessRatio)
      .sort((a, b) => b.successRatio - a.successRatio)
      .slice(0, Search.maxPages)
      /** Map back to pages here to keep the relevance order produced by the sort */
      .map(({ page }) => page);

    /**
     * Completions are all known words that start with the last typed word
     */
    const [ lastSearchWord = '' ] = searchWords.slice(-1);
    const completions = [ ...new Set(indexedPages.flatMap(({ words }) => words)) ]
      .sort()
      .filter(word => word.startsWith(lastSearchWord));

    return {
      completions,
      pages: foundPages,
    };
  }

  /**
   * Thin wrapper around query(); not called anywhere in this class
   *
   * @param searchString - raw text typed by the user
   */
  private async search(searchString: string) {
    const pages = await this.query(searchString);

    return pages;
  }

  /**
   * Collect sorted unique words from the header and paragraph blocks of a page
   *
   * @param page - page to take the blocks from
   */
  private static collectPageWords(page: Page): string[] {
    const blocks: unknown = page.body?.blocks;

    if (!Array.isArray(blocks)) {
      return [];
    }

    const pageWords = new Set<string>();

    for (const block of blocks) {
      if (!Search.indexedBlockTypes.includes(block?.type)) {
        continue;
      }

      /** Blocks without a textual payload are skipped instead of breaking the whole index */
      const text: unknown = block.data?.text;

      if (typeof text !== 'string') {
        continue;
      }

      for (const word of Search.extractWords(Search.stripHtml(text))) {
        pageWords.add(word);
      }
    }

    return [ ...pageWords ].sort();
  }

  /**
   * Replace html tags and entities with spaces, keeping the text between the tags,
   * so that tag names, attributes and entity names are not indexed as words
   *
   * @param html - block text that may contain inline markup
   */
  private static stripHtml(html: string): string {
    return html
      .replace(/<[^>]*>/g, ' ')
      .replace(/&#?[a-z0-9]+;/gi, ' ');
  }

  /**
   * Split text into lowercased words made of latin letters and digits
   * Never returns empty strings
   *
   * @param text - plain text to split
   */
  private static extractWords(text: string): string[] {
    return text.toLowerCase().match(/[a-z0-9]+/g) ?? [];
  }
}
export default Search;

View file

@ -1,12 +1,15 @@
import express from 'express';
import pagesAPI from './pages';
import transportAPI from './transport';
import linksAPI from './links';
import searchAPI from './search';
/**
 * Root API router: every sub-API is mounted at '/' in the order listed below
 */
const router = express.Router();

for (const api of [pagesAPI, transportAPI, linksAPI, searchAPI]) {
  router.use('/', api);
}

export default router;

View file

@ -0,0 +1,54 @@
import express, { Request, Response } from 'express';
import Search from '../../controllers/search';
const router = express.Router();

/**
 * GET /search?text=<searchString>
 *
 * Search given words in all documents
 *
 * Responds with `{ success: true, result: { completions, pages, time } }`,
 * where every page is reduced to `_id`, `title` and `uri`,
 * or with status 400 and `{ success: false, error }` when the search fails
 * or the `text` query parameter is not a single string
 */
router.get('/search', async (req: Request, res: Response) => {
  try {
    const searchString = req.query.text;

    /**
     * The query parser yields undefined for a missing parameter and
     * an array/object for repeated or nested ones; only a plain string can be searched
     */
    if (typeof searchString !== 'string') {
      res.status(400).json({
        success: false,
        error: 'Query parameter "text" must be a string',
      });

      return;
    }

    /** Start measuring search time */
    const startTime = performance.now();

    const search = new Search();
    const searchResponse = await search.query(searchString);

    /** End measuring search time */
    const endTime = performance.now();

    /** Show search time */
    const searchTime = (endTime - startTime).toFixed(6);

    console.log(`🔎 "${searchString}" ⏱ ${searchTime} ms`);

    /** Only the fields listed here are sent; the rest of the page is left out of the response */
    const compactedPages = searchResponse.pages.map(page => {
      return {
        _id: page._id,
        title: page.title,
        uri: page.uri,
      };
    });

    res.json({
      success: true,
      result: {
        completions: searchResponse.completions,
        pages: compactedPages,
        time: searchTime,
      },
    });
  } catch (err) {
    res.status(400).json({
      success: false,
      /** Anything can be thrown, so the message is read only from real Error instances */
      error: err instanceof Error ? err.message : String(err),
    });
  }
});

export default router;

View file

@ -15,7 +15,7 @@
hawkClientToken:"{{ config.hawkClientToken }}",
};
</script>
<body>
<body data-module="search">
{% include "components/header.twig" with res.locals.isAuthorized %}
<div class="docs">