1
0
Fork 0
mirror of https://github.com/documize/community.git synced 2025-07-19 21:29:42 +02:00

new search options and schema

This commit is contained in:
Harvey Kandola 2017-08-15 14:15:31 +01:00
parent 6c3042fd4e
commit 2c5f73a486
14 changed files with 283 additions and 118 deletions

View file

@ -4,7 +4,6 @@
1. Remove audit table
2. Remove document.layout field ?
## MYSQL ENCODING
https://stackoverflow.com/questions/37307146/difference-between-utf8mb4-unicode-ci-and-utf8mb4-unicode-520-ci-collations-in-m
@ -13,7 +12,7 @@ https://mathiasbynens.be/notes/mysql-utf8mb4
https://medium.com/@adamhooper/in-mysql-never-use-utf8-use-utf8mb4-11761243e434
## MIGRATE ENCODING
ALTER DATABASE documize CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
ALTER TABLE account CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
@ -38,7 +37,3 @@ ALTER TABLE useraction CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
ALTER TABLE useractivity CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
ALTER TABLE userconfig CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
ALTER TABLE userevent CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
documenttitle, pagetitle, body
CHARACTER SET utf8mb4 COLLATE utf8mb4

View file

@ -0,0 +1,39 @@
/* community edition */
RENAME TABLE search TO search_old;
DROP TABLE IF EXISTS `search`;
CREATE TABLE IF NOT EXISTS `search` (
`id` INT UNSIGNED NOT NULL AUTO_INCREMENT,
`orgid` CHAR(16) NOT NULL COLLATE utf8_bin,
`documentid` CHAR(16) NOT NULL COLLATE utf8_bin,
`itemid` CHAR(16) NOT NULL DEFAULT '' COLLATE utf8_bin,
`itemtype` VARCHAR(10) NOT NULL,
`content` LONGTEXT,
`created` TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE INDEX `idx_search_id` (`id` ASC),
INDEX `idx_search_orgid` (`orgid` ASC),
INDEX `idx_search_documentid` (`documentid` ASC),
FULLTEXT INDEX `idx_search_content` (`content`))
DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci
ENGINE = MyISAM;
-- migrate page content
INSERT INTO search (orgid, documentid, itemid, itemtype, content) SELECT orgid, documentid, id AS itemid, 'page' AS itemtype, TRIM(body) AS content FROM search_old;
-- index document title
INSERT INTO search (orgid, documentid, itemid, itemtype, content) SELECT orgid, refid AS documentid, '' AS itemid, 'doc' AS itemtype, TRIM(title) AS content FROM document;
-- index attachment name
INSERT INTO search (orgid, documentid, itemid, itemtype, content) SELECT orgid, documentid, refid AS itemid, 'file' AS itemtype, TRIM(filename) AS content FROM attachment;
-- insert tag 1
insert into search (orgid, documentid, itemid, itemtype, content) SELECT orgid, refid as documentid, '' as itemid, 'tag' as itemtype, TRIM(REPLACE(SUBSTRING_INDEX(tags, '#', 2), '#', '')) AS content FROM document WHERE tags != '';
-- insert tag 2
insert into search (orgid, documentid, itemid, itemtype, content) SELECT orgid, refid as documentid, '' as itemid, 'tag' as itemtype, IF((LENGTH(tags) - LENGTH(REPLACE(tags, '#', '')) - 1) > 1, SUBSTRING_INDEX(SUBSTRING_INDEX(tags, '#', 3), '#', -1), '') AS content FROM document WHERE LENGTH(tags) - LENGTH(REPLACE(tags, "#", "")) > 2;
-- insert tag 3
insert into search (orgid, documentid, itemid, itemtype, content) SELECT orgid, refid as documentid, '' as itemid, 'tag' as itemtype, IF((LENGTH(tags) - LENGTH(REPLACE(tags, '#', '')) - 1) > 2, SUBSTRING_INDEX(SUBSTRING_INDEX(tags, '#', 4), '#', -1), '') AS content FROM document WHERE LENGTH(tags) - LENGTH(REPLACE(tags, "#", "")) > 3;

View file

@ -16,7 +16,6 @@ import (
"encoding/json"
"io/ioutil"
"net/http"
"net/url"
"github.com/documize/community/core/env"
"github.com/documize/community/core/request"
@ -327,27 +326,36 @@ func (h *Handler) SearchDocuments(w http.ResponseWriter, r *http.Request) {
method := "document.search"
ctx := domain.GetRequestContext(r)
keywords := request.Query(r, "keywords")
decoded, err := url.QueryUnescape(keywords)
defer streamutil.Close(r.Body)
body, err := ioutil.ReadAll(r.Body)
if err != nil {
response.WriteBadRequestError(w, method, err.Error())
h.Runtime.Log.Error(method, err)
return
}
results, err := h.Store.Search.Documents(ctx, decoded)
options := search.QueryOptions{}
err = json.Unmarshal(body, &options)
if err != nil {
response.WriteBadRequestError(w, method, err.Error())
h.Runtime.Log.Error(method, err)
return
}
results, err := h.Store.Search.Documents(ctx, options)
if err != nil {
h.Runtime.Log.Error(method, err)
}
// Put in slugs for easy UI display of search URL
for key, result := range results {
result.DocumentSlug = stringutil.MakeSlug(result.DocumentTitle)
result.FolderSlug = stringutil.MakeSlug(result.LabelName)
result.DocumentSlug = stringutil.MakeSlug(result.Document)
result.SpaceSlug = stringutil.MakeSlug(result.Space)
results[key] = result
}
if len(results) == 0 {
results = []search.DocumentSearch{}
results = []search.QueryResult{}
}
h.Store.Audit.Record(ctx, audit.EventTypeSearch)

View file

@ -12,8 +12,8 @@
package mysql
import (
"database/sql"
"fmt"
"regexp"
"strings"
"time"
@ -244,68 +244,156 @@ func (s Scope) Delete(ctx domain.RequestContext, page page.Page) (err error) {
// Documents searches the documents that the client is allowed to see, using the keywords search string, then audits that search.
// Visible documents include both those in the client's own organisation and those that are public, or whose visibility includes the client.
func (s Scope) Documents(ctx domain.RequestContext, keywords string) (results []search.DocumentSearch, err error) {
if len(keywords) == 0 {
func (s Scope) Documents(ctx domain.RequestContext, q search.QueryOptions) (results []search.QueryResult, err error) {
q.Keywords = strings.TrimSpace(q.Keywords)
if len(q.Keywords) == 0 {
return
}
var tagQuery, keywordQuery string
results = []search.QueryResult{}
r, _ := regexp.Compile(`(#[a-z0-9][a-z0-9\-_]*)`)
res := r.FindAllString(keywords, -1)
if len(res) == 0 {
tagQuery = " "
} else {
if len(res) == 1 {
tagQuery = " AND document.tags LIKE '%" + res[0] + "#%' "
} else {
fmt.Println("lots of tags!")
tagQuery = " AND ("
for i := 0; i < len(res); i++ {
tagQuery += "document.tags LIKE '%" + res[i] + "#%'"
if i < len(res)-1 {
tagQuery += " OR "
}
// Match doc names
if q.Doc {
r1, err1 := s.matchFullText(ctx, q.Keywords, "doc")
if err1 != nil {
err = errors.Wrap(err1, "search document names")
return
}
tagQuery += ") "
results = append(results, r1...)
}
keywords = r.ReplaceAllString(keywords, "")
keywords = strings.Replace(keywords, " ", "", -1)
// Match doc content
if q.Content {
r2, err2 := s.matchFullText(ctx, q.Keywords, "page")
if err2 != nil {
err = errors.Wrap(err2, "search document content")
return
}
keywords = strings.TrimSpace(keywords)
if len(keywords) > 0 {
keywordQuery = "AND MATCH(documenttitle,pagetitle,body) AGAINST('" + keywords + "' in boolean mode)"
results = append(results, r2...)
}
sql := `SELECT search.id, documentid, pagetitle, document.labelid, document.title as documenttitle, document.tags,
COALESCE(label.label,'Unknown') AS labelname, document.excerpt as documentexcerpt
FROM search, document LEFT JOIN label ON label.orgid=document.orgid AND label.refid = document.labelid
WHERE search.documentid = document.refid AND search.orgid=? AND document.template=0 ` + tagQuery +
`AND document.labelid IN
(SELECT refid from label WHERE orgid=? AND type=2 AND userid=?
// Match doc tags
if q.Tag {
r3, err3 := s.matchFullText(ctx, q.Keywords, "tag")
if err3 != nil {
err = errors.Wrap(err3, "search document tag")
return
}
results = append(results, r3...)
}
// Match doc attachments
if q.Attachment {
r4, err4 := s.matchLike(ctx, q.Keywords, "file")
if err4 != nil {
err = errors.Wrap(err4, "search document attachments")
return
}
results = append(results, r4...)
}
return
}
func (s Scope) matchFullText(ctx domain.RequestContext, keywords, itemType string) (r []search.QueryResult, err error) {
sql1 := `
SELECT
s.id, s.orgid, s.documentid, s.itemid, s.itemtype,
d.labelid as spaceid, COALESCE(d.title,'Unknown') AS document, d.tags, d.excerpt,
COALESCE(l.label,'Unknown') AS space
FROM
search s,
document d
LEFT JOIN
label l ON l.orgid=d.orgid AND l.refid = d.labelid
WHERE
s.orgid = ?
AND s.itemtype = ?
AND s.documentid = d.refid
-- AND d.template = 0
AND d.labelid IN (SELECT refid from label WHERE orgid=? AND type=2 AND userid=?
UNION ALL SELECT refid FROM label a where orgid=? AND type=1 AND refid IN (SELECT labelid from labelrole WHERE orgid=? AND userid='' AND (canedit=1 OR canview=1))
UNION ALL SELECT refid FROM label a where orgid=? AND type=3 AND refid IN (SELECT labelid from labelrole WHERE orgid=? AND userid=? AND (canedit=1 OR canview=1))) ` + keywordQuery
UNION ALL SELECT refid FROM label a where orgid=? AND type=3 AND refid IN (SELECT labelid from labelrole WHERE orgid=? AND userid=? AND (canedit=1 OR canview=1)))
AND MATCH(s.content) AGAINST(? IN BOOLEAN MODE)`
err = s.Runtime.Db.Select(&results,
sql,
err = s.Runtime.Db.Select(&r,
sql1,
ctx.OrgID,
itemType,
ctx.OrgID,
ctx.UserID,
ctx.OrgID,
ctx.OrgID,
ctx.OrgID,
ctx.OrgID,
ctx.UserID)
ctx.UserID,
keywords)
if err == sql.ErrNoRows {
err = nil
r = []search.QueryResult{}
}
if err != nil {
err = errors.Wrap(err, "search documents")
err = errors.Wrap(err, "search document "+itemType)
return
}
return
}
func (s Scope) matchLike(ctx domain.RequestContext, keywords, itemType string) (r []search.QueryResult, err error) {
// LIKE clause does not like quotes!
keywords = strings.Replace(keywords, "'", "", -1)
keywords = strings.Replace(keywords, "\"", "", -1)
keywords = strings.Replace(keywords, "%", "", -1)
keywords = fmt.Sprintf("%%%s%%", keywords)
sql1 := `
SELECT
s.id, s.orgid, s.documentid, s.itemid, s.itemtype,
d.labelid as spaceid, COALESCE(d.title,'Unknown') AS document, d.tags, d.excerpt,
COALESCE(l.label,'Unknown') AS space
FROM
search s,
document d
LEFT JOIN
label l ON l.orgid=d.orgid AND l.refid = d.labelid
WHERE
s.orgid = ?
AND s.itemtype = ?
AND s.documentid = d.refid
-- AND d.template = 0
AND d.labelid IN (SELECT refid from label WHERE orgid=? AND type=2 AND userid=?
UNION ALL SELECT refid FROM label a where orgid=? AND type=1 AND refid IN (SELECT labelid from labelrole WHERE orgid=? AND userid='' AND (canedit=1 OR canview=1))
UNION ALL SELECT refid FROM label a where orgid=? AND type=3 AND refid IN (SELECT labelid from labelrole WHERE orgid=? AND userid=? AND (canedit=1 OR canview=1)))
AND s.content LIKE ?`
err = s.Runtime.Db.Select(&r,
sql1,
ctx.OrgID,
itemType,
ctx.OrgID,
ctx.UserID,
ctx.OrgID,
ctx.OrgID,
ctx.OrgID,
ctx.OrgID,
ctx.UserID,
keywords)
if err == sql.ErrNoRows {
err = nil
r = []search.QueryResult{}
}
if err != nil {
err = errors.Wrap(err, "search document "+itemType)
return
}

View file

@ -188,7 +188,7 @@ type SearchStorer interface {
UpdateSequence(ctx RequestContext, page page.Page) (err error)
UpdateLevel(ctx RequestContext, page page.Page) (err error)
Delete(ctx RequestContext, page page.Page) (err error)
Documents(ctx RequestContext, keywords string) (results []search.DocumentSearch, err error)
Documents(ctx RequestContext, options search.QueryOptions) (results []search.QueryResult, err error)
}
// Indexer defines required methods for managing search indexing process

View file

@ -16,39 +16,16 @@ export default Ember.Component.extend({
resultPhrase: "",
didReceiveAttrs() {
let results = this.get('results');
let temp = _.groupBy(results, 'documentId');
let documents = [];
_.each(temp, function (document) {
let refs = [];
if (document.length > 1) {
refs = document.slice(1);
}
_.each(refs, function (ref, index) {
ref.comma = index === refs.length - 1 ? "" : ", ";
});
let hasRefs = refs.length > 0;
documents.pushObject({
doc: document[0],
ref: refs,
hasReferences: hasRefs
});
});
let docs = this.get('results');
let phrase = 'Nothing found';
if (results.length > 0) {
let references = results.length === 1 ? "reference" : "references";
let i = results.length;
if (docs.length > 0) {
let references = docs.length === 1 ? "reference" : "references";
let i = docs.length;
phrase = `${i} ${references}`;
}
this.set('resultPhrase', phrase);
this.set('documents', documents);
this.set('documents', docs);
}
});

View file

@ -13,19 +13,51 @@ import Ember from 'ember';
export default Ember.Controller.extend({
searchService: Ember.inject.service('search'),
queryParams: ['filter'],
filter: "",
results: [],
matchDoc: true,
matchContent: true,
matchFile: true,
matchTag: true,
onKeywordChange: function () {
Ember.run.debounce(this, this.fetch, 750);
}.observes('filter'),
onMatchDoc: function () {
Ember.run.debounce(this, this.fetch, 750);
}.observes('matchDoc'),
onMatchContent: function () {
Ember.run.debounce(this, this.fetch, 750);
}.observes('matchContent'),
onMatchTag: function () {
Ember.run.debounce(this, this.fetch, 750);
}.observes('matchTag'),
onMatchFile: function () {
Ember.run.debounce(this, this.fetch, 750);
}.observes('matchFile'),
fetch() {
let self = this;
let payload = {
keywords: this.get('filter'),
doc: this.get('matchDoc'),
attachment: this.get('matchFile'),
tag: this.get('matchTag'),
content: this.get('matchContent')
};
this.get('searchService').find(this.get('filter')).then(function (response) {
payload.keywords = payload.keywords.trim();
if (payload.keywords.length == 0) {
return;
}
if (!payload.doc && !payload.tag && !payload.content && !payload.attachment) {
return;
}
this.get('searchService').find(payload).then(function(response) {
self.set('results', response);
});
}
},
});

View file

@ -1,15 +1,26 @@
{{layout/zone-navigation}}
{{#layout/zone-container}}
{{#layout/zone-sidebar}}
<div class="sidebar-toolbar">
</div>
<div class="sidebar-common">
{{layout/sidebar-intro title="Search" message='#tag, keyword, "some phrase", this AND that, this OR that'}}
{{layout/sidebar-intro title="Search" message='Search across document name, contents, tags and attachment filenames'}}
</div>
<div class="sidebar-wrapper">
<div class="page-search">
<div class="input-control">
{{focus-input type="text" value=filter placeholder='type search phrase (case sensitive)'}}
{{focus-input type="text" value=filter placeholder='type search phrase'}}
{{#ui/ui-checkbox selected=matchDoc}}document name{{/ui/ui-checkbox}}
{{#ui/ui-checkbox selected=matchContent}}document content{{/ui/ui-checkbox}}
{{#ui/ui-checkbox selected=matchFile}}attachment name{{/ui/ui-checkbox}}
{{#ui/ui-checkbox selected=matchTag}}tag name{{/ui/ui-checkbox}}
</div>
<div class="examples">
<p>a OR b</p>
<p>x AND y</p>
<p>"phrase match"</p>
<p>* for wildcard match</p>
</div>
</div>
</div>

View file

@ -19,12 +19,12 @@ export default Ember.Service.extend({
sessionService: service('session'),
ajax: service(),
// getUsers returns all users for organization.
find(keywords) {
let url = "search?keywords=" + encodeURIComponent(keywords);
return this.get('ajax').request(url, {
method: "GET"
// find all matching documents.
find(payload) {
return this.get('ajax').request("search", {
method: "POST",
data: JSON.stringify(payload),
contentType: 'json'
});
},
});

View file

@ -5,6 +5,10 @@
}
}
.examples {
color: $color-gray;
}
.search-results {
> .heading {
font-size: 2rem;

View file

@ -1,13 +1,13 @@
<div class="search-results">
<div class="heading">{{resultPhrase}}</div>
<ul class="list">
{{#each documents key="doc.id" as |result index|}}
{{#each documents key="id" as |result index|}}
<li class="item">
<a class="link" href="s/{{result.doc.folderId}}/{{result.doc.folderSlug}}/d/{{ result.doc.documentId }}/{{result.doc.documentSlug}}?page={{ result.doc.id }}">
<div class="title">{{ result.doc.documentTitle }}</div>
<div class="folder">{{ result.doc.folderName }}</div>
<div class="excerpt">{{ result.doc.documentExcerpt }}</div>
<div class="chips">{{search/tag-list documentTags=result.doc.documentTags}}</div>
<a class="link" href="s/{{result.spaceId}}/{{result.spaceSlug}}/d/{{ result.documentId }}/{{result.documentSlug}}?page={{ result.itemId }}">
<div class="title">{{ result.document }}</div>
<div class="folder">{{ result.space }}</div>
<div class="excerpt">{{ result.excerpt }}</div>
<div class="chips">{{search/tag-list documentTags=result.tags}}</div>
</a>
</li>
{{/each}}

View file

@ -3,5 +3,5 @@
"target": "es6",
"experimentalDecorators": true
},
"exclude": ["node_modules", "bower_components", "tmp", "vendor", ".git", "dist", "dist-prod"]
"exclude": ["node_modules", "bower_components", "tmp", "vendor", ".git", "dist", "dist-prod", "gui/node_modules", "gui/dist", "gui/dist-prod", "gui/tmp"]
}

View file

@ -30,16 +30,27 @@ type Search struct {
Body string
}
// DocumentSearch represents 'presentable' search results.
type DocumentSearch struct {
ID string `json:"id"`
DocumentID string `json:"documentId"`
DocumentTitle string `json:"documentTitle"`
DocumentSlug string `json:"documentSlug"`
DocumentExcerpt string `json:"documentExcerpt"`
Tags string `json:"documentTags"`
PageTitle string `json:"pageTitle"`
LabelID string `json:"folderId"`
LabelName string `json:"folderName"`
FolderSlug string `json:"folderSlug"`
// QueryOptions defines how we search.
type QueryOptions struct {
Keywords string `json:"keywords"`
Doc bool `json:"doc"`
Tag bool `json:"tag"`
Attachment bool `json:"attachment"`
Content bool `json:"content"`
}
// QueryResult represents 'presentable' search results.
type QueryResult struct {
ID string `json:"id"`
OrgID string `json:"orgId"`
ItemID string `json:"itemId"`
ItemType string `json:"itemType"`
DocumentID string `json:"documentId"`
DocumentSlug string `json:"documentSlug"`
Document string `json:"document"`
Excerpt string `json:"excerpt"`
Tags string `json:"tags"`
SpaceID string `json:"spaceId"`
Space string `json:"space"`
SpaceSlug string `json:"spaceSlug"`
}

View file

@ -132,7 +132,7 @@ func RegisterEndpoints(rt *env.Runtime, s *domain.Store) {
Add(rt, RoutePrefixPrivate, "users/{userID}", []string{"DELETE", "OPTIONS"}, nil, user.Delete)
Add(rt, RoutePrefixPrivate, "users/sync", []string{"GET", "OPTIONS"}, nil, keycloak.Sync)
Add(rt, RoutePrefixPrivate, "search", []string{"GET", "OPTIONS"}, nil, document.SearchDocuments)
Add(rt, RoutePrefixPrivate, "search", []string{"POST", "OPTIONS"}, nil, document.SearchDocuments)
Add(rt, RoutePrefixPrivate, "templates", []string{"POST", "OPTIONS"}, nil, template.SaveAs)
Add(rt, RoutePrefixPrivate, "templates", []string{"GET", "OPTIONS"}, nil, template.SavedList)