2017-07-31 18:17:30 +01:00
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
//
// This software (Documize Community Edition) is licensed under
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
//
// You can operate outside the AGPL restrictions by purchasing
// Documize Enterprise Edition and obtaining a commercial license
// by contacting <sales@documize.com>.
//
// https://documize.com
2018-09-26 17:59:56 +01:00
package search
2017-07-31 18:17:30 +01:00
import (
2017-08-15 14:15:31 +01:00
"database/sql"
2017-07-31 18:17:30 +01:00
"fmt"
"strings"
2018-09-27 15:14:48 +01:00
"github.com/documize/community/core/env"
2017-07-31 18:17:30 +01:00
"github.com/documize/community/core/stringutil"
"github.com/documize/community/domain"
2018-09-26 17:59:56 +01:00
"github.com/documize/community/domain/store"
2017-08-15 19:41:44 +01:00
"github.com/documize/community/model/attachment"
"github.com/documize/community/model/doc"
2017-07-31 18:17:30 +01:00
"github.com/documize/community/model/page"
"github.com/documize/community/model/search"
"github.com/pkg/errors"
)
2018-09-26 17:59:56 +01:00
// Store provides data access to space information.
type Store struct {
store . Context
2018-09-27 15:14:48 +01:00
store . SearchStorer
2017-07-31 18:17:30 +01:00
}
2017-08-15 19:41:44 +01:00
// IndexDocument adds search index entries for document inserting title, tags and attachments as
// searchable items. Any existing document entries are removed.
2018-09-26 17:59:56 +01:00
func ( s Store ) IndexDocument ( ctx domain . RequestContext , doc doc . Document , a [ ] attachment . Attachment ) ( err error ) {
2018-09-28 16:33:15 +01:00
method := "search.IndexDocument"
2017-08-15 19:41:44 +01:00
// remove previous search entries
2018-09-26 17:59:56 +01:00
_ , err = ctx . Transaction . Exec ( s . Bind ( "DELETE FROM dmz_search WHERE c_orgid=? AND c_docid=? AND (c_itemtype='doc' OR c_itemtype='file' OR c_itemtype='tag')" ) ,
2017-09-25 14:37:11 +01:00
ctx . OrgID , doc . RefID )
2018-09-28 16:33:15 +01:00
if err != nil && err != sql . ErrNoRows {
2017-08-15 19:41:44 +01:00
err = errors . Wrap ( err , "execute delete document index entries" )
2018-09-28 16:33:15 +01:00
s . Runtime . Log . Error ( method , err )
return
2017-07-31 18:17:30 +01:00
}
2017-08-15 19:41:44 +01:00
// insert doc title
2018-09-28 16:33:15 +01:00
if s . Runtime . StoreProvider . Type ( ) == env . StoreTypePostgreSQL {
_ , err = ctx . Transaction . Exec ( s . Bind ( "INSERT INTO dmz_search (c_orgid, c_docid, c_itemid, c_itemtype, c_content, c_token) VALUES (?, ?, ?, ?, ?, to_tsvector(?))" ) ,
ctx . OrgID , doc . RefID , "" , "doc" , doc . Name , doc . Name )
} else {
_ , err = ctx . Transaction . Exec ( s . Bind ( "INSERT INTO dmz_search (c_orgid, c_docid, c_itemid, c_itemtype, c_content) VALUES (?, ?, ?, ?, ?)" ) ,
ctx . OrgID , doc . RefID , "" , "doc" , doc . Name )
}
if err != nil && err != sql . ErrNoRows {
2017-08-15 19:41:44 +01:00
err = errors . Wrap ( err , "execute insert document title entry" )
2018-09-28 16:33:15 +01:00
s . Runtime . Log . Error ( method , err )
return
2017-07-31 18:17:30 +01:00
}
2017-08-15 19:41:44 +01:00
// insert doc tags
tags := strings . Split ( doc . Tags , "#" )
for _ , t := range tags {
2018-09-28 16:33:15 +01:00
t = strings . TrimSpace ( t )
2017-08-15 19:41:44 +01:00
if len ( t ) == 0 {
continue
}
2018-09-28 16:33:15 +01:00
if s . Runtime . StoreProvider . Type ( ) == env . StoreTypePostgreSQL {
_ , err = ctx . Transaction . Exec ( s . Bind ( "INSERT INTO dmz_search (c_orgid, c_docid, c_itemid, c_itemtype, c_content, c_token) VALUES (?, ?, ?, ?, ?, to_tsvector(?))" ) ,
ctx . OrgID , doc . RefID , "" , "tag" , t , t )
2017-07-31 18:17:30 +01:00
2018-09-28 16:33:15 +01:00
} else {
_ , err = ctx . Transaction . Exec ( s . Bind ( "INSERT INTO dmz_search (c_orgid, c_docid, c_itemid, c_itemtype, c_content) VALUES (?, ?, ?, ?, ?)" ) ,
ctx . OrgID , doc . RefID , "" , "tag" , t )
}
if err != nil && err != sql . ErrNoRows {
2017-08-15 19:41:44 +01:00
err = errors . Wrap ( err , "execute insert document tag entry" )
2018-09-28 16:33:15 +01:00
s . Runtime . Log . Error ( method , err )
2017-08-15 19:41:44 +01:00
return
}
2017-07-31 18:17:30 +01:00
}
2017-08-15 19:41:44 +01:00
for _ , file := range a {
2018-09-28 16:33:15 +01:00
if s . Runtime . StoreProvider . Type ( ) == env . StoreTypePostgreSQL {
_ , err = ctx . Transaction . Exec ( s . Bind ( "INSERT INTO dmz_search (c_orgid, c_docid, c_itemid, c_itemtype, c_content, c_token) VALUES (?, ?, ?, ?, ?, to_tsvector(?))" ) ,
ctx . OrgID , doc . RefID , file . RefID , "file" , file . Filename , file . Filename )
2017-07-31 18:17:30 +01:00
2018-09-28 16:33:15 +01:00
} else {
_ , err = ctx . Transaction . Exec ( s . Bind ( "INSERT INTO dmz_search (c_orgid, c_docid, c_itemid, c_itemtype, c_content) VALUES (?, ?, ?, ?, ?)" ) ,
ctx . OrgID , doc . RefID , file . RefID , "file" , file . Filename )
}
if err != nil && err != sql . ErrNoRows {
2017-08-15 19:41:44 +01:00
err = errors . Wrap ( err , "execute insert document file entry" )
2018-09-28 16:33:15 +01:00
s . Runtime . Log . Error ( method , err )
return
2017-08-15 19:41:44 +01:00
}
2017-07-31 18:17:30 +01:00
}
return nil
}
2017-08-15 19:41:44 +01:00
// DeleteDocument removes all search entries for document.
2018-09-26 17:59:56 +01:00
func ( s Store ) DeleteDocument ( ctx domain . RequestContext , ID string ) ( err error ) {
2018-09-28 16:33:15 +01:00
method := "search.DeleteDocument"
2018-09-26 17:59:56 +01:00
_ , err = ctx . Transaction . Exec ( s . Bind ( "DELETE FROM dmz_search WHERE c_orgid=? AND c_docid=?" ) ,
ctx . OrgID , ID )
2017-07-31 18:17:30 +01:00
2018-09-28 16:33:15 +01:00
if err != nil && err != sql . ErrNoRows {
2017-08-15 19:41:44 +01:00
err = errors . Wrap ( err , "execute delete document entries" )
2018-09-28 16:33:15 +01:00
s . Runtime . Log . Error ( method , err )
2017-07-31 18:17:30 +01:00
}
2017-08-15 19:41:44 +01:00
return
2017-07-31 18:17:30 +01:00
}
2017-08-15 19:41:44 +01:00
// IndexContent adds search index entry for document context.
// Any existing document entries are removed.
2018-09-26 17:59:56 +01:00
func ( s Store ) IndexContent ( ctx domain . RequestContext , p page . Page ) ( err error ) {
2018-09-28 16:33:15 +01:00
method := "search.IndexContent"
2017-08-15 19:41:44 +01:00
// remove previous search entries
2018-09-26 17:59:56 +01:00
_ , err = ctx . Transaction . Exec ( s . Bind ( "DELETE FROM dmz_search WHERE c_orgid=? AND c_docid=? AND c_itemid=? AND c_itemtype='page'" ) ,
2017-09-25 14:37:11 +01:00
ctx . OrgID , p . DocumentID , p . RefID )
2017-07-31 18:17:30 +01:00
2018-09-28 16:33:15 +01:00
if err != nil && err != sql . ErrNoRows {
2017-08-15 19:41:44 +01:00
err = errors . Wrap ( err , "execute delete document content entry" )
2018-09-28 16:33:15 +01:00
s . Runtime . Log . Error ( method , err )
return
2017-07-31 18:17:30 +01:00
}
2018-09-28 16:33:15 +01:00
err = nil
2017-07-31 18:17:30 +01:00
2017-08-15 19:41:44 +01:00
// prepare content
content , err := stringutil . HTML ( p . Body ) . Text ( false )
2017-07-31 18:17:30 +01:00
if err != nil {
2017-08-15 20:29:35 +01:00
err = errors . Wrap ( err , "search strip HTML failed" )
2018-09-28 16:33:15 +01:00
s . Runtime . Log . Error ( method , err )
2017-08-15 19:41:44 +01:00
return
2017-07-31 18:17:30 +01:00
}
2017-08-15 19:41:44 +01:00
content = strings . TrimSpace ( content )
2017-07-31 18:17:30 +01:00
2018-09-28 16:33:15 +01:00
if s . Runtime . StoreProvider . Type ( ) == env . StoreTypePostgreSQL {
_ , err = ctx . Transaction . Exec ( s . Bind ( "INSERT INTO dmz_search (c_orgid, c_docid, c_itemid, c_itemtype, c_content, c_token) VALUES (?, ?, ?, ?, ?, to_tsvector(?))" ) ,
ctx . OrgID , p . DocumentID , p . RefID , "page" , content , content )
} else {
_ , err = ctx . Transaction . Exec ( s . Bind ( "INSERT INTO dmz_search (c_orgid, c_docid, c_itemid, c_itemtype, c_content) VALUES (?, ?, ?, ?, ?)" ) ,
ctx . OrgID , p . DocumentID , p . RefID , "page" , content )
2017-07-31 18:17:30 +01:00
}
2018-09-28 16:33:15 +01:00
if err != nil && err != sql . ErrNoRows {
err = errors . Wrap ( err , "execute insert section content entry" )
s . Runtime . Log . Error ( method , err )
return
}
err = nil
2017-07-31 18:17:30 +01:00
2018-09-28 16:33:15 +01:00
if s . Runtime . StoreProvider . Type ( ) == env . StoreTypePostgreSQL {
_ , err = ctx . Transaction . Exec ( s . Bind ( "INSERT INTO dmz_search (c_orgid, c_docid, c_itemid, c_itemtype, c_content, c_token) VALUES (?, ?, ?, ?, ?, to_tsvector(?))" ) ,
ctx . OrgID , p . DocumentID , p . RefID , "page" , p . Name , p . Name )
} else {
_ , err = ctx . Transaction . Exec ( s . Bind ( "INSERT INTO dmz_search (c_orgid, c_docid, c_itemid, c_itemtype, c_content) VALUES (?, ?, ?, ?, ?)" ) ,
ctx . OrgID , p . DocumentID , p . RefID , "page" , p . Name )
}
if err != nil && err != sql . ErrNoRows {
err = errors . Wrap ( err , "execute insert section title entry" )
s . Runtime . Log . Error ( method , err )
return
2018-03-19 15:04:02 +00:00
}
2017-08-15 19:41:44 +01:00
return nil
2017-07-31 18:17:30 +01:00
}
2017-08-15 19:41:44 +01:00
// DeleteContent removes all search entries for specific document content.
2018-09-26 17:59:56 +01:00
func ( s Store ) DeleteContent ( ctx domain . RequestContext , pageID string ) ( err error ) {
2018-09-28 16:33:15 +01:00
method := "search.DeleteContent"
2017-08-15 19:41:44 +01:00
// remove all search entries
2018-10-10 15:13:09 +01:00
_ , err = ctx . Transaction . Exec ( s . Bind ( "DELETE FROM dmz_search WHERE c_orgid=? AND c_itemid=? AND c_itemtype=?" ) ,
ctx . OrgID , pageID , "page" )
2018-09-28 16:33:15 +01:00
if err != nil && err != sql . ErrNoRows {
2017-08-15 19:41:44 +01:00
err = errors . Wrap ( err , "execute delete document content entry" )
2018-09-28 16:33:15 +01:00
s . Runtime . Log . Error ( method , err )
2017-07-31 18:17:30 +01:00
return
}
2018-10-10 15:13:09 +01:00
return nil
2017-07-31 18:17:30 +01:00
}
// Documents searches the documents that the client is allowed to see, using the keywords search string, then audits that search.
2018-06-22 17:01:26 +01:00
// Visible documents include both those in the client's own organization and those that are public, or whose visibility includes the client.
2018-09-26 17:59:56 +01:00
func ( s Store ) Documents ( ctx domain . RequestContext , q search . QueryOptions ) ( results [ ] search . QueryResult , err error ) {
2017-08-15 14:15:31 +01:00
q . Keywords = strings . TrimSpace ( q . Keywords )
if len ( q . Keywords ) == 0 {
2017-07-31 18:17:30 +01:00
return
}
2017-08-15 14:15:31 +01:00
results = [ ] search . QueryResult { }
2017-07-31 18:17:30 +01:00
2017-08-15 14:15:31 +01:00
// Match doc names
if q . Doc {
r1 , err1 := s . matchFullText ( ctx , q . Keywords , "doc" )
if err1 != nil {
err = errors . Wrap ( err1 , "search document names" )
return
}
2017-07-31 18:17:30 +01:00
2017-08-15 14:15:31 +01:00
results = append ( results , r1 ... )
}
2017-07-31 18:17:30 +01:00
2017-08-15 14:15:31 +01:00
// Match doc content
if q . Content {
r2 , err2 := s . matchFullText ( ctx , q . Keywords , "page" )
if err2 != nil {
err = errors . Wrap ( err2 , "search document content" )
return
}
2017-07-31 18:17:30 +01:00
2017-08-15 14:15:31 +01:00
results = append ( results , r2 ... )
}
2017-07-31 18:17:30 +01:00
2017-08-15 14:15:31 +01:00
// Match doc tags
if q . Tag {
r3 , err3 := s . matchFullText ( ctx , q . Keywords , "tag" )
if err3 != nil {
err = errors . Wrap ( err3 , "search document tag" )
return
2017-07-31 18:17:30 +01:00
}
2017-08-15 14:15:31 +01:00
results = append ( results , r3 ... )
2017-07-31 18:17:30 +01:00
}
2017-08-15 14:15:31 +01:00
// Match doc attachments
if q . Attachment {
r4 , err4 := s . matchLike ( ctx , q . Keywords , "file" )
if err4 != nil {
err = errors . Wrap ( err4 , "search document attachments" )
return
}
2017-07-31 18:17:30 +01:00
2017-08-15 14:15:31 +01:00
results = append ( results , r4 ... )
2017-07-31 18:17:30 +01:00
}
2018-03-30 17:03:18 +01:00
if len ( results ) == 0 {
results = [ ] search . QueryResult { }
}
2017-08-15 14:15:31 +01:00
return
}
2018-09-26 17:59:56 +01:00
func ( s Store ) matchFullText ( ctx domain . RequestContext , keywords , itemType string ) ( r [ ] search . QueryResult , err error ) {
2018-09-27 15:14:48 +01:00
// Full text search clause specific to database provider
fts := ""
switch s . Runtime . StoreProvider . Type ( ) {
case env . StoreTypeMySQL :
2019-02-28 13:39:53 +00:00
// Tag names can contain hyphens so we have to wrap text in double quotes
// and then the query parser wraps in single quotes.
if itemType == "tag" {
keywords = fmt . Sprintf ( "\"%s\"" , keywords )
}
2018-09-28 16:33:15 +01:00
fts = " AND MATCH(s.c_content) AGAINST(? IN BOOLEAN MODE) "
2018-09-27 15:14:48 +01:00
case env . StoreTypePostgreSQL :
2018-09-28 16:33:15 +01:00
// By default, we expect no Postgres full text search operators.
parser := "plainto_tsquery"
// If we find operators then we have to use correct query processor.
operator := strings . ContainsAny ( keywords , "!()&|*'`\":<->" )
if operator {
parser = "to_tsquery"
}
fts = fmt . Sprintf ( " AND s.c_token @@ %s(?) " , parser )
2018-09-27 15:14:48 +01:00
}
sql1 := s . Bind ( `
SELECT
s . id , s . c_orgid AS orgid , s . c_docid AS documentid , s . c_itemid AS itemid , s . c_itemtype AS itemtype ,
d . c_spaceid as spaceid , COALESCE ( d . c_name , ' Unknown ' ) AS document , d . c_tags AS tags ,
d . c_desc AS excerpt , d . c_template AS template , d . c_versionid AS versionid ,
2019-03-13 11:40:36 +00:00
COALESCE ( l . c_name , ' Unknown ' ) AS space , d . c_created AS created , d . c_revised AS revised
2018-09-27 15:14:48 +01:00
FROM
dmz_search s ,
dmz_doc d
LEFT JOIN
dmz_space l ON l . c_orgid = d . c_orgid AND l . c_refid = d . c_spaceid
WHERE
s . c_orgid = ?
AND s . c_itemtype = ?
AND s . c_docid = d . c_refid
AND d . c_spaceid IN
2018-03-19 15:04:02 +00:00
(
2018-09-27 15:14:48 +01:00
SELECT c_refid FROM dmz_space WHERE c_orgid = ? AND c_refid IN
(
SELECT c_refid from dmz_permission WHERE c_orgid = ? AND c_who = ' user ' AND ( c_whoid = ? OR c_whoid = '0' ) AND c_location = ' space '
UNION ALL
SELECT p . c_refid from dmz_permission p LEFT JOIN dmz_group_member r ON p . c_whoid = r . c_groupid WHERE p . c_orgid = ? AND p . c_who = ' role '
AND p . c_location = ' space ' AND ( r . c_userid = ? OR r . c_userid = '0' )
)
2018-03-19 15:04:02 +00:00
)
2018-09-27 15:14:48 +01:00
` + fts )
2017-08-15 14:15:31 +01:00
err = s . Runtime . Db . Select ( & r ,
sql1 ,
2017-07-31 18:17:30 +01:00
ctx . OrgID ,
2017-08-15 14:15:31 +01:00
itemType ,
2017-07-31 18:17:30 +01:00
ctx . OrgID ,
ctx . OrgID ,
2017-09-18 13:02:15 +01:00
ctx . UserID ,
2017-07-31 18:17:30 +01:00
ctx . OrgID ,
2019-09-17 11:13:34 +01:00
ctx . UserID ,
keywords )
2017-08-15 14:15:31 +01:00
if err == sql . ErrNoRows {
err = nil
r = [ ] search . QueryResult { }
}
if err != nil {
err = errors . Wrap ( err , "search document " + itemType )
}
return
}
2018-09-26 17:59:56 +01:00
func ( s Store ) matchLike ( ctx domain . RequestContext , keywords , itemType string ) ( r [ ] search . QueryResult , err error ) {
2017-08-15 14:15:31 +01:00
// LIKE clause does not like quotes!
keywords = strings . Replace ( keywords , "'" , "" , - 1 )
keywords = strings . Replace ( keywords , "\"" , "" , - 1 )
keywords = strings . Replace ( keywords , "%" , "" , - 1 )
2018-09-28 16:33:15 +01:00
keywords = fmt . Sprintf ( "%%%s%%" , strings . ToLower ( keywords ) )
2017-08-15 14:15:31 +01:00
2018-09-27 15:14:48 +01:00
sql1 := s . Bind ( ` SELECT
2019-04-02 15:30:42 +01:00
s . id , s . c_orgid AS orgid , s . c_docid AS documentid , s . c_itemid AS itemid , s . c_itemtype AS itemtype ,
d . c_spaceid as spaceid , COALESCE ( d . c_name , ' Unknown ' ) AS document , d . c_tags AS tags ,
d . c_desc AS excerpt , d . c_template AS template , d . c_versionid AS versionid ,
COALESCE ( l . c_name , ' Unknown ' ) AS space , d . c_created AS created , d . c_revised AS revised
FROM
2018-09-27 15:14:48 +01:00
dmz_search s ,
dmz_doc d
LEFT JOIN
dmz_space l ON l . c_orgid = d . c_orgid AND l . c_refid = d . c_spaceid
WHERE
s . c_orgid = ?
AND s . c_itemtype = ?
AND s . c_docid = d . c_refid
AND d . c_spaceid IN
2018-03-30 17:03:18 +01:00
(
2018-09-27 15:14:48 +01:00
SELECT c_refid FROM dmz_space WHERE c_orgid = ? AND c_refid IN
(
SELECT c_refid from dmz_permission WHERE c_orgid = ? AND c_who = ' user ' AND ( c_whoid = ? OR c_whoid = '0' ) AND c_location = ' space '
UNION ALL
SELECT p . c_refid from dmz_permission p LEFT JOIN dmz_group_member r ON p . c_whoid = r . c_groupid WHERE p . c_orgid = ? AND p . c_who = ' role '
AND p . c_location = ' space ' AND ( r . c_userid = ? OR r . c_userid = '0' )
)
)
2018-09-28 16:33:15 +01:00
AND LOWER ( s . c_content ) LIKE ? ` )
2017-08-15 14:15:31 +01:00
err = s . Runtime . Db . Select ( & r ,
sql1 ,
ctx . OrgID ,
itemType ,
ctx . OrgID ,
ctx . OrgID ,
2017-09-18 13:02:15 +01:00
ctx . UserID ,
2017-08-15 14:15:31 +01:00
ctx . OrgID ,
ctx . UserID ,
keywords )
if err == sql . ErrNoRows {
err = nil
r = [ ] search . QueryResult { }
}
2017-07-31 18:17:30 +01:00
if err != nil {
2017-08-15 14:15:31 +01:00
err = errors . Wrap ( err , "search document " + itemType )
2017-07-31 18:17:30 +01:00
}
return
}