refactored document & search codebase to new API

parent d90b3249c3
commit 65390ab67d

12 changed files with 1288 additions and 35 deletions
domain/search/mysql/store.go  (new file, 315 lines)

@@ -0,0 +1,315 @@
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
//
// This software (Documize Community Edition) is licensed under
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
//
// You can operate outside the AGPL restrictions by purchasing
// Documize Enterprise Edition and obtaining a commercial license
// by contacting <sales@documize.com>.
//
// https://documize.com

package mysql

import (
	"fmt"
	"regexp"
	"strings"
	"time"

	"github.com/documize/community/core/env"
	"github.com/documize/community/core/streamutil"
	"github.com/documize/community/core/stringutil"
	"github.com/documize/community/domain"
	"github.com/documize/community/domain/store/mysql"
	"github.com/documize/community/model"
	"github.com/documize/community/model/page"
	"github.com/documize/community/model/search"
	"github.com/jmoiron/sqlx"
	"github.com/pkg/errors"
)

// Scope provides data access to MySQL.
type Scope struct {
	Runtime *env.Runtime
}
// Add search entry (legacy name: searchAdd).
func (s Scope) Add(ctx domain.RequestContext, page page.Page) (err error) {
	id := page.RefID

	// Translate the HTML into plain text for the search index.
	nonHTML, err := stringutil.HTML(page.Body).Text(false)
	if err != nil {
		err = errors.Wrap(err, "search decode body")
		return
	}

	// Insert into the search table, picking up the document title along the way.
	var stmt *sqlx.Stmt
	stmt, err = ctx.Transaction.Preparex(
		"INSERT INTO search (id, orgid, documentid, level, sequence, documenttitle, slug, pagetitle, body, created, revised) " +
			" SELECT page.refid,page.orgid,document.refid,page.level,page.sequence,document.title,document.slug,page.title,?,page.created,page.revised " +
			" FROM document,page WHERE page.refid=? AND document.refid=page.documentid")
	defer streamutil.Close(stmt)

	if err != nil {
		err = errors.Wrap(err, "prepare search insert")
		return
	}

	_, err = stmt.Exec(nonHTML, id)
	if err != nil {
		err = errors.Wrap(err, "execute search insert")
		return
	}

	return nil
}
// Update search entry (legacy name: searchUpdate).
func (s Scope) Update(ctx domain.RequestContext, page page.Page) (err error) {
	// Translate the HTML into plain text for the search index.
	nonHTML, err := stringutil.HTML(page.Body).Text(false)
	if err != nil {
		err = errors.Wrap(err, "search decode body")
		return
	}

	su, err := ctx.Transaction.Preparex("UPDATE search SET pagetitle=?,body=?,sequence=?,level=?,revised=? WHERE id=?")
	defer streamutil.Close(su)

	if err != nil {
		err = errors.Wrap(err, "prepare search update")
		return err
	}

	_, err = su.Exec(page.Title, nonHTML, page.Sequence, page.Level, page.Revised, page.RefID)
	if err != nil {
		err = errors.Wrap(err, "execute search update")
		return
	}

	return nil
}
// UpdateDocument search entries for document (legacy name: searchUpdateDocument).
func (s Scope) UpdateDocument(ctx domain.RequestContext, page page.Page) (err error) {
	stmt, err := ctx.Transaction.Preparex("UPDATE search SET documenttitle=?, slug=?, revised=? WHERE documentid=?")
	defer streamutil.Close(stmt)

	if err != nil {
		err = errors.Wrap(err, "prepare search document update")
		return err
	}

	// NOTE: page.Body carries the document slug in this context (see Indexer.UpdateDocument).
	_, err = stmt.Exec(page.Title, page.Body, time.Now().UTC(), page.DocumentID)
	if err != nil {
		err = errors.Wrap(err, "execute search document update")
		return err
	}

	return nil
}
// DeleteDocument removes document search entries (legacy name: searchDeleteDocument).
func (s Scope) DeleteDocument(ctx domain.RequestContext, page page.Page) (err error) {
	var bm = mysql.BaseQuery{}

	_, err = bm.DeleteWhere(ctx.Transaction, fmt.Sprintf("DELETE from search WHERE documentid='%s'", page.DocumentID))
	if err != nil {
		err = errors.Wrap(err, "delete document search entries")
	}

	return
}
// Rebuild recreates all search entries for a document (legacy name: searchRebuild).
func (s Scope) Rebuild(ctx domain.RequestContext, p page.Page) (err error) {
	var bm = mysql.BaseQuery{}

	_, err = bm.DeleteWhere(ctx.Transaction, fmt.Sprintf("DELETE from search WHERE documentid='%s'", p.DocumentID))
	if err != nil {
		err = errors.Wrap(err, "rebuild delete document search entries")
		return err
	}

	var pages []struct{ ID string }

	stmt2, err := ctx.Transaction.Preparex("SELECT refid as id FROM page WHERE documentid=? ")
	defer streamutil.Close(stmt2)

	if err != nil {
		err = errors.Wrap(err, "rebuild prepare page select")
		return err
	}

	err = stmt2.Select(&pages, p.DocumentID)
	if err != nil {
		err = errors.Wrap(err, "rebuild select pages")
		return err
	}

	if len(pages) > 0 {
		for _, pg := range pages {
			err = s.Add(ctx, page.Page{BaseEntity: model.BaseEntity{RefID: pg.ID}})
			if err != nil {
				err = errors.Wrap(err, "rebuild add search entry")
				return err
			}
		}

		// Rebuild doc-level tags & excerpts:
		// get the 0'th page data and rewrite it.
		target := page.Page{}

		stmt1, err := ctx.Transaction.Preparex("SELECT * FROM page WHERE refid=?")
		defer streamutil.Close(stmt1)

		if err != nil {
			err = errors.Wrap(err, "rebuild prepare first page select")
			return err
		}

		err = stmt1.Get(&target, pages[0].ID)
		if err != nil {
			err = errors.Wrap(err, "rebuild get first page")
			return err
		}

		err = s.Update(ctx, target) // to rebuild the document-level tags + excerpt
		if err != nil {
			err = errors.Wrap(err, "rebuild update first page")
			return err
		}
	}

	return
}
// UpdateSequence updates the sequence of a search entry (legacy name: searchUpdateSequence).
func (s Scope) UpdateSequence(ctx domain.RequestContext, page page.Page) (err error) {
	supdate, err := ctx.Transaction.Preparex("UPDATE search SET sequence=?,revised=? WHERE id=?")
	defer streamutil.Close(supdate)

	if err != nil {
		err = errors.Wrap(err, "prepare search update sequence")
		return err
	}

	_, err = supdate.Exec(page.Sequence, time.Now().UTC(), page.RefID)
	if err != nil {
		err = errors.Wrap(err, "execute search update sequence")
		return
	}

	return
}
// UpdateLevel updates the level of a search entry (legacy name: searchUpdateLevel).
func (s Scope) UpdateLevel(ctx domain.RequestContext, page page.Page) (err error) {
	pageID := page.RefID
	level := page.Level

	supdate, err := ctx.Transaction.Preparex("UPDATE search SET level=?,revised=? WHERE id=?")
	defer streamutil.Close(supdate)

	if err != nil {
		err = errors.Wrap(err, "prepare search update level")
		return err
	}

	_, err = supdate.Exec(level, time.Now().UTC(), pageID)
	if err != nil {
		err = errors.Wrap(err, "execute search update level")
		return
	}

	return
}
// Delete removes a single search entry (legacy name: searchDelete).
func (s Scope) Delete(ctx domain.RequestContext, page page.Page) (err error) {
	var bm = mysql.BaseQuery{}
	_, err = bm.DeleteConstrainedWithID(ctx.Transaction, "search", ctx.OrgID, page.RefID)

	return
}
// Documents searches the documents that the client is allowed to see, using the keywords search string, then audits that search.
// Visible documents include both those in the client's own organisation and those that are public, or whose visibility includes the client.
func (s Scope) Documents(ctx domain.RequestContext, keywords string) (results []search.DocumentSearch, err error) {
	if len(keywords) == 0 {
		return
	}

	var tagQuery, keywordQuery string

	// Extract #tag terms from the keywords string.
	r, _ := regexp.Compile(`(#[a-z0-9][a-z0-9\-_]*)`)
	res := r.FindAllString(keywords, -1)

	if len(res) == 0 {
		tagQuery = " "
	} else {
		if len(res) == 1 {
			tagQuery = " AND document.tags LIKE '%" + res[0] + "#%' "
		} else {
			tagQuery = " AND ("

			for i := 0; i < len(res); i++ {
				tagQuery += "document.tags LIKE '%" + res[i] + "#%'"
				if i < len(res)-1 {
					tagQuery += " OR "
				}
			}

			tagQuery += ") "
		}

		// Strip the tag terms, leaving only free-text keywords.
		keywords = r.ReplaceAllString(keywords, "")
		keywords = strings.Replace(keywords, " ", "", -1)
	}

	keywords = strings.TrimSpace(keywords)

	if len(keywords) > 0 {
		keywordQuery = "AND MATCH(pagetitle,body) AGAINST('" + keywords + "' in boolean mode)"
	}

	sql := `SELECT search.id, documentid, pagetitle, document.labelid, document.title as documenttitle, document.tags,
		COALESCE(label.label,'Unknown') AS labelname, document.excerpt as documentexcerpt
		FROM search, document LEFT JOIN label ON label.orgid=document.orgid AND label.refid = document.labelid
		WHERE search.documentid = document.refid AND search.orgid=? AND document.template=0 ` + tagQuery +
		`AND document.labelid IN
		(SELECT refid from label WHERE orgid=? AND type=2 AND userid=?
		UNION ALL SELECT refid FROM label a where orgid=? AND type=1 AND refid IN (SELECT labelid from labelrole WHERE orgid=? AND userid='' AND (canedit=1 OR canview=1))
		UNION ALL SELECT refid FROM label a where orgid=? AND type=3 AND refid IN (SELECT labelid from labelrole WHERE orgid=? AND userid=? AND (canedit=1 OR canview=1))) ` + keywordQuery

	err = s.Runtime.Db.Select(&results,
		sql,
		ctx.OrgID,
		ctx.OrgID,
		ctx.UserID,
		ctx.OrgID,
		ctx.OrgID,
		ctx.OrgID,
		ctx.OrgID,
		ctx.UserID)

	if err != nil {
		err = errors.Wrap(err, "search documents")
		return
	}

	return
}
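The write-path methods above never open their own transaction: they read it from ctx.Transaction. Below is a minimal caller-side sketch, reusing the imports of store.go above; the helper name indexPage is hypothetical and not part of this commit.

// indexPage sketches the caller's responsibility: begin the transaction,
// hand it to the store via ctx, then commit or roll back on error.
func indexPage(rt *env.Runtime, ctx domain.RequestContext, p page.Page) error {
	s := Scope{Runtime: rt}

	tx, err := rt.Db.Beginx() // the same sqlx Beginx the indexer uses below
	if err != nil {
		return err
	}
	ctx.Transaction = tx

	if err = s.Add(ctx, p); err != nil {
		tx.Rollback()
		return err
	}
	return tx.Commit()
}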
domain/search/search.go  (new file, 239 lines)

@@ -0,0 +1,239 @@
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
//
// This software (Documize Community Edition) is licensed under
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
//
// You can operate outside the AGPL restrictions by purchasing
// Documize Enterprise Edition and obtaining a commercial license
// by contacting <sales@documize.com>.
//
// https://documize.com

package search

import (
	"errors"
	"fmt"
	"sync"

	"github.com/documize/community/core/env"
	"github.com/documize/community/domain"
	"github.com/documize/community/model"
	"github.com/documize/community/model/doc"
	"github.com/documize/community/model/page"
)

// Indexer provides the data structure for the queues of activity to be serialized through a single background goroutine.
// NOTE: if the queue becomes full, the system triggers a rebuild of entire documents in order to clear the backlog.
type Indexer struct {
	queue        chan queueEntry
	rebuild      map[string]bool
	rebuildLock  sync.RWMutex
	givenWarning bool
	runtime      *env.Runtime
	store        *domain.Store
}
type queueEntry struct {
	action    func(domain.RequestContext, page.Page) error
	isRebuild bool
	page.Page
	ctx domain.RequestContext
}

var searches *Indexer

// NOTE: the largest 15MB docx in the test set generates 2142 queue entries, but the queue is constantly emptied.
const searchQueueLength = 2048
// Start the background indexer.
func Start(rt *env.Runtime, s *domain.Store) {
	searches = &Indexer{}
	searches.queue = make(chan queueEntry, searchQueueLength) // provide some decoupling
	searches.rebuild = make(map[string]bool)
	searches.runtime = rt
	searches.store = s

	go searches.searchProcessQueue()
}
// searchProcessQueue is run as a goroutine; it processes the queue of search index update requests.
func (m *Indexer) searchProcessQueue() {
	for {
		// Drain any pending rebuild requests once the queue has largely emptied.
		if len(m.queue) <= searchQueueLength/20 { // on a busy server, the queue may never get to zero - so use 5%
			m.rebuildLock.Lock()
			for docid := range m.rebuild {
				m.queue <- queueEntry{
					action:    m.store.Search.Rebuild,
					isRebuild: true,
					Page:      page.Page{DocumentID: docid},
				}
				delete(m.rebuild, docid)
			}
			m.rebuildLock.Unlock()
		}

		qe := <-m.queue
		doit := true

		if len(qe.DocumentID) > 0 {
			m.rebuildLock.RLock()
			if m.rebuild[qe.DocumentID] {
				doit = false // don't execute an action on a document queued to be rebuilt
			}
			m.rebuildLock.RUnlock()
		}

		if doit {
			tx, err := m.runtime.Db.Beginx()
			if err != nil {
				m.runtime.Log.Error("searchProcessQueue begin transaction", err)
			} else {
				ctx := qe.ctx
				ctx.Transaction = tx
				err = qe.action(ctx, qe.Page)
				if err != nil {
					tx.Rollback()
					// This action has failed, so re-build indexes for the entire document,
					// provided it was not a re-build command that failed and we know the documentId.
					if !qe.isRebuild && len(qe.DocumentID) > 0 {
						m.rebuildLock.Lock()
						m.rebuild[qe.DocumentID] = true
						m.rebuildLock.Unlock()
					}
				} else {
					tx.Commit()
				}
			}
		}
	}
}
// addQueue queues an entry for processing; if the queue is nearly full, the
// document is marked for a full rebuild instead of queueing the single action.
func (m *Indexer) addQueue(qe queueEntry) error {
	lsq := len(m.queue)

	if lsq >= (searchQueueLength - 1) {
		if qe.DocumentID != "" {
			m.rebuildLock.Lock()
			if !m.rebuild[qe.DocumentID] {
				m.runtime.Log.Info(fmt.Sprintf("WARNING: Search Queue Has No Space! Marked rebuild index for document id %s", qe.DocumentID))
			}
			m.rebuild[qe.DocumentID] = true
			m.rebuildLock.Unlock()
		} else {
			m.runtime.Log.Error("addQueue", errors.New("WARNING: Search Queue Has No Space! But unable to index unknown document id"))
		}

		return nil
	}

	if lsq > ((8 * searchQueueLength) / 10) {
		if !m.givenWarning {
			m.runtime.Log.Info(fmt.Sprintf("WARNING: Searches.queue length %d exceeds 80%% of capacity", lsq))
			m.givenWarning = true
		}
	} else {
		if m.givenWarning {
			m.runtime.Log.Info(fmt.Sprintf("INFO: Searches.queue length %d now below 80%% of capacity", lsq))
			m.givenWarning = false
		}
	}

	m.queue <- qe

	return nil
}
// Add should be called when a new page is added to a document.
func (m *Indexer) Add(ctx domain.RequestContext, page page.Page, id string) (err error) {
	page.RefID = id

	err = m.addQueue(queueEntry{
		action: m.store.Search.Add,
		Page:   page,
		ctx:    ctx,
	})

	return
}
// Update should be called after a page record has been updated.
func (m *Indexer) Update(ctx domain.RequestContext, page page.Page) (err error) {
	err = m.addQueue(queueEntry{
		action: m.store.Search.Update,
		Page:   page,
		ctx:    ctx,
	})

	return
}
// UpdateDocument should be called after a document record has been updated.
func (m *Indexer) UpdateDocument(ctx domain.RequestContext, document doc.Document) (err error) {
	err = m.addQueue(queueEntry{
		action: m.store.Search.UpdateDocument,
		Page: page.Page{
			DocumentID: document.RefID,
			Title:      document.Title,
			Body:       document.Slug, // NOTE body==slug in this context
		},
		ctx: ctx,
	})

	return
}
// DeleteDocument should be called after a document record has been deleted.
func (m *Indexer) DeleteDocument(ctx domain.RequestContext, documentID string) (err error) {
	if len(documentID) > 0 {
		// NOTE: deletes are sent straight to the queue, bypassing addQueue.
		m.queue <- queueEntry{
			action: m.store.Search.DeleteDocument,
			Page:   page.Page{DocumentID: documentID},
			ctx:    ctx,
		}
	}
	return
}
// UpdateSequence should be called after a page record has been resequenced.
func (m *Indexer) UpdateSequence(ctx domain.RequestContext, documentID, pageID string, sequence float64) (err error) {
	err = m.addQueue(queueEntry{
		action: m.store.Search.UpdateSequence,
		Page: page.Page{
			BaseEntity: model.BaseEntity{RefID: pageID},
			Sequence:   sequence,
			DocumentID: documentID,
		},
		ctx: ctx,
	})

	return
}
// UpdateLevel should be called after the level of a page has been changed.
func (m *Indexer) UpdateLevel(ctx domain.RequestContext, documentID, pageID string, level int) (err error) {
	err = m.addQueue(queueEntry{
		action: m.store.Search.UpdateLevel,
		Page: page.Page{
			BaseEntity: model.BaseEntity{RefID: pageID},
			Level:      uint64(level),
			DocumentID: documentID,
		},
		ctx: ctx,
	})

	return
}
// Delete should be called after a page has been deleted.
func (m *Indexer) Delete(ctx domain.RequestContext, documentID, pageID string) (rows int64, err error) {
	err = m.addQueue(queueEntry{
		action: m.store.Search.Delete,
		Page: page.Page{
			BaseEntity: model.BaseEntity{RefID: pageID},
			DocumentID: documentID,
		},
		ctx: ctx,
	})

	return
}
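End to end, here is a hedged sketch of how this queue might be driven at startup and from a page handler; the package and names wireSearch, onPageAdded and idx are illustrative only, since the commit keeps its Indexer in the unexported package variable searches.

package wiring // hypothetical package, for illustration only

import (
	"github.com/documize/community/core/env"
	"github.com/documize/community/domain"
	"github.com/documize/community/domain/search"
	"github.com/documize/community/model/page"
)

// wireSearch starts the indexer once at boot; searchProcessQueue then runs
// for the lifetime of the process, draining the queue and pending rebuilds.
func wireSearch(rt *env.Runtime, s *domain.Store) {
	search.Start(rt, s)
}

// onPageAdded assumes a handle to the Indexer is made available to callers
// (the commit itself keeps it in the package-level variable searches).
func onPageAdded(idx *search.Indexer, ctx domain.RequestContext, p page.Page, refID string) error {
	// Enqueue the page; the background goroutine opens its own transaction
	// and falls back to a whole-document rebuild if the action fails.
	return idx.Add(ctx, p, refID)
}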