1
0
Fork 0
mirror of https://github.com/documize/community.git synced 2025-08-02 20:15:26 +02:00

Rebuild search index

This commit is contained in:
sauls8t 2018-03-29 12:23:09 +01:00
parent 30315a36c7
commit df0a13b6ed
18 changed files with 960 additions and 669 deletions

View file

@ -23,6 +23,7 @@ import (
"github.com/documize/community/domain"
"github.com/documize/community/domain/auth"
"github.com/documize/community/domain/organization"
indexer "github.com/documize/community/domain/search"
"github.com/documize/community/model/doc"
"github.com/documize/community/model/org"
"github.com/documize/community/model/space"
@ -32,6 +33,7 @@ import (
// Handler groups the dependencies used by the HTTP handlers in this package.
type Handler struct {
// Runtime supplies the logger (Runtime.Log) the handlers write to.
Runtime *env.Runtime
// Store gives access to persistence; the search handlers use Store.Meta.
Store *domain.Store
// Indexer is called (IndexContent) to index each page during a rebuild.
Indexer indexer.Indexer
}
// Meta provides org meta data based upon request domain (e.g. acme.documize.com).
@ -176,7 +178,83 @@ func (h *Handler) Sitemap(w http.ResponseWriter, r *http.Request) {
response.WriteBytes(w, buffer.Bytes())
}
// Reindex kicks off a rebuild of the search index and responds immediately.
//
// The request is rejected with a forbidden response, and the attempt logged,
// unless the request context is flagged Global. For permitted callers the
// rebuild runs in a separate goroutine, so the empty response written here
// only acknowledges that the rebuild was started, not that it completed.
func (h *Handler) Reindex(w http.ResponseWriter, r *http.Request) {
	ctx := domain.GetRequestContext(r)

	if !ctx.Global {
		response.WriteForbiddenError(w)
		// The %s verb previously had no operand, which logged "%!s(MISSING)".
		// NOTE(review): assumes RequestContext exposes UserID — confirm.
		h.Runtime.Log.Info(fmt.Sprintf("%s attempted search reindex", ctx.UserID))
		return
	}

	// Fire and forget: progress and failures are reported through the log only.
	go h.rebuildSearchIndex(ctx)

	response.WriteEmpty(w)
}
// rebuildSearchIndex walks every document ID returned by the meta store,
// loads that document's pages and hands each page to the indexer.
//
// It is started as a goroutine by Reindex, so outcomes are reported through
// the runtime log only. A failure to list documents, or to load the pages of
// any single document, is logged and stops the rebuild at that point.
func (h *Handler) rebuildSearchIndex(ctx domain.RequestContext) {
	method := "meta.rebuildSearchIndex"

	docs, err := h.Store.Meta.GetDocumentsID(ctx)
	if err != nil {
		h.Runtime.Log.Error(method, err)
		return
	}

	h.Runtime.Log.Info(fmt.Sprintf("Search re-index started for %d documents", len(docs)))

	for i, documentID := range docs {
		pages, err := h.Store.Meta.GetDocumentPages(ctx, documentID)
		if err != nil {
			h.Runtime.Log.Error(method, err)
			return
		}

		for j := range pages {
			h.Indexer.IndexContent(ctx, pages[j])
		}

		// Log progress every 100 completed documents. Counting completed
		// documents (i+1) avoids the former "re-indexed 0 documents" line
		// emitted right after the first document and keeps the figure exact.
		if done := i + 1; done%100 == 0 {
			h.Runtime.Log.Info(fmt.Sprintf("Search re-indexed %d documents...", done))
		}
	}

	h.Runtime.Log.Info(fmt.Sprintf("Search re-index finished for %d documents", len(docs)))
}
// SearchStatus reports the current number of entries in the search index
// as JSON ({"entries": N}).
//
// The request is rejected with a forbidden response, and the attempt logged,
// unless the request context is flagged Global. A failure to count entries
// produces a server error response and is logged.
func (h *Handler) SearchStatus(w http.ResponseWriter, r *http.Request) {
	method := "meta.SearchStatus"
	ctx := domain.GetRequestContext(r)

	if !ctx.Global {
		response.WriteForbiddenError(w)
		// The %s verb previously had no operand, which logged "%!s(MISSING)".
		// NOTE(review): assumes RequestContext exposes UserID — confirm.
		h.Runtime.Log.Info(fmt.Sprintf("%s attempted get of search status", ctx.UserID))
		return
	}

	count, err := h.Store.Meta.SearchIndexCount(ctx)
	if err != nil {
		response.WriteServerError(w, method, err)
		h.Runtime.Log.Error(method, err)
		return
	}

	response.WriteJSON(w, searchStatus{Entries: count})
}
// sitemapItem pairs a URL with a date, both held as plain strings.
// NOTE(review): presumably one entry of the generated sitemap; the code that
// fills it in is outside this view — confirm the expected date format there.
type sitemapItem struct {
	URL, Date string
}
// searchStatus is the JSON payload written by SearchStatus.
type searchStatus struct {
// Entries is the search index entry count, serialized as "entries".
Entries int `json:"entries"`
}

View file

@ -0,0 +1,73 @@
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
//
// This software (Documize Community Edition) is licensed under
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
//
// You can operate outside the AGPL restrictions by purchasing
// Documize Enterprise Edition and obtaining a commercial license
// by contacting <sales@documize.com>.
//
// https://documize.com
package mysql
import (
"database/sql"
"github.com/documize/community/core/env"
"github.com/documize/community/domain"
"github.com/documize/community/model/page"
"github.com/pkg/errors"
)
// Scope provides data access to MySQL.
// Its methods run their queries through Runtime.Db.
type Scope struct {
// Runtime carries the database handle (Runtime.Db) used by every query here.
Runtime *env.Runtime
}
// GetDocumentsID lists the refid of every row in the document table whose
// lifecycle column equals 1. The query is not restricted by organization, so
// it spans all tenants of the instance; ctx is not consulted.
//
// sql.ErrNoRows is treated as "no documents" and yields an empty, non-nil
// slice with a nil error. Any other failure is returned wrapped.
func (s Scope) GetDocumentsID(ctx domain.RequestContext) (documents []string, err error) {
	const query = `SELECT refid FROM document WHERE lifecycle=1`

	err = s.Runtime.Db.Select(&documents, query)

	switch {
	case err == sql.ErrNoRows:
		return []string{}, nil
	case err != nil:
		return documents, errors.Wrap(err, "failed to get instance document ID values")
	}

	return documents, nil
}
// GetDocumentPages loads the page rows belonging to documentID that have
// status 0, or status 2 or 4 with an empty relativeid.
// The query carries no ORDER BY, so callers must not rely on row order.
// ctx is not consulted; a query failure is returned wrapped.
func (s Scope) GetDocumentPages(ctx domain.RequestContext, documentID string) (p []page.Page, err error) {
	const query = `SELECT
a.id, a.refid, a.orgid, a.documentid, a.userid, a.contenttype,
a.pagetype, a.level, a.sequence, a.title, a.body, a.revisions,
a.blockid, a.status, a.relativeid, a.created, a.revised
FROM page a
WHERE a.documentid=? AND (a.status=0 OR ((a.status=4 OR a.status=2) AND a.relativeid=''))`

	if err = s.Runtime.Db.Select(&p, query, documentID); err != nil {
		err = errors.Wrap(err, "failed to get instance document pages")
	}

	return p, err
}
// SearchIndexCount returns the number of rows in the search table.
// On failure the count is reported as 0 alongside the wrapped error.
// ctx is not consulted.
func (s Scope) SearchIndexCount(ctx domain.RequestContext) (c int, err error) {
	scanErr := s.Runtime.Db.QueryRow("SELECT count(*) FROM search").Scan(&c)
	if scanErr != nil {
		return 0, errors.Wrap(scanErr, "count search index entries")
	}

	return c, nil
}

View file

@ -42,6 +42,7 @@ type Store struct {
Document DocumentStorer
Group GroupStorer
Link LinkStorer
Meta MetaStorer
Organization OrganizationStorer
Page PageStorer
Pin PinStorer
@ -282,3 +283,10 @@ type GroupStorer interface {
JoinGroup(ctx RequestContext, groupID, userID string) (err error)
LeaveGroup(ctx RequestContext, groupID, userID string) (err error)
}
// MetaStorer provides specialist methods for global administrators.
// These are the store operations used to rebuild and inspect the search index.
type MetaStorer interface {
// GetDocumentsID returns the IDs of the documents to be indexed.
GetDocumentsID(ctx RequestContext) (documents []string, err error)
// GetDocumentPages returns the page records for the given document ID.
GetDocumentPages(ctx RequestContext, documentID string) (p []page.Page, err error)
// SearchIndexCount returns the number of entries in the search index.
SearchIndexCount(ctx RequestContext) (c int, err error)
}