mirror of
https://github.com/documize/community.git
synced 2025-07-19 05:09:42 +02:00
major package structure refactoring
This commit is contained in:
parent
7b8cec9a6c
commit
cf58f8164d
73 changed files with 549 additions and 389 deletions
66
core/stringutil/beautify.go
Normal file
66
core/stringutil/beautify.go
Normal file
|
@ -0,0 +1,66 @@
|
|||
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
|
||||
//
|
||||
// This software (Documize Community Edition) is licensed under
|
||||
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
|
||||
//
|
||||
// You can operate outside the AGPL restrictions by purchasing
|
||||
// Documize Enterprise Edition and obtaining a commercial license
|
||||
// by contacting <sales@documize.com>.
|
||||
//
|
||||
// https://documize.com
|
||||
|
||||
package stringutil
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// BeautifyFilename takes a filename and attempts to turn it into a readable form,
// as TitleCase natural language, suitable for the top level of a Document.
//
// Any directory part and the final extension are dropped, every rune that is
// not a letter, a digit or '.' becomes a space, a space is inserted where a
// new capitalised word starts inside a run of letters ("DooDah" -> "Doo Dah"),
// and the first rune of every word is upper-cased.
//
// A name without an extension is kept whole.
func BeautifyFilename(fn string) string {
	_, file := filepath.Split(fn)

	// Strip only the last extension. When there is no '.' at all the whole
	// name is the title, instead of collapsing to the empty string.
	if dot := strings.LastIndex(file, "."); dot >= 0 {
		file = file[:dot]
	}
	r := []rune(file)

	// make any non-letter/digit characters space
	for i, c := range r {
		if !unicode.IsLetter(c) && !unicode.IsDigit(c) && c != '.' {
			r[i] = ' '
		}
	}

	// Insert a space in front of any upper-case rune that is followed by a
	// lower-case rune, unless it is the first rune of the name or is already
	// preceded by a space. Done in one pass into a fresh slice.
	out := make([]rune, 0, len(r)+len(r)/2)
	for i, c := range r {
		if i > 0 && i+1 < len(r) &&
			unicode.IsUpper(c) && unicode.IsLower(r[i+1]) && r[i-1] != ' ' {
			out = append(out, ' ')
		}
		out = append(out, c)
	}

	// Make the first letter of each word upper case. The name may start with
	// a space (e.g. "_draft.txt"), so the second rune is checked as well.
	for i := range out {
		if i == 0 || out[i-1] == ' ' {
			out[i] = unicode.ToUpper(out[i])
		}
	}
	return string(out)
}
|
25
core/stringutil/beautify_test.go
Normal file
25
core/stringutil/beautify_test.go
Normal file
|
@ -0,0 +1,25 @@
|
|||
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
|
||||
//
|
||||
// This software (Documize Community Edition) is licensed under
|
||||
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
|
||||
//
|
||||
// You can operate outside the AGPL restrictions by purchasing
|
||||
// Documize Enterprise Edition and obtaining a commercial license
|
||||
// by contacting <sales@documize.com>.
|
||||
//
|
||||
// https://documize.com
|
||||
|
||||
package stringutil
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestBeautify(t *testing.T) {
|
||||
bs(t, "DooDah$day.zip", "Doo Dah Day")
|
||||
}
|
||||
|
||||
func bs(t *testing.T, in, out string) {
|
||||
got := BeautifyFilename(in)
|
||||
if got != out {
|
||||
t.Errorf("BeautifyFilename input `%s` got `%s` expected `%s`\n", in, got, out)
|
||||
}
|
||||
}
|
38
core/stringutil/conjoin.go
Normal file
38
core/stringutil/conjoin.go
Normal file
|
@ -0,0 +1,38 @@
|
|||
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
|
||||
//
|
||||
// This software (Documize Community Edition) is licensed under
|
||||
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
|
||||
//
|
||||
// You can operate outside the AGPL restrictions by purchasing
|
||||
// Documize Enterprise Edition and obtaining a commercial license
|
||||
// by contacting <sales@documize.com>.
|
||||
//
|
||||
// https://documize.com
|
||||
|
||||
package stringutil
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Conjoin joins items into a natural-language list: all items but the last
// are separated by ", " and the last one is attached with conj, without a
// comma in front of it. For example Conjoin("and", []string{"Suzanne",
// "Fatima", "Brian"}) returns "Suzanne, Fatima and Brian".
//
// An empty slice yields "" and a single item is returned unchanged. The items
// themselves are never modified, even when they contain ", " or conj.
func Conjoin(conj string, items []string) string {
	switch n := len(items); n {
	case 0:
		return ""
	case 1:
		return items[0]
	default:
		// Build the result directly rather than patching the joined string,
		// so the text of the items cannot be touched and every conjunction is
		// treated the same way.
		return strings.Join(items[:n-1], ", ") + " " + conj + " " + items[n-1]
	}
}
|
158
core/stringutil/html.go
Normal file
158
core/stringutil/html.go
Normal file
|
@ -0,0 +1,158 @@
|
|||
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
|
||||
//
|
||||
// This software (Documize Community Edition) is licensed under
|
||||
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
|
||||
//
|
||||
// You can operate outside the AGPL restrictions by purchasing
|
||||
// Documize Enterprise Edition and obtaining a commercial license
|
||||
// by contacting <sales@documize.com>.
|
||||
//
|
||||
// https://documize.com
|
||||
|
||||
package stringutil
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
"golang.org/x/net/html/atom"
|
||||
)
|
||||
|
||||
// HTML is a chunk of HTML markup; its Text method returns the plain text it contains.
|
||||
type HTML string
|
||||
|
||||
// write out the textual element of the html node, if present, then iterate through the child nodes.
|
||||
func writeText(n *html.Node, b io.Writer, isTest bool) {
|
||||
if !excluded(n) {
|
||||
switch n.Type {
|
||||
case html.TextNode:
|
||||
_, err := b.Write([]byte(n.Data + string(rune(0x200B)))) // + http://en.wikipedia.org/wiki/Zero-width_space
|
||||
if err != nil {
|
||||
}
|
||||
// TODO This use of zero-width-space (subsequently replaced by ' ' or ignored, depending on context)
|
||||
// TODO works well for in-word breaks, but at the expense of concatenating some words in error.
|
||||
// TODO It may be that better examination of the HTML structure could be used to determine
|
||||
// TODO when a space is, or is not, required. In that event we would not use zero-width-space.
|
||||
|
||||
default:
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
writeText(c, b, isTest)
|
||||
}
|
||||
switch n.DataAtom {
|
||||
case 0:
|
||||
if n.Data == "documize" {
|
||||
for _, a := range n.Attr {
|
||||
if a.Key == "type" {
|
||||
if isTest {
|
||||
var err error
|
||||
switch a.Val {
|
||||
case "field-start":
|
||||
_, err = b.Write([]byte(" [ "))
|
||||
case "field-end":
|
||||
_, err = b.Write([]byte(" ] "))
|
||||
default:
|
||||
_, err = b.Write([]byte(" [ ] "))
|
||||
}
|
||||
if err != nil {
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
case atom.Span, atom.U, atom.B, atom.I, atom.Del, atom.Sub, atom.Sup:
|
||||
//NoOp
|
||||
default:
|
||||
_, err := b.Write([]byte(" ")) // add a space after each main element
|
||||
if err != nil {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func excluded(n *html.Node) bool {
|
||||
if n.DataAtom == atom.Div {
|
||||
for _, a := range n.Attr {
|
||||
if a.Key == "class" {
|
||||
switch a.Val {
|
||||
case "documize-first-page",
|
||||
"documize-exotic-image",
|
||||
"documize-footnote",
|
||||
"documize-graphictext",
|
||||
"documize-math":
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// findBody finds the body HTML node if it exists in the tree. Required to bypass the page title text.
|
||||
func findBody(n *html.Node) *html.Node {
|
||||
if n.DataAtom == atom.Body {
|
||||
return n
|
||||
}
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
r := findBody(c)
|
||||
if r != nil {
|
||||
return r
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Text returns only the plain text elements of the HTML Chunk, concatanated with "\n",
|
||||
// for use in the TOC or for text indexing.
|
||||
func (ch HTML) Text(isTest bool) (string, error) {
|
||||
var b bytes.Buffer
|
||||
doc, err := html.Parse(strings.NewReader(string(ch)))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
body := findBody(doc)
|
||||
if body == nil {
|
||||
body = doc
|
||||
}
|
||||
writeText(body, &b, isTest)
|
||||
return string(b.Bytes()), nil
|
||||
}
|
||||
|
||||
// EscapeHTMLcomplexChars looks for "complex" characters within HTML
// and replaces them with the HTML escape codes which describe them.
// "Complex" characters are those encoded in more than one byte by UTF8.
//
// Each such rune is written as a decimal numeric character reference
// ("&#NNN;"); every other rune is copied through unchanged.
func EscapeHTMLcomplexChars(s string) string {
	// Accumulate in a buffer: appending to a string in the loop copies the
	// whole result on every iteration.
	var ret bytes.Buffer
	ret.Grow(len(s))
	for _, r := range s {
		if utf8.RuneLen(r) > 1 {
			fmt.Fprintf(&ret, "&#%d;", r)
		} else {
			ret.WriteRune(r)
		}
	}
	return ret.String()
}
|
||||
|
||||
// EscapeHTMLcomplexCharsByte is the byte-slice form of the HTML escaping of
// "complex" characters: every rune that UTF8 encodes in more than one byte
// is replaced by its decimal numeric character reference ("&#NNN;"), and all
// other bytes are copied through unchanged.
func EscapeHTMLcomplexCharsByte(b []byte) []byte {
	var out bytes.Buffer
	for rest := b; len(rest) > 0; {
		r, width := utf8.DecodeRune(rest)
		if utf8.RuneLen(r) > 1 {
			fmt.Fprintf(&out, "&#%d;", r)
		} else {
			out.Write(rest[:width]) // a bytes.Buffer write always returns a nil error
		}
		rest = rest[width:]
	}
	return out.Bytes()
}
|
83
core/stringutil/html_test.go
Normal file
83
core/stringutil/html_test.go
Normal file
|
@ -0,0 +1,83 @@
|
|||
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
|
||||
//
|
||||
// This software (Documize Community Edition) is licensed under
|
||||
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
|
||||
//
|
||||
// You can operate outside the AGPL restrictions by purchasing
|
||||
// Documize Enterprise Edition and obtaining a commercial license
|
||||
// by contacting <sales@documize.com>.
|
||||
//
|
||||
// https://documize.com
|
||||
|
||||
package stringutil
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestHTML(t *testing.T) {
|
||||
type testConv struct {
|
||||
htm, txt string
|
||||
istest bool
|
||||
}
|
||||
convTest := []testConv{
|
||||
{
|
||||
`<html><head><title>HTML TITLE</title></head><body><p>This <I>is</I>:</p><ul><li><a href="foo">Example</a><li><a href="/bar/baz">HTML text.</a><div class="documize-math">exclueded</div></ul></body></html>`,
|
||||
"This is : Example HTML text. ", false,
|
||||
},
|
||||
{
|
||||
`<p>This is:</p><ul><li><documize type="field-start"></documize> <documize type="field-end"></documize><documize type="unknown"></documize><li><a href="/bar/baz">HTML text.</a></ul>`,
|
||||
"This is: [ ] [ ] HTML text. ", true,
|
||||
},
|
||||
}
|
||||
for _, tst := range convTest {
|
||||
var ch HTML
|
||||
ch = HTML([]byte(tst.htm))
|
||||
//t.Logf("HTML: %s", ch)
|
||||
txt, err := ch.Text(tst.istest)
|
||||
if err != nil {
|
||||
t.Log(err)
|
||||
t.Fail()
|
||||
}
|
||||
expected := compressSpaces(tst.txt)
|
||||
got := compressSpaces(string(txt))
|
||||
if expected != got {
|
||||
t.Errorf("Conversion to text for `%s`, expected: `%s` got: `%s`\n",
|
||||
ch, expected, got)
|
||||
} //else {
|
||||
// t.Logf("Text: %s", txt)
|
||||
//}
|
||||
}
|
||||
}
|
||||
|
||||
// compressSpaces replaces every run of spaces, tabs, newlines and zero-width
// spaces in s with one single space and leaves all other runes untouched.
func compressSpaces(s string) string {
	out := make([]rune, 0, len(s))
	prevWasSpace := false
	for _, r := range s {
		isSpace := r == ' ' || r == '\t' || r == '\n' || r == '\u200b' /*zero width space*/
		if !isSpace {
			out = append(out, r)
		} else if !prevWasSpace {
			out = append(out, ' ')
		}
		prevWasSpace = isSpace
	}
	return string(out)
}
|
||||
|
||||
func TestHTMLescape(t *testing.T) {
|
||||
tianchao := "兲朝 test"
|
||||
expected := "兲朝 test"
|
||||
|
||||
gotString := EscapeHTMLcomplexChars(tianchao)
|
||||
if gotString != expected {
|
||||
t.Errorf("EscapeHTMLcomplexChars error got `%s` expected `%s`\n", gotString, expected)
|
||||
}
|
||||
|
||||
gotBytes := EscapeHTMLcomplexCharsByte([]byte(tianchao))
|
||||
if string(gotBytes) != expected {
|
||||
t.Errorf("EscapeHTMLcomplexCharsByte error got `%s` expected `%s`\n", string(gotBytes), expected)
|
||||
}
|
||||
|
||||
}
|
34
core/stringutil/initials.go
Normal file
34
core/stringutil/initials.go
Normal file
|
@ -0,0 +1,34 @@
|
|||
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
|
||||
//
|
||||
// This software (Documize Community Edition) is licensed under
|
||||
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
|
||||
//
|
||||
// You can operate outside the AGPL restrictions by purchasing
|
||||
// Documize Enterprise Edition and obtaining a commercial license
|
||||
// by contacting <sales@documize.com>.
|
||||
//
|
||||
// https://documize.com
|
||||
|
||||
package stringutil
|
||||
|
||||
import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
// MakeInitials returns user initials from firstname and lastname.
//
// Both names are trimmed of surrounding white space; the first character of
// each non-empty name is taken and the result is upper-cased. An empty name
// contributes nothing, so the result has between zero and two characters.
func MakeInitials(firstname, lastname string) string {
	initials := make([]rune, 0, 2)
	for _, name := range [...]string{firstname, lastname} {
		// Work on runes, not bytes: slicing the first byte of a name that
		// starts with a multi-byte character yields invalid UTF-8.
		if r := []rune(strings.TrimSpace(name)); len(r) > 0 {
			initials = append(initials, r[0])
		}
	}
	return strings.ToUpper(string(initials))
}
|
28
core/stringutil/initials_test.go
Normal file
28
core/stringutil/initials_test.go
Normal file
|
@ -0,0 +1,28 @@
|
|||
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
|
||||
//
|
||||
// This software (Documize Community Edition) is licensed under
|
||||
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
|
||||
//
|
||||
// You can operate outside the AGPL restrictions by purchasing
|
||||
// Documize Enterprise Edition and obtaining a commercial license
|
||||
// by contacting <sales@documize.com>.
|
||||
//
|
||||
// https://documize.com
|
||||
|
||||
package stringutil
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestInitials(t *testing.T) {
|
||||
in(t, "Harvey", "Kandola", "HK")
|
||||
in(t, "Harvey", "", "H")
|
||||
in(t, "", "Kandola", "K")
|
||||
in(t, "", "", "")
|
||||
}
|
||||
|
||||
func in(t *testing.T, firstname, lastname, expecting string) {
|
||||
initials := MakeInitials(firstname, lastname)
|
||||
if initials != expecting {
|
||||
t.Errorf("expecting initials of `%s` got `%s`\n", expecting, initials)
|
||||
}
|
||||
}
|
37
core/stringutil/slug.go
Normal file
37
core/stringutil/slug.go
Normal file
|
@ -0,0 +1,37 @@
|
|||
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
|
||||
//
|
||||
// This software (Documize Community Edition) is licensed under
|
||||
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
|
||||
//
|
||||
// You can operate outside the AGPL restrictions by purchasing
|
||||
// Documize Enterprise Edition and obtaining a commercial license
|
||||
// by contacting <sales@documize.com>.
|
||||
//
|
||||
// https://documize.com
|
||||
|
||||
package stringutil
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"unicode"
|
||||
)
|
||||
|
||||
// MakeSlug creates a slug, suitable for use in a URL, from a string.
//
// The string is lower-cased; every run of runes that are neither lower-case
// letters nor digits acts as a separator. The remaining pieces are joined
// with a single '-', so the slug never starts or ends with '-' and never
// contains "--", however long a run of separators is.
func MakeSlug(str string) string {
	isSeparator := func(r rune) bool {
		return !unicode.IsLower(r) && !unicode.IsDigit(r)
	}
	// FieldsFunc drops leading, trailing and repeated separators in one
	// pass, so runs of any length collapse to a single '-'.
	return strings.Join(strings.FieldsFunc(strings.ToLower(str), isSeparator), "-")
}
|
25
core/stringutil/slug_test.go
Normal file
25
core/stringutil/slug_test.go
Normal file
|
@ -0,0 +1,25 @@
|
|||
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
|
||||
//
|
||||
// This software (Documize Community Edition) is licensed under
|
||||
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
|
||||
//
|
||||
// You can operate outside the AGPL restrictions by purchasing
|
||||
// Documize Enterprise Edition and obtaining a commercial license
|
||||
// by contacting <sales@documize.com>.
|
||||
//
|
||||
// https://documize.com
|
||||
|
||||
package stringutil
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestSlug(t *testing.T) {
|
||||
st(t, " Zip--up ", "zip-up")
|
||||
}
|
||||
|
||||
func st(t *testing.T, in, out string) {
|
||||
got := MakeSlug(in)
|
||||
if got != out {
|
||||
t.Errorf("slug input `%s` got `%s` expected `%s`\n", in, got, out)
|
||||
}
|
||||
}
|
78
core/stringutil/words.go
Normal file
78
core/stringutil/words.go
Normal file
|
@ -0,0 +1,78 @@
|
|||
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
|
||||
//
|
||||
// This software (Documize Community Edition) is licensed under
|
||||
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
|
||||
//
|
||||
// You can operate outside the AGPL restrictions by purchasing
|
||||
// Documize Enterprise Edition and obtaining a commercial license
|
||||
// by contacting <sales@documize.com>.
|
||||
//
|
||||
// https://documize.com
|
||||
|
||||
package stringutil
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
|
||||
nethtml "golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// Words returns a slice of words, where each word contains no whitespace, and each item of punctuation is its own word.
|
||||
// This functionality is provided to enable verification of the text extraction algorithm across different implemntations.
|
||||
func Words(ch HTML, inSqBr int, testMode bool) ([]string, int, error) {
|
||||
txt, err := ch.Text(testMode)
|
||||
if err != nil {
|
||||
return nil, inSqBr, err
|
||||
}
|
||||
txt = nethtml.UnescapeString(txt)
|
||||
|
||||
words := []string{""}
|
||||
|
||||
for _, c := range txt {
|
||||
if inSqBr > 0 {
|
||||
switch c {
|
||||
case ']':
|
||||
inSqBr--
|
||||
case '[':
|
||||
inSqBr++
|
||||
}
|
||||
} else {
|
||||
if c == rune(0x200B) { // http://en.wikipedia.org/wiki/Zero-width_space
|
||||
if testMode {
|
||||
c = ' ' // NOTE only replace with a space here if we are testing
|
||||
}
|
||||
}
|
||||
if c != rune(0x200B) { // http://en.wikipedia.org/wiki/Zero-width_space
|
||||
if c == '[' {
|
||||
inSqBr = 1
|
||||
words = append(words, "[") // open square bracket means potentially elided text
|
||||
words = append(words, "")
|
||||
} else {
|
||||
inSqBr = 0
|
||||
if unicode.IsPunct(c) || unicode.IsSymbol(c) || unicode.IsDigit(c) {
|
||||
if words[len(words)-1] == "" {
|
||||
words[len(words)-1] = string(c)
|
||||
} else {
|
||||
words = append(words, string(c))
|
||||
}
|
||||
words = append(words, "")
|
||||
} else {
|
||||
if unicode.IsGraphic(c) || unicode.IsSpace(c) {
|
||||
if unicode.IsSpace(c) {
|
||||
if words[len(words)-1] != "" {
|
||||
words = append(words, "")
|
||||
}
|
||||
} else {
|
||||
words[len(words)-1] += string(c)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !testMode { // add dummy punctuation if not in test mode to avoid incorrect sentance concatanation
|
||||
words = append(words, ".")
|
||||
}
|
||||
return append(words, ""), inSqBr, nil // make sure there is always a blank entry at the end
|
||||
}
|
57
core/stringutil/words_test.go
Normal file
57
core/stringutil/words_test.go
Normal file
|
@ -0,0 +1,57 @@
|
|||
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
|
||||
//
|
||||
// This software (Documize Community Edition) is licensed under
|
||||
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
|
||||
//
|
||||
// You can operate outside the AGPL restrictions by purchasing
|
||||
// Documize Enterprise Edition and obtaining a commercial license
|
||||
// by contacting <sales@documize.com>.
|
||||
//
|
||||
// https://documize.com
|
||||
|
||||
package stringutil
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestWords(t *testing.T) {
|
||||
ws(t, " the quick brown fox jumps over the lazy dog [ ] ["+string(rune(0x200B)), 0, true,
|
||||
"the quick brown fox jumps over the lazy dog [ [", 1)
|
||||
ws(t, "the quick brown [ dog jumps over the lazy ] fox", 0, false,
|
||||
"the quick brown [ fox .", 0)
|
||||
ws(t, "the quick brown;fox;", 0, false,
|
||||
"the quick brown ; fox ; .", 0)
|
||||
ws(t, "the ] quick brown fox ", 1, true,
|
||||
"quick brown fox", 0)
|
||||
}
|
||||
|
||||
func ws(t *testing.T, in string, bktIn int, isTest bool, out string, bktOut int) {
|
||||
wds := strings.Split(out, " ")
|
||||
gotX, bo, e := Words(HTML(in), bktIn, isTest)
|
||||
if e != nil {
|
||||
t.Fatal(e)
|
||||
}
|
||||
if bo != bktOut {
|
||||
t.Errorf("wrong bracket count returned: input `%s` bktIn %d bktOut %d\n", in, bktIn, bktOut)
|
||||
}
|
||||
got := make([]string, 0, len(gotX))
|
||||
for _, v := range gotX { // remove empty entries
|
||||
if v != "" {
|
||||
got = append(got, v)
|
||||
}
|
||||
}
|
||||
if len(got) != len(wds) {
|
||||
t.Errorf("wrong number of words found: input `%s` got %d %v expected %d %v`\n", in, len(got), got, len(wds), wds)
|
||||
} else {
|
||||
sort.Strings(wds)
|
||||
sort.Strings(got)
|
||||
for i := range wds {
|
||||
if wds[i] != got[i] {
|
||||
t.Errorf("wrong word[%d]: input `%s` got %v expected %v\n", i, in, got, wds)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue