1
0
Fork 0
mirror of https://github.com/documize/community.git synced 2025-07-22 14:49:42 +02:00
documize/vendor/github.com/rookii/paicehusk/stemmer_test.go
2016-07-07 18:54:16 -07:00

180 lines
5 KiB
Go

// Test file for a Go implementation of the Paice/Husk Stemming algorithm:
// http://www.comp.lancs.ac.uk/computing/research/stemming/Links/paice.htm
// Copyright (c) 2012, Aaron Groves. All rights reserved.
package paicehusk
import (
"testing"
)
// consonanttests lists (word, rune offset, expected) cases for the consonant
// check; TestVowel reuses the table with the expectation negated. Offsets index
// into the word after conversion to []rune. The cases mostly exercise the
// special handling of the letter Y.
var consonanttests = []struct {
	word     string
	offset   int
	expected bool
}{
	{word: "THEY", offset: 0, expected: true},
	{word: "THEY", offset: 1, expected: true},
	{word: "THEY", offset: 2, expected: false},
	{word: "THEY", offset: 3, expected: true},
	{word: "YOKE", offset: 0, expected: true},
	{word: "synergy", offset: 0, expected: true},
	{word: "synergy", offset: 1, expected: false},
	{word: "synergy", offset: 2, expected: true},
	{word: "synergy", offset: 3, expected: false},
	{word: "synergy", offset: 4, expected: true},
	{word: "synergy", offset: 5, expected: true},
	{word: "synergy", offset: 6, expected: false},
	// Multi-byte runes ahead of the ASCII letters: offsets 2..4 are 'b', 'o', 'y'.
	{word: "男孩boy", offset: 2, expected: true},
	{word: "男孩boy", offset: 3, expected: false},
	{word: "男孩boy", offset: 4, expected: true},
}
// TestConsonant feeds every consonanttests case to consonant and reports each
// case whose result differs from the table's expected value.
func TestConsonant(t *testing.T) {
	for idx := range consonanttests {
		tc := consonanttests[idx]
		got := consonant([]rune(tc.word), tc.offset)
		if got == tc.expected {
			continue
		}
		t.Errorf("%v. consonant([]rune(\"%v\"), %v) should be %v, got %v", idx, tc.word, tc.offset, tc.expected, got)
	}
}
// TestVowel reuses consonanttests: for each case vowel must return the
// opposite of the expected consonant result.
func TestVowel(t *testing.T) {
	for idx, tc := range consonanttests {
		want := !tc.expected
		if got := vowel([]rune(tc.word), tc.offset); got != want {
			t.Errorf("%v. vowel([]rune(\"%v\"), %v) should be %v, got %v", idx, tc.word, tc.offset, want, got)
		}
	}
}
// reversetests pairs an input string with the reversed string that
// TestReverse expects reverse to produce.
var reversetests = []struct {
	in       string
	expected string
}{
	{in: "Hello", expected: "olleH"},
	{
		in:       "Here's a more complicated string to reverse.",
		expected: ".esrever ot gnirts detacilpmoc erom a s'ereH",
	},
}
// TestReverse checks that reverse turns each reversetests input into its
// expected reversed form.
func TestReverse(t *testing.T) {
	for idx := range reversetests {
		tc := reversetests[idx]
		got := reverse(tc.in)
		if got == tc.expected {
			continue
		}
		t.Errorf("%v. reverse(\"%v\") should be %v, got %v", idx, tc.in, tc.expected, got)
	}
}
// ruletests holds rule strings together with whether they are expected to be
// valid and, for valid rules, the field values the parsed rule should carry
// (suf, intact, num, app, cont). Invalid rules leave those fields at their
// zero values.
var ruletests = []struct {
	rule   string
	valid  bool
	suf    string
	intact bool
	num    int
	app    string
	cont   bool
}{
	{rule: "ai*2.", valid: true, suf: "ai", intact: true, num: 2, app: "", cont: false},
	{rule: "lib3j>", valid: true, suf: "lib", intact: false, num: 3, app: "j", cont: true},
	{
		rule:   "There's a rule here somewhere: afab*4fla>",
		valid:  true,
		suf:    "afab",
		intact: true,
		num:    4,
		app:    "fla",
		cont:   true,
	},
	// Rules expected to be rejected.
	{rule: "ab*2 .", valid: false},
	{rule: "fire", valid: false},
	{rule: "asfa __ falkjlk ?!@|..", valid: false},
}
// TestValidRule checks that the boolean returned by ValidRule matches the
// valid flag of every ruletests entry.
func TestValidRule(t *testing.T) {
	for idx := range ruletests {
		tc := ruletests[idx]
		_, got := ValidRule(tc.rule)
		if got == tc.valid {
			continue
		}
		t.Errorf("%v. ValidRule(\"%v\") should be %v, got %v", idx, tc.rule, tc.valid, got)
	}
}
// TestParseRule parses every ruletests entry. It first checks that the ok
// result matches the expected validity; for rules that parsed as expected it
// then compares each field of the parsed rule against the table.
func TestParseRule(t *testing.T) {
	for idx, tc := range ruletests {
		parsed, ok := ParseRule(tc.rule)
		if ok != tc.valid {
			t.Errorf("%v. ParseRule(\"%v\") err should be %v, got %v", idx, tc.rule, tc.valid, ok)
			continue
		}
		if !ok {
			// Correctly rejected rule: there are no fields to inspect.
			continue
		}

		// Each field is checked independently so that one failure does not
		// hide another.
		if parsed.suf != tc.suf {
			t.Errorf("%v. r.suf should be \"%v\", got \"%v\"", idx, tc.suf, parsed.suf)
		}
		if parsed.intact != tc.intact {
			t.Errorf("%v. r.intact should be \"%v\", got \"%v\"", idx, tc.intact, parsed.intact)
		}
		if parsed.num != tc.num {
			t.Errorf("%v. r.num should be \"%v\", got \"%v\"", idx, tc.num, parsed.num)
		}
		if parsed.app != tc.app {
			t.Errorf("%v. r.app should be \"%v\", got \"%v\"", idx, tc.app, parsed.app)
		}
		if parsed.cont != tc.cont {
			t.Errorf("%v. r.cont should be \"%v\", got \"%v\"", idx, tc.cont, parsed.cont)
		}
	}
}
// TestNewRuleTable builds a rule table from the six ruletests rule strings
// (three valid, three invalid) and checks that the resulting table has two
// entries and that its "a" entry holds two rules.
func TestNewRuleTable(t *testing.T) {
	f := []string{ruletests[0].rule, ruletests[1].rule, ruletests[2].rule, ruletests[3].rule, ruletests[4].rule, ruletests[5].rule}
	table := NewRuleTable(f)
	if got := len(table.Table); got != 2 {
		t.Errorf("Error: len(table.Table) should be %v, got %v", 2, got)
	}
	// Report the length that was actually checked; the original message
	// printed len(table.Table) here, which gave a misleading "got" value.
	if got := len(table.Table["a"]); got != 2 {
		t.Errorf("Error: len(table.Table[\"a\"]) should be %v, got %v", 2, got)
	}
}
// validstemtests pairs candidate stems with whether TestValidStem expects
// validStem to accept them.
var validstemtests = []struct {
	stem  string
	valid bool
}{
	{stem: "xvzf", valid: false}, // no vowels
	{stem: "fire", valid: true},
	{stem: "aa", valid: false}, // no consonant
	{stem: "ab", valid: true},
	{stem: "a", valid: false},  // no consonant
	{stem: "ba", valid: false}, // a stem starting with a consonant needs at least 3 letters
	{stem: "baa", valid: true},
	{stem: "bba", valid: true},
}
// TestValidStem checks validStem against every validstemtests entry.
func TestValidStem(t *testing.T) {
	for idx := range validstemtests {
		tc := validstemtests[idx]
		got := validStem(tc.stem)
		if got == tc.valid {
			continue
		}
		t.Errorf("%v. validStem(\"%v\") should be %v, got %v", idx, tc.stem, tc.valid, got)
	}
}
// stemtests pairs input words with the stem TestStem expects
// DefaultRules.Stem to return for them.
var stemtests = []struct {
	in        string
	expecting string
}{
	{in: "at", expecting: "at"},             // Too short
	{in: "rack", expecting: "rack"},         // No 'k' rules exist
	{in: "aaron", expecting: "aaron"},       // 'N' rules exist but no 'n', or 'no' rule
	{in: "splat", expecting: "splat"},       // Resulting stem has no vowels
	{in: "doat", expecting: "doat"},         // Resulting stem starts with a consonant but only has 2 letters
	{in: "eat", expecting: "eat"},           // Resulting stem starts with a vowel but has only 1 letter
	{in: "ikat", expecting: "ik"},           // Resulting stem starts with a vowel and has 2 letters
	{in: "foreseen", expecting: "foreseen"}, // Check protect rule
	{in: "Ariaan", expecting: "aria"},       // Check intact rule
	{in: "explosion", expecting: "explod"},  // Check replace rule
	{in: "complicate", expecting: "comply"}, // Check partial replacement
	{in: "EXPLOSION", expecting: "explod"},  // Check all caps
}
// TestStem stems every stemtests input with DefaultRules and reports each
// result that differs from the expected stem.
func TestStem(t *testing.T) {
	for idx := range stemtests {
		tc := stemtests[idx]
		got := DefaultRules.Stem(tc.in)
		if got == tc.expecting {
			continue
		}
		t.Errorf("%v. Error: stemming \"%v\", expected %v, got %v", idx, tc.in, tc.expecting, got)
	}
}