1
0
Fork 0
mirror of https://github.com/documize/community.git synced 2025-07-19 05:09:42 +02:00

Improved MS Word document name extraction

Use document filename and not H1 title inside document.

Supports both Linux (forward slash) and Windows (backslash) path separators.
This commit is contained in:
McMatts 2019-02-05 20:25:36 +00:00
parent f3e66b73c1
commit 7f2d2c01a6
2 changed files with 79 additions and 4 deletions

View file

@ -17,6 +17,7 @@ import (
"fmt"
"io"
"net/http"
"path/filepath"
"strings"
api "github.com/documize/community/core/convapi"
@ -284,11 +285,11 @@ func convertFileResult(filename string, fileResult *api.DocumentConversionRespon
document.Job = ""
document.Location = filename
// Make document name from filename minus extension.
document.Name = GetDocumentNameFromFilename(filename)
document.Slug = stringutil.MakeSlug(document.Name)
if fileResult != nil {
if len(fileResult.Pages) > 0 {
document.Name = fileResult.Pages[0].Title
document.Slug = stringutil.MakeSlug(fileResult.Pages[0].Title)
}
document.Excerpt = fileResult.Excerpt
}
@ -296,3 +297,24 @@ func convertFileResult(filename string, fileResult *api.DocumentConversionRespon
return document
}
// GetDocumentNameFromFilename returns the last path element of filename with
// its file extension removed.
//
// Forward slashes are treated as the path separator when at least one is
// present; backslashes are only considered when filename contains no forward
// slash at all. A filename without any separator is used as-is before the
// extension is stripped.
func GetDocumentNameFromFilename(filename string) (dn string) {
	dn = filename

	if slash := strings.LastIndex(filename, "/"); slash >= 0 {
		// Unix-style path: keep everything after the final forward slash.
		dn = filename[slash+1:]
	} else if backslash := strings.LastIndex(filename, "\\"); backslash >= 0 {
		// Windows-style path: keep everything after the final backslash.
		dn = filename[backslash+1:]
	}

	// Drop the trailing extension (only the last one, e.g. "a.docx.ppt" -> "a.docx").
	return strings.TrimSuffix(dn, filepath.Ext(dn))
}

View file

@ -0,0 +1,53 @@
package conversion
import (
"testing"
)
// TestFilename checks that GetDocumentNameFromFilename reduces a path to its
// final element without the extension, for Unix-style paths, bare filenames
// and backslash-separated paths.
func TestFilename(t *testing.T) {
	cases := []struct {
		path string // input handed to GetDocumentNameFromFilename
		want string // expected document name
	}{
		{"/var/folders/vx/lyhy36cn5kl994qj0pt6hgb80000gp/T/documize/_uploads/970f9d07-9bea-48a2-4333-b3c50bca42cd/Demo.docx", "Demo"},
		{"/var/Demo Docs.docx", "Demo Docs"},
		{"Demo Docs.docx", "Demo Docs"},
		{"/DemoDocs.docx", "DemoDocs"},
		// Only the last extension is removed.
		{"a\\b\\c\\DemoDocs.docx.ppt", "DemoDocs.docx"},
	}

	for _, tc := range cases {
		path, want := tc.path, tc.want
		t.Run("Test filename "+path, func(t *testing.T) {
			got := GetDocumentNameFromFilename(path)
			if got != want {
				t.Error("Expected " + want + ", got " + got)
			}
			t.Log(got)
		})
	}
}