mirror of
https://github.com/documize/community.git
synced 2025-07-19 05:09:42 +02:00
Improved MS Word document name extraction
Use document filename and not H1 title inside document. Supports both Linux & MSFT.
This commit is contained in:
parent
f3e66b73c1
commit
7f2d2c01a6
2 changed files with 79 additions and 4 deletions
|
@ -17,6 +17,7 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
api "github.com/documize/community/core/convapi"
|
||||
|
@ -284,11 +285,11 @@ func convertFileResult(filename string, fileResult *api.DocumentConversionRespon
|
|||
document.Job = ""
|
||||
document.Location = filename
|
||||
|
||||
// Make document name from filename minus extension.
|
||||
document.Name = GetDocumentNameFromFilename(filename)
|
||||
document.Slug = stringutil.MakeSlug(document.Name)
|
||||
|
||||
if fileResult != nil {
|
||||
if len(fileResult.Pages) > 0 {
|
||||
document.Name = fileResult.Pages[0].Title
|
||||
document.Slug = stringutil.MakeSlug(fileResult.Pages[0].Title)
|
||||
}
|
||||
document.Excerpt = fileResult.Excerpt
|
||||
}
|
||||
|
||||
|
@ -296,3 +297,24 @@ func convertFileResult(filename string, fileResult *api.DocumentConversionRespon
|
|||
|
||||
return document
|
||||
}
|
||||
|
||||
// GetDocumentNameFromFilename returns the document name for filename: the
// final path element with its last extension removed.
//
// Both "/" (Linux/macOS) and "\" (Windows) are treated as path separators,
// whatever the host OS is, and the element after the last separator of either
// kind is used. This also covers mixed-separator paths such as
// `C:/Users/me\Demo.docx`. A filename with no separator is used as-is.
//
// Only the final extension is stripped, so "DemoDocs.docx.ppt" yields
// "DemoDocs.docx". An empty filename, or one ending in a separator, yields "".
func GetDocumentNameFromFilename(filename string) (dn string) {
	dn = filename

	// Keep only what follows the last Linux ("/") or Windows ("\") separator.
	if i := strings.LastIndexAny(filename, "/\\"); i >= 0 {
		dn = filename[i+1:]
	}

	// Remove file extension. dn no longer contains any separator, so
	// filepath.Ext sees a single path element on every platform.
	return strings.TrimSuffix(dn, filepath.Ext(dn))
}
|
||||
|
|
53
domain/conversion/conversion_test.go
Normal file
53
domain/conversion/conversion_test.go
Normal file
|
@ -0,0 +1,53 @@
|
|||
package conversion
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestFilename validates filename extraction from path
|
||||
func TestFilename(t *testing.T) {
|
||||
fn := "/var/folders/vx/lyhy36cn5kl994qj0pt6hgb80000gp/T/documize/_uploads/970f9d07-9bea-48a2-4333-b3c50bca42cd/Demo.docx"
|
||||
t.Run("Test filename "+fn, func(t *testing.T) {
|
||||
f := GetDocumentNameFromFilename(fn)
|
||||
if f != "Demo" {
|
||||
t.Error("Expected Demo, got " + f)
|
||||
}
|
||||
t.Log(f)
|
||||
})
|
||||
|
||||
fn = "/var/Demo Docs.docx"
|
||||
t.Run("Test filename "+fn, func(t *testing.T) {
|
||||
f := GetDocumentNameFromFilename(fn)
|
||||
if f != "Demo Docs" {
|
||||
t.Error("Expected Demo Docs, got " + f)
|
||||
}
|
||||
t.Log(f)
|
||||
})
|
||||
|
||||
fn = "Demo Docs.docx"
|
||||
t.Run("Test filename "+fn, func(t *testing.T) {
|
||||
f := GetDocumentNameFromFilename(fn)
|
||||
if f != "Demo Docs" {
|
||||
t.Error("Expected Demo Docs, got " + f)
|
||||
}
|
||||
t.Log(f)
|
||||
})
|
||||
|
||||
fn = "/DemoDocs.docx"
|
||||
t.Run("Test filename "+fn, func(t *testing.T) {
|
||||
f := GetDocumentNameFromFilename(fn)
|
||||
if f != "DemoDocs" {
|
||||
t.Error("Expected DemoDocs, got " + f)
|
||||
}
|
||||
t.Log(f)
|
||||
})
|
||||
|
||||
fn = "a\\b\\c\\DemoDocs.docx.ppt"
|
||||
t.Run("Test filename "+fn, func(t *testing.T) {
|
||||
f := GetDocumentNameFromFilename(fn)
|
||||
if f != "DemoDocs.docx" {
|
||||
t.Error("Expected DemoDocs.docx, got " + f)
|
||||
}
|
||||
t.Log(f)
|
||||
})
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue