From a9e182de2e9b51871304e12af4f509f5b7f036c6 Mon Sep 17 00:00:00 2001
From: Elliott Stoneham
Date: Thu, 22 Sep 2016 15:16:13 +0100
Subject: [PATCH 1/4] first cut word convert

---
Review note: relative to the original commit, the two os.Exit calls in
main.go have had their statuses swapped (non-zero on fatal error, zero on
success). The change is line-for-line, so hunk offsets in the later patches
are unaffected; the blob ids on the "index" lines still describe the
original content.

 cmd/wordconvert/README.md |   2 +
 cmd/wordconvert/main.go   | 227 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 229 insertions(+)
 create mode 100644 cmd/wordconvert/README.md
 create mode 100644 cmd/wordconvert/main.go

diff --git a/cmd/wordconvert/README.md b/cmd/wordconvert/README.md
new file mode 100644
index 00000000..762f8d9e
--- /dev/null
+++ b/cmd/wordconvert/README.md
@@ -0,0 +1,2 @@
+This directory contains a command line utility to convert multiple word files using
+[the Documize word conversion API](https://documize.com/word-convert).
\ No newline at end of file
diff --git a/cmd/wordconvert/main.go b/cmd/wordconvert/main.go
new file mode 100644
index 00000000..65e0f4b8
--- /dev/null
+++ b/cmd/wordconvert/main.go
@@ -0,0 +1,227 @@
+// Copyright 2016 Documize Inc. . All rights reserved.
+//
+// This software (Documize Community Edition) is licensed under
+// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
+//
+// You can operate outside the AGPL restrictions by purchasing
+// Documize Enterprise Edition and obtaining a commercial license
+// by contacting .
+//
+// https://documize.com
+
+// Package main contains a command line utility to convert multiple word documents using api.documize.com
+package main
+
+import (
+	"archive/zip"
+	"bytes"
+	"crypto/tls"
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"mime/multipart"
+	"net"
+	"net/http"
+	"os"
+	"path"
+	"strings"
+)
+
+const serverURLfmt = "https://%s/api/1/word"
+
+var server = flag.String("s", "api.documize.com:443", "the server")
+var outputDir = flag.String("o", ".", "specify the directory to hold the output")
+var ignoreBadCert = flag.Bool("k", false, "ignore bad certificate errors")
+var verbose = flag.Bool("v", false, "verbose progress messages")
+var stayziped = flag.Bool("z", false, "do not automatically unzip content")
+var token = flag.String("t", "", "authorization token (use your e-mail address during beta period)")
+var ignoreErrs = flag.Bool("e", false, "report errors on individual files, but continue")
+
+func validXtn(fn string) bool {
+	lcfn := strings.ToLower(fn)
+	for _, xtn := range []string{".doc", ".docx", ".pdf"} {
+		if strings.HasSuffix(lcfn, xtn) {
+			return true
+		}
+	}
+	return false
+}
+
+func errCanContinue(can bool, err error) bool {
+	if err == nil {
+		return false
+	}
+	fmt.Fprintln(os.Stderr, err)
+	if *ignoreErrs && can {
+		return true
+	}
+	os.Exit(1) // fatal error already printed to stderr: report failure to the caller
+	return true // never reached
+}
+
+func main() {
+
+	flag.Parse()
+
+	if *outputDir != "." {
+		if err := os.Mkdir(*outputDir, 0777); err != nil && !os.IsExist(err) {
+			errCanContinue(false, err)
+		}
+	}
+
+	host, _, err := net.SplitHostPort(*server)
+	errCanContinue(false, err)
+
+	tlc := &tls.Config{
+		InsecureSkipVerify: *ignoreBadCert,
+		ServerName:         host,
+	}
+
+	transport := &http.Transport{TLSClientConfig: tlc}
+	hclient := &http.Client{Transport: transport}
+
+	processFiles(hclient)
+
+	os.Exit(0) // processing ran to completion: report success
+}
+
+func processFiles(hclient *http.Client) {
+
+	for _, fileName := range flag.Args() {
+		if validXtn(fileName) {
+			if *verbose {
+				fmt.Println("processing", fileName)
+			}
+
+			content, err := ioutil.ReadFile(fileName)
+			if errCanContinue(true, err) {
+				continue
+			}
+
+			bodyBuf := &bytes.Buffer{}
+			bodyWriter := multipart.NewWriter(bodyBuf)
+
+			_, fn := path.Split(fileName)
+			fileWriter, err := bodyWriter.CreateFormFile("wordfile", fn)
+			if errCanContinue(true, err) {
+				continue
+			}
+
+			_, err = io.Copy(fileWriter, bytes.NewReader(content))
+			if errCanContinue(true, err) {
+				continue
+			}
+
+			contentType := bodyWriter.FormDataContentType()
+			err = bodyWriter.Close()
+			if errCanContinue(true, err) {
+				continue
+			}
+
+			target := fmt.Sprintf(serverURLfmt, *server)
+			if *token != "" {
+				target += "?token=" + *token
+			}
+
+			req, err := http.NewRequest("POST",
+				target,
+				bodyBuf)
+			if errCanContinue(true, err) {
+				continue
+			}
+
+			req.Header.Set("Content-Type", contentType)
+			resp, err := hclient.Do(req)
+			if errCanContinue(true, err) {
+				continue
+			}
+
+			zipdata, err := ioutil.ReadAll(resp.Body)
+			if errCanContinue(true, err) {
+				continue
+			}
+
+			resp.Body.Close() // ignore error
+
+			if resp.StatusCode != http.StatusOK {
+				if errCanContinue(true, errors.New("server returned status: "+resp.Status)) {
+					continue
+				}
+			}
+
+			targetDir := *outputDir + "/" + fn + ".content"
+			if *stayziped {
+				if err := ioutil.WriteFile(targetDir+".zip", zipdata, 0666); err != nil {
+					if errCanContinue(true, err) {
+						continue
+					}
+				}
+			} else {
+				if errCanContinue(true, unzipFiles(zipdata, targetDir)) {
+					continue
+				}
+			}
+		} else {
+			if *verbose {
+				fmt.Println("ignored", fileName)
+			}
+		}
+	}
+}
+
+func unzipFiles(zipdata []byte, targetDir string) error {
+	rdr, err := zip.NewReader(bytes.NewReader(zipdata), int64(len(zipdata)))
+	if err != nil {
+		return err
+	}
+
+	if err := os.Mkdir(targetDir, 0777); err != nil && !os.IsExist(err) {
+		return err
+	}
+
+fileLoop:
+	for _, zf := range rdr.File {
+		frc, err := zf.Open()
+		if errCanContinue(true, err) {
+			continue
+		}
+
+		filedata, err := ioutil.ReadAll(frc)
+		if errCanContinue(true, err) {
+			continue
+		}
+
+		subTarget := targetDir + "/" + zf.Name
+
+		subDir := path.Dir(subTarget)
+
+		if subDir != targetDir {
+			rump := strings.TrimPrefix(subDir, targetDir)
+			tree := strings.Split(rump, "/")
+			built := ""
+			for _, thisPart := range tree[1:] { // make sure we have a directory at each level of the tree
+				built += "/" + thisPart
+				if err := os.Mkdir(targetDir+built, 0777); err != nil && !os.IsExist(err) {
+					if errCanContinue(true, err) {
+						continue fileLoop
+					}
+				}
+			}
+		}
+
+		if err := ioutil.WriteFile(subTarget, filedata, 0666); err != nil {
+			if errCanContinue(true, err) {
+				continue
+			}
+		}
+
+		if *verbose {
+			fmt.Println("wrote", subTarget)
+		}
+		frc.Close()
+	}
+
+	return nil
+}

From 43a951f02e9f2c767dbc0541b0190a8161ad2270 Mon Sep 17 00:00:00 2001
From: Elliott Stoneham
Date: Thu, 22 Sep 2016 21:02:41 +0100
Subject: [PATCH 2/4] add word convert

---
 build-wordconvert.sh    | 19 +++++++++++++++++++
 cmd/wordconvert/main.go |  7 ++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100755 build-wordconvert.sh

diff --git a/build-wordconvert.sh b/build-wordconvert.sh
new file mode 100755
index 00000000..67da2333
--- /dev/null
+++ b/build-wordconvert.sh
@@ -0,0 +1,19 @@
+#! /bin/bash
+
+NOW=$(date)
+echo "Build process started $NOW"
+
+cd ..
+for arch in amd64 ; do
+    for os in darwin linux windows ; do
+        if [ "$os" == "windows" ] ; then
+            echo "Compiling wordconvert-$os.exe"
+            env GOOS=$os GOARCH=$arch go build -o ./bin/wordconvert-$os.exe github.com/documize/community/cmd/wordconvert
+        else
+            echo "Compiling wordconvert-$os"
+            env GOOS=$os GOARCH=$arch go build -o ./bin/wordconvert-$os github.com/documize/community/cmd/wordconvert
+        fi
+    done
+done
+
+echo "Finished."
diff --git a/cmd/wordconvert/main.go b/cmd/wordconvert/main.go
index 65e0f4b8..b71d02db 100644
--- a/cmd/wordconvert/main.go
+++ b/cmd/wordconvert/main.go
@@ -36,8 +36,9 @@ var outputDir = flag.String("o", ".", "specify the directory to hold the output"
 var ignoreBadCert = flag.Bool("k", false, "ignore bad certificate errors")
 var verbose = flag.Bool("v", false, "verbose progress messages")
 var stayziped = flag.Bool("z", false, "do not automatically unzip content")
-var token = flag.String("t", "", "authorization token (use your e-mail address during beta period)")
+var token = flag.String("t", "", "authorization token (if you use your e-mail address here during preview period, we will tell you before changes are made)")
 var ignoreErrs = flag.Bool("e", false, "report errors on individual files, but continue")
+var version = flag.Bool("version", false, "display the version of this code")
 
 func validXtn(fn string) bool {
 	lcfn := strings.ToLower(fn)
@@ -65,6 +66,10 @@ func main() {
 
 	flag.Parse()
 
+	if *version {
+		fmt.Println("Version: 0.1 preview")
+	}
+
 	if *outputDir != "." {
 		if err := os.Mkdir(*outputDir, 0777); err != nil && !os.IsExist(err) {
 			errCanContinue(false, err)

From 2f19d40d63428be94a9f62d38e8074f493a0e356 Mon Sep 17 00:00:00 2001
From: Elliott Stoneham
Date: Fri, 23 Sep 2016 14:29:31 +0100
Subject: [PATCH 3/4] fix word convert build error

---
 build-wordconvert.sh                        | 7 +++----
 cmd/wordconvert/{main.go => wordconvert.go} | 0
 2 files changed, 3 insertions(+), 4 deletions(-)
 rename cmd/wordconvert/{main.go => wordconvert.go} (100%)

diff --git a/build-wordconvert.sh b/build-wordconvert.sh
index 67da2333..9c096f08 100755
--- a/build-wordconvert.sh
+++ b/build-wordconvert.sh
@@ -3,15 +3,14 @@
 NOW=$(date)
 echo "Build process started $NOW"
 
-cd ..
 for arch in amd64 ; do
     for os in darwin linux windows ; do
         if [ "$os" == "windows" ] ; then
-            echo "Compiling wordconvert-$os.exe"
-            env GOOS=$os GOARCH=$arch go build -o ./bin/wordconvert-$os.exe github.com/documize/community/cmd/wordconvert
+            echo "Compiling wordconvert.exe"
+            env GOOS=$os GOARCH=$arch go build -o bin/wordconvert.exe ./cmd/wordconvert
         else
             echo "Compiling wordconvert-$os"
-            env GOOS=$os GOARCH=$arch go build -o ./bin/wordconvert-$os github.com/documize/community/cmd/wordconvert
+            env GOOS=$os GOARCH=$arch go build -o bin/wordconvert-$os ./cmd/wordconvert
         fi
     done
 done
diff --git a/cmd/wordconvert/main.go b/cmd/wordconvert/wordconvert.go
similarity index 100%
rename from cmd/wordconvert/main.go
rename to cmd/wordconvert/wordconvert.go

From a8a0ee7c6f8bba13f32c4443d1580b39ef9732d9 Mon Sep 17 00:00:00 2001
From: Elliott Stoneham
Date: Fri, 23 Sep 2016 15:31:42 +0100
Subject: [PATCH 4/4] tidy code, link to wordconvert from README.md

---
 README.md                      |  4 ++++
 cmd/wordconvert/wordconvert.go | 15 ++++++++++++---
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 2ee29e63..30e0d91a 100644
--- a/README.md
+++ b/README.md
@@ -36,6 +36,10 @@ Documize is compatible with Auth0 identity as a service.
 
 JWT Auth for open source projects
 
+## Word Conversion to HTML
+
+* [Code for ```wordconvert``` utility](https://github.com/documize/community/tree/master/cmd/wordconvert)
+
 ## Legal
 
 https://documize.com
diff --git a/cmd/wordconvert/wordconvert.go b/cmd/wordconvert/wordconvert.go
index b71d02db..97a74bd6 100644
--- a/cmd/wordconvert/wordconvert.go
+++ b/cmd/wordconvert/wordconvert.go
@@ -40,6 +40,7 @@ var token = flag.String("t", "", "authorization token (if you use your e-mail ad
 var ignoreErrs = flag.Bool("e", false, "report errors on individual files, but continue")
 var version = flag.Bool("version", false, "display the version of this code")
 
+// does the file have a valid extension
 func validXtn(fn string) bool {
 	lcfn := strings.ToLower(fn)
 	for _, xtn := range []string{".doc", ".docx", ".pdf"} {
@@ -50,6 +51,7 @@ func validXtn(fn string) bool {
 	return false
 }
 
+// errCanContinue is the mechanism to print errors yet continue, if that command line option is chosen
 func errCanContinue(can bool, err error) bool {
 	if err == nil {
 		return false
@@ -95,7 +97,9 @@ func main() {
 func processFiles(hclient *http.Client) {
 
 	for _, fileName := range flag.Args() {
+
 		if validXtn(fileName) {
+
 			if *verbose {
 				fmt.Println("processing", fileName)
 			}
@@ -109,7 +113,7 @@ func processFiles(hclient *http.Client) {
 			bodyWriter := multipart.NewWriter(bodyBuf)
 
 			_, fn := path.Split(fileName)
-			fileWriter, err := bodyWriter.CreateFormFile("wordfile", fn)
+			fileWriter, err := bodyWriter.CreateFormFile("wordfile", fn) // name as expected by the API
 			if errCanContinue(true, err) {
 				continue
 			}
@@ -127,7 +131,7 @@ func processFiles(hclient *http.Client) {
 
 			target := fmt.Sprintf(serverURLfmt, *server)
 			if *token != "" {
-				target += "?token=" + *token
+				target += "?token=" + *token // NOTE: after the preview phase, token will not be optional
 			}
 
 			req, err := http.NewRequest("POST",
@@ -168,21 +172,26 @@ func processFiles(hclient *http.Client) {
 					continue
 				}
 			}
+
 		} else {
+
 			if *verbose {
 				fmt.Println("ignored", fileName)
 			}
+
 		}
 	}
 }
 
+// simple unzip
 func unzipFiles(zipdata []byte, targetDir string) error {
+
 	rdr, err := zip.NewReader(bytes.NewReader(zipdata), int64(len(zipdata)))
 	if err != nil {
 		return err
 	}
 
-	if err := os.Mkdir(targetDir, 0777); err != nil && !os.IsExist(err) {
+	if err := os.Mkdir(targetDir, 0777); err != nil && !os.IsExist(err) { // make sure the target directory exists
 		return err
 	}
 