diff --git a/README.md b/README.md index 2ee29e63..30e0d91a 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,10 @@ Documize is compatible with Auth0 identity as a service. JWT Auth for open source projects +## Word Conversion to HTML + +* [Code for ```wordconvert``` utility](https://github.com/documize/community/tree/master/cmd/wordconvert) + ## Legal https://documize.com diff --git a/build-wordconvert.sh b/build-wordconvert.sh new file mode 100755 index 00000000..9c096f08 --- /dev/null +++ b/build-wordconvert.sh @@ -0,0 +1,18 @@ +#! /bin/bash + +NOW=$(date) +echo "Build process started $NOW" + +for arch in amd64 ; do + for os in darwin linux windows ; do + if [ "$os" == "windows" ] ; then + echo "Compiling wordconvert.exe" + env GOOS=$os GOARCH=$arch go build -o bin/wordconvert.exe ./cmd/wordconvert + else + echo "Compiling wordconvert-$os" + env GOOS=$os GOARCH=$arch go build -o bin/wordconvert-$os ./cmd/wordconvert + fi + done +done + +echo "Finished." diff --git a/cmd/wordconvert/README.md b/cmd/wordconvert/README.md new file mode 100644 index 00000000..762f8d9e --- /dev/null +++ b/cmd/wordconvert/README.md @@ -0,0 +1,2 @@ +This directory contains a command line utility to convert multiple Word files using +[the Documize Word conversion API](https://documize.com/word-convert). \ No newline at end of file diff --git a/cmd/wordconvert/wordconvert.go b/cmd/wordconvert/wordconvert.go new file mode 100644 index 00000000..97a74bd6 --- /dev/null +++ b/cmd/wordconvert/wordconvert.go @@ -0,0 +1,241 @@ +// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved. +// +// This software (Documize Community Edition) is licensed under +// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html +// +// You can operate outside the AGPL restrictions by purchasing +// Documize Enterprise Edition and obtaining a commercial license +// by contacting <sales@documize.com>.
//
// https://documize.com

// Package main contains a command line utility to convert multiple Word documents using api.documize.com
package main

import (
	"archive/zip"
	"bytes"
	"crypto/tls"
	"errors"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"mime/multipart"
	"net"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"strings"
)

// serverURLfmt is the conversion endpoint; %s receives the host:port given by -s.
const serverURLfmt = "https://%s/api/1/word"

// Command line options.
var (
	server        = flag.String("s", "api.documize.com:443", "the server")
	outputDir     = flag.String("o", ".", "specify the directory to hold the output")
	ignoreBadCert = flag.Bool("k", false, "ignore bad certificate errors")
	verbose       = flag.Bool("v", false, "verbose progress messages")
	stayziped     = flag.Bool("z", false, "do not automatically unzip content")
	token         = flag.String("t", "", "authorization token (if you use your e-mail address here during preview period, we will tell you before changes are made)")
	ignoreErrs    = flag.Bool("e", false, "report errors on individual files, but continue")
	version       = flag.Bool("version", false, "display the version of this code")
)

// failed records that at least one error has been reported, so that main can
// finish with a non-zero exit status even when -e allowed processing to continue.
var failed bool

// validXtn reports whether fn ends in one of the extensions accepted for
// conversion (.doc, .docx or .pdf); the comparison ignores case.
func validXtn(fn string) bool {
	lcfn := strings.ToLower(fn)
	for _, xtn := range []string{".doc", ".docx", ".pdf"} {
		if strings.HasSuffix(lcfn, xtn) {
			return true
		}
	}
	return false
}

// errCanContinue is the mechanism to print errors yet continue, if that
// command line option is chosen.
//
// It returns false when err is nil. Otherwise err is written to stderr and,
// if the -e flag is set and can is true, it returns true so that the caller
// can skip the current item. In every other case the process exits with
// status 1.
func errCanContinue(can bool, err error) bool {
	if err == nil {
		return false
	}
	fmt.Fprintln(os.Stderr, err)
	failed = true
	if !(*ignoreErrs && can) {
		os.Exit(1) // a failure must be visible to the calling shell
	}
	return true
}

// main parses the flags, prepares the output directory and the HTTPS client,
// then converts every file named on the command line. The exit status is 0
// only if no error was reported.
func main() {
	flag.Parse()

	if *version {
		fmt.Println("Version: 0.1 preview")
	}

	if *outputDir != "." {
		// MkdirAll succeeds if the directory already exists and also
		// creates any missing parent directories.
		errCanContinue(false, os.MkdirAll(*outputDir, 0777))
	}

	host, _, err := net.SplitHostPort(*server)
	errCanContinue(false, err)

	tlc := &tls.Config{
		InsecureSkipVerify: *ignoreBadCert,
		ServerName:         host,
	}
	hclient := &http.Client{Transport: &http.Transport{TLSClientConfig: tlc}}

	processFiles(hclient)

	if failed {
		os.Exit(1)
	}
}

// processFiles converts each command line argument that has a valid
// extension, using hclient to talk to the server. Arguments with other
// extensions are skipped (and reported when -v is set). A failure on one
// file is handled by errCanContinue.
func processFiles(hclient *http.Client) {
	for _, fileName := range flag.Args() {
		if !validXtn(fileName) {
			if *verbose {
				fmt.Println("ignored", fileName)
			}
			continue
		}

		if *verbose {
			fmt.Println("processing", fileName)
		}

		if err := convertFile(hclient, fileName); err != nil {
			errCanContinue(true, fmt.Errorf("%s: %v", fileName, err))
		}
	}
}

// convertFile posts fileName to the conversion endpoint as a multipart form
// and stores the returned zip data under the output directory, either as
// "<name>.content.zip" (with -z) or unpacked into "<name>.content".
func convertFile(hclient *http.Client, fileName string) error {
	content, err := ioutil.ReadFile(fileName)
	if err != nil {
		return err
	}

	bodyBuf := &bytes.Buffer{}
	bodyWriter := multipart.NewWriter(bodyBuf)

	fn := filepath.Base(fileName)
	fileWriter, err := bodyWriter.CreateFormFile("wordfile", fn) // name as expected by the API
	if err != nil {
		return err
	}
	if _, err = io.Copy(fileWriter, bytes.NewReader(content)); err != nil {
		return err
	}

	contentType := bodyWriter.FormDataContentType()
	if err = bodyWriter.Close(); err != nil {
		return err
	}

	target := fmt.Sprintf(serverURLfmt, *server)
	if *token != "" {
		// NOTE: after the preview phase, token will not be optional.
		// Escaped because the token may be an e-mail address containing
		// characters such as '+' or '&' that are special in a query string.
		target += "?token=" + url.QueryEscape(*token)
	}

	req, err := http.NewRequest("POST", target, bodyBuf)
	if err != nil {
		return err
	}
	req.Header.Set("Content-Type", contentType)

	resp, err := hclient.Do(req)
	if err != nil {
		return err
	}
	defer resp.Body.Close() // closed on every path, including read failures

	zipdata, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return err
	}
	if resp.StatusCode != http.StatusOK {
		return errors.New("server returned status: " + resp.Status)
	}

	targetDir := filepath.Join(*outputDir, fn+".content")
	if *stayziped {
		return ioutil.WriteFile(targetDir+".zip", zipdata, 0666)
	}
	return unzipFiles(zipdata, targetDir)
}

// unzipFiles unpacks the zip archive held in zipdata into targetDir, creating
// targetDir and any sub-directories as required.
//
// An error is returned if zipdata cannot be read as a zip archive or if
// targetDir cannot be created. A failure on an individual entry is passed to
// errCanContinue, so with -e the remaining entries are still extracted.
func unzipFiles(zipdata []byte, targetDir string) error {
	rdr, err := zip.NewReader(bytes.NewReader(zipdata), int64(len(zipdata)))
	if err != nil {
		return err
	}

	if err := os.MkdirAll(targetDir, 0777); err != nil { // make sure the target directory exists
		return err
	}

	for _, zf := range rdr.File {
		errCanContinue(true, extractEntry(zf, targetDir))
	}

	return nil
}

// extractEntry writes the single archive entry zf below targetDir. Entries
// whose name would resolve to targetDir itself or to a location outside it
// are rejected, because the archive comes from the network.
func extractEntry(zf *zip.File, targetDir string) error {
	subTarget := filepath.Join(targetDir, filepath.FromSlash(zf.Name))

	rel, err := filepath.Rel(targetDir, subTarget)
	if err != nil || rel == "." || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return errors.New("illegal file path in zip archive: " + zf.Name)
	}

	if zf.FileInfo().IsDir() {
		return os.MkdirAll(subTarget, 0777)
	}

	// make sure we have a directory at each level of the tree
	if err := os.MkdirAll(filepath.Dir(subTarget), 0777); err != nil {
		return err
	}

	frc, err := zf.Open()
	if err != nil {
		return err
	}
	defer frc.Close()

	filedata, err := ioutil.ReadAll(frc)
	if err != nil {
		return err
	}

	if err := ioutil.WriteFile(subTarget, filedata, 0666); err != nil {
		return err
	}

	if *verbose {
		fmt.Println("wrote", subTarget)
	}
	return nil
}