mirror of
https://github.com/documize/community.git
synced 2025-07-19 21:29:42 +02:00
restructure directories
This commit is contained in:
parent
7e4ed6545b
commit
a2ce777762
159 changed files with 320 additions and 323 deletions
384
core/api/convert/html/html_test.go
Normal file
384
core/api/convert/html/html_test.go
Normal file
|
@ -0,0 +1,384 @@
|
|||
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
|
||||
//
|
||||
// This software (Documize Community Edition) is licensed under
|
||||
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
|
||||
//
|
||||
// You can operate outside the AGPL restrictions by purchasing
|
||||
// Documize Enterprise Edition and obtaining a commercial license
|
||||
// by contacting <sales@documize.com>.
|
||||
//
|
||||
// https://documize.com
|
||||
|
||||
package html_test
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
import api "github.com/documize/community/core/convapi"
|
||||
import "github.com/documize/community/core/api/convert/html"
|
||||
|
||||
// b is the HTML fixture fed to html.SplitIfHTML by TestHTML: a rendering of
// the Markdown "Basics" page followed by h4/h5/h6 headings, the h5 being
// nested inside several <div> elements.
//
// Example markup shown inside <code> and <pre><code> sections is
// entity-escaped (&lt;, &gt;, &amp;) so that it is plain text rather than
// real elements; only the genuine headings are real tags. TestHTML relies on
// this when it expects "Header5" at res.Pages[10].
//
// Double quotes stand in for Markdown backticks in the code-span example
// because a Go raw string literal cannot contain a backtick.
const b string = `
<h1>Markdown: Basics</h1>

<ul id="ProjectSubmenu">
<li><a href="/projects/markdown/" title="Markdown Project Page">Main</a></li>
<li><a class="selected" title="Markdown Basics">Basics</a></li>
<li><a href="/projects/markdown/syntax" title="Markdown Syntax Documentation">Syntax</a></li>
<li><a href="/projects/markdown/license" title="Pricing and License Information">License</a></li>
<li><a href="/projects/markdown/dingus" title="Online Markdown Web Form">Dingus</a></li>
</ul>

<h2>Getting the Gist of Markdown's Formatting Syntax</h2>

<p>This page offers a brief overview of what it's like to use Markdown.
The <a href="/projects/markdown/syntax" title="Markdown Syntax">syntax page</a> provides complete, detailed documentation for
every feature, but Markdown should be very easy to pick up simply by
looking at a few examples of it in action. The examples on this page
are written in a before/after style, showing example syntax and the
HTML output produced by Markdown.</p>

<p>It's also helpful to simply try Markdown out; the <a href="/projects/markdown/dingus" title="Markdown Dingus">Dingus</a> is a
web application that allows you type your own Markdown-formatted text
and translate it to XHTML.</p>

<p><strong>Note:</strong> This document is itself written using Markdown; you
can <a href="/projects/markdown/basics.text">see the source for it by adding '.text' to the URL</a>.</p>

<h2>Paragraphs, Headers, Blockquotes</h2>

<p>A paragraph is simply one or more consecutive lines of text, separated
by one or more blank lines. (A blank line is any line that looks like a
blank line -- a line containing nothing spaces or tabs is considered
blank.) Normal paragraphs should not be intended with spaces or tabs.</p>

<p>Markdown offers two styles of headers: <em>Setext</em> and <em>atx</em>.
Setext-style headers for <code>&lt;h1&gt;</code> and <code>&lt;h2&gt;</code> are created by
"underlining" with equal signs (<code>=</code>) and hyphens (<code>-</code>), respectively.
To create an atx-style header, you put 1-6 hash marks (<code>#</code>) at the
beginning of the line -- the number of hashes equals the resulting
HTML header level.</p>

<p>Blockquotes are indicated using email-style '<code>&gt;</code>' angle brackets.</p>

<p>Markdown:</p>

<pre><code>A First Level Header
====================

A Second Level Header
---------------------

Now is the time for all good men to come to
the aid of their country. This is just a
regular paragraph.

The quick brown fox jumped over the lazy
dog's back.

### Header 3

&gt; This is a blockquote.
&gt;
&gt; This is the second paragraph in the blockquote.
&gt;
&gt; ## This is an H2 in a blockquote
</code></pre>

<p>Output:</p>

<pre><code>&lt;h1&gt;A First Level Header&lt;/h1&gt;

&lt;h2&gt;A Second Level Header&lt;/h2&gt;

&lt;p&gt;Now is the time for all good men to come to
the aid of their country. This is just a
regular paragraph.&lt;/p&gt;

&lt;p&gt;The quick brown fox jumped over the lazy
dog's back.&lt;/p&gt;

&lt;h3&gt;Header 3&lt;/h3&gt;

&lt;blockquote&gt;
&lt;p&gt;This is a blockquote.&lt;/p&gt;

&lt;p&gt;This is the second paragraph in the blockquote.&lt;/p&gt;

&lt;h2&gt;This is an H2 in a blockquote&lt;/h2&gt;
&lt;/blockquote&gt;
</code></pre>

<h3>Phrase Emphasis</h3>

<p>Markdown uses asterisks and underscores to indicate spans of emphasis.</p>

<p>Markdown:</p>

<pre><code>Some of these words *are emphasized*.
Some of these words _are emphasized also_.

Use two asterisks for **strong emphasis**.
Or, if you prefer, __use two underscores instead__.
</code></pre>

<p>Output:</p>

<pre><code>&lt;p&gt;Some of these words &lt;em&gt;are emphasized&lt;/em&gt;.
Some of these words &lt;em&gt;are emphasized also&lt;/em&gt;.&lt;/p&gt;

&lt;p&gt;Use two asterisks for &lt;strong&gt;strong emphasis&lt;/strong&gt;.
Or, if you prefer, &lt;strong&gt;use two underscores instead&lt;/strong&gt;.&lt;/p&gt;
</code></pre>

<h2>Lists</h2>

<p>Unordered (bulleted) lists use asterisks, pluses, and hyphens (<code>*</code>,
<code>+</code>, and <code>-</code>) as list markers. These three markers are
interchangable; this:</p>

<pre><code>* Candy.
* Gum.
* Booze.
</code></pre>

<p>this:</p>

<pre><code>+ Candy.
+ Gum.
+ Booze.
</code></pre>

<p>and this:</p>

<pre><code>- Candy.
- Gum.
- Booze.
</code></pre>

<p>all produce the same output:</p>

<pre><code>&lt;ul&gt;
&lt;li&gt;Candy.&lt;/li&gt;
&lt;li&gt;Gum.&lt;/li&gt;
&lt;li&gt;Booze.&lt;/li&gt;
&lt;/ul&gt;
</code></pre>

<p>Ordered (numbered) lists use regular numbers, followed by periods, as
list markers:</p>

<pre><code>1. Red
2. Green
3. Blue
</code></pre>

<p>Output:</p>

<pre><code>&lt;ol&gt;
&lt;li&gt;Red&lt;/li&gt;
&lt;li&gt;Green&lt;/li&gt;
&lt;li&gt;Blue&lt;/li&gt;
&lt;/ol&gt;
</code></pre>

<p>If you put blank lines between items, you'll get <code>&lt;p&gt;</code> tags for the
list item text. You can create multi-paragraph list items by indenting
the paragraphs by 4 spaces or 1 tab:</p>

<pre><code>* A list item.

With multiple paragraphs.

* Another item in the list.
</code></pre>

<p>Output:</p>

<pre><code>&lt;ul&gt;
&lt;li&gt;&lt;p&gt;A list item.&lt;/p&gt;
&lt;p&gt;With multiple paragraphs.&lt;/p&gt;&lt;/li&gt;
&lt;li&gt;&lt;p&gt;Another item in the list.&lt;/p&gt;&lt;/li&gt;
&lt;/ul&gt;
</code></pre>

<h3>Links</h3>

<p>Markdown supports two styles for creating links: <em>inline</em> and
<em>reference</em>. With both styles, you use square brackets to delimit the
text you want to turn into a link.</p>

<p>Inline-style links use parentheses immediately after the link text.
For example:</p>

<pre><code>This is an [example link](http://example.com/).
</code></pre>

<p>Output:</p>

<pre><code>&lt;p&gt;This is an &lt;a href="http://example.com/"&gt;
example link&lt;/a&gt;.&lt;/p&gt;
</code></pre>

<p>Optionally, you may include a title attribute in the parentheses:</p>

<pre><code>This is an [example link](http://example.com/ "With a Title").
</code></pre>

<p>Output:</p>

<pre><code>&lt;p&gt;This is an &lt;a href="http://example.com/" title="With a Title"&gt;
example link&lt;/a&gt;.&lt;/p&gt;
</code></pre>

<p>Reference-style links allow you to refer to your links by names, which
you define elsewhere in your document:</p>

<pre><code>I get 10 times more traffic from [Google][1] than from
[Yahoo][2] or [MSN][3].

[1]: http://google.com/ "Google"
[2]: http://search.yahoo.com/ "Yahoo Search"
[3]: http://search.msn.com/ "MSN Search"
</code></pre>

<p>Output:</p>

<pre><code>&lt;p&gt;I get 10 times more traffic from &lt;a href="http://google.com/"
title="Google"&gt;Google&lt;/a&gt; than from &lt;a href="http://search.yahoo.com/"
title="Yahoo Search"&gt;Yahoo&lt;/a&gt; or &lt;a href="http://search.msn.com/"
title="MSN Search"&gt;MSN&lt;/a&gt;.&lt;/p&gt;
</code></pre>

<p>The title attribute is optional. Link names may contain letters,
numbers and spaces, but are <em>not</em> case sensitive:</p>

<pre><code>I start my morning with a cup of coffee and
[The New York Times][NY Times].

[ny times]: http://www.nytimes.com/
</code></pre>

<p>Output:</p>

<pre><code>&lt;p&gt;I start my morning with a cup of coffee and
&lt;a href="http://www.nytimes.com/"&gt;The New York Times&lt;/a&gt;.&lt;/p&gt;
</code></pre>

<h3>Images</h3>

<p>Image syntax is very much like link syntax.</p>

<p>Inline (titles are optional):</p>

<pre><code>![alt text](/path/to/img.jpg "Title")
</code></pre>

<p>Reference-style:</p>

<pre><code>![alt text][id]

[id]: /path/to/img.jpg "Title"
</code></pre>

<p>Both of the above examples produce the same output:</p>

<pre><code>&lt;img src="/path/to/img.jpg" alt="alt text" title="Title" /&gt;
</code></pre>

<h3>Code</h3>

<p>In a regular paragraph, you can create code span by wrapping text in
backtick quotes. Any ampersands (<code>&amp;</code>) and angle brackets (<code>&lt;</code> or
<code>&gt;</code>) will automatically be translated into HTML entities. This makes
it easy to use Markdown to write about HTML example code:</p>

<pre><code>I strongly recommend against using any "&lt;blink&gt;" tags.

I wish SmartyPants used named entities like "&amp;mdash;""
instead of decimal-encoded entites like "&amp;#8212;".
</code></pre>

<p>Output:</p>

<pre><code>&lt;p&gt;I strongly recommend against using any
&lt;code&gt;&amp;lt;blink&amp;gt;&lt;/code&gt; tags.&lt;/p&gt;

&lt;p&gt;I wish SmartyPants used named entities like
&lt;code&gt;&amp;amp;mdash;&lt;/code&gt; instead of decimal-encoded
entites like &lt;code&gt;&amp;amp;#8212;&lt;/code&gt;.&lt;/p&gt;
</code></pre>

<p>To specify an entire block of pre-formatted code, indent every line of
the block by 4 spaces or 1 tab. Just like with code spans, <code>&amp;</code>, <code>&lt;</code>,
and <code>&gt;</code> characters will be escaped automatically.</p>

<p>Markdown:</p>

<pre><code>If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:

&lt;blockquote&gt;
&lt;p&gt;For example.&lt;/p&gt;
&lt;/blockquote&gt;
</code></pre>

<p>Output:</p>

<pre><code>&lt;p&gt;If you want your page to validate under XHTML 1.0 Strict,
you've got to put paragraph tags in your blockquotes:&lt;/p&gt;

&lt;pre&gt;&lt;code&gt;&amp;lt;blockquote&amp;gt;
&amp;lt;p&amp;gt;For example.&amp;lt;/p&amp;gt;
&amp;lt;/blockquote&amp;gt;
&lt;/code&gt;&lt;/pre&gt;
</code></pre>

<h4>Header4</h4>
<div><div><div><div><div><div>
<h5>Header5</h5>Body 555.
</div></div></div></div></div></div>
<h6>Header6</h6>

`
|
||||
|
||||
func TestHTML(t *testing.T) {
|
||||
|
||||
req := &api.DocumentConversionRequest{}
|
||||
res := &api.DocumentConversionResponse{}
|
||||
|
||||
err := html.SplitIfHTML(req, res)
|
||||
if err != nil || len(res.PagesHTML) != 0 || len(res.Pages) != 0 || len(res.EmbeddedFiles) != 0 {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
titleTooBig := []byte("<h1>")
|
||||
for i := 0; i < 2048; i++ {
|
||||
titleTooBig = append(titleTooBig, []byte("title too long ")...)
|
||||
}
|
||||
titleTooBig = append(titleTooBig, []byte("</h1>")...)
|
||||
req = &api.DocumentConversionRequest{}
|
||||
res = &api.DocumentConversionResponse{PagesHTML: titleTooBig}
|
||||
err = html.SplitIfHTML(req, res)
|
||||
if err != nil || len(res.Pages[0].Title) > 2000 {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
req = &api.DocumentConversionRequest{}
|
||||
res = &api.DocumentConversionResponse{PagesHTML: []byte(b)}
|
||||
err = html.SplitIfHTML(req, res)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
//for p, pg := range res.Pages {
|
||||
// t.Logf("%d %d %d %s", p, pg.Level, len(pg.Body), pg.Title)
|
||||
//}
|
||||
if !strings.HasPrefix(res.Pages[10].Title, "Header5") ||
|
||||
!strings.HasPrefix(string(res.Pages[10].Body), "Body 555.") {
|
||||
t.Errorf("wrong page ten title: `%s` body: `%s`", res.Pages[10].Title, string(res.Pages[10].Body))
|
||||
}
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue