1
0
Fork 0
mirror of https://github.com/documize/community.git synced 2025-07-24 15:49:44 +02:00

rollback page revisions

This commit is contained in:
Harvey Kandola 2016-11-30 17:56:36 -08:00
parent ae03928569
commit 87d1334280
130 changed files with 39899 additions and 34 deletions

View file

@ -8,7 +8,7 @@ The mission is to bring software dev inspired features (refactoring, testing, li
## Latest version
v0.34.1
v0.35.0
## OS Support

View file

@ -0,0 +1,55 @@
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
//
// This software (Documize Community Edition) is licensed under
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
//
// You can operate outside the AGPL restrictions by purchasing
// Documize Enterprise Edition and obtaining a commercial license
// by contacting <sales@documize.com>.
//
// https://documize.com
import Ember from 'ember';
export default Ember.Component.extend({
revision: {},
hasDiff: Ember.computed('diff', function () {
return this.get('diff').length > 0;
}),
didReceiveAttrs() {
let revisions = this.get('revisions');
revisions.forEach((revision) => {
Ember.set(revision, 'deleted', revision.revisions === 0);
});
this.set('revisions', revisions);
},
didInsertElement() {
this._super(...arguments);
this.eventBus.subscribe('resized', this, 'sizeSidebar');
this.sizeSidebar();
},
sizeSidebar() {
let size = $(window).height() - 200;
this.$('.document-history > .sidebar').css('height', size + "px");
},
actions: {
getDiff(revision) {
this.set('revision', revision);
if (!revision.deleted) {
this.attrs.onFetchDiff(revision.pageId, revision.id);
}
},
rollback() {
let revision = this.get('revision');
this.attrs.onRollback(revision.pageId, revision.id);
}
}
});

View file

@ -0,0 +1,20 @@
// Copyright 2016 Documize Inc. <legal@documize.com>. All rights reserved.
//
// This software (Documize Community Edition) is licensed under
// GNU AGPL v3 http://www.gnu.org/licenses/agpl-3.0.en.html
//
// You can operate outside the AGPL restrictions by purchasing
// Documize Enterprise Edition and obtaining a commercial license
// by contacting <sales@documize.com>.
//
// https://documize.com
import Ember from 'ember';
import TooltipMixin from '../../mixins/tooltip';
export default Ember.Component.extend(TooltipMixin, {
didRender() {
let refId = this.get('refId');
this.addTooltip(document.getElementById(`avatar-${refId}`));
},
});

View file

@ -39,6 +39,11 @@ export default Model.extend({
let tocIndent = this.get('tocIndent');
return `margin-left-${tocIndent}`;
}),
hasRevisions: Ember.computed('revisions', function () {
return this.get('revisions') > 0;
}),
created: attr(),
revised: attr()
});

View file

@ -0,0 +1,28 @@
import Ember from 'ember';
import NotifierMixin from '../../../mixins/notifier';
export default Ember.Controller.extend(NotifierMixin, {
documentService: Ember.inject.service('document'),
actions: {
onFetchDiff(pageId, revisionId) {
this.audit.record("compared-diff");
this.get('documentService').getPageRevisionDiff(this.get('model.document.id'), pageId, revisionId).then((revision) => {
this.set('model.diff', revision);
});
},
onRollback(pageId, revisionId) {
this.audit.record("restored-page");
this.get('documentService').rollbackPage(this.get('model.document.id'), pageId, revisionId).then(() => {
this.transitionToRoute('document', {
queryParams: {
page: pageId
}
});
});
}
}
});

View file

@ -0,0 +1,18 @@
import Ember from 'ember';
import AuthenticatedRouteMixin from 'ember-simple-auth/mixins/authenticated-route-mixin';
export default Ember.Route.extend(AuthenticatedRouteMixin, {
documentService: Ember.inject.service('document'),
folderService: Ember.inject.service('folder'),
model() {
return Ember.RSVP.hash({
folders: this.modelFor('document').folders,
folder: this.modelFor('document').folder,
document: this.modelFor('document').document,
pages: this.modelFor('document').pages,
diff: "",
revisions: this.get('documentService').getDocumentRevisions(this.modelFor('document').document.get('id'))
});
}
});

View file

@ -0,0 +1,2 @@
{{document/document-history document=model.document folder=model.folder pages=model.pages revisions=model.revisions diff=model.diff
onFetchDiff=(action 'onFetchDiff') onRollback=(action 'onRollback')}}

View file

@ -47,6 +47,9 @@ export default Router.map(function () {
this.route('wizard', {
path: 'add'
});
this.route('history', {
path: 'history'
});
});
this.route('customize', {

View file

@ -170,6 +170,14 @@ export default Ember.Service.extend({
});
},
getDocumentRevisions(documentId) {
let url = `documents/${documentId}/revisions`;
return this.get('ajax').request(url, {
method: "GET"
});
},
getPageRevisions(documentId, pageId) {
let url = `documents/${documentId}/pages/${pageId}/revisions`;
@ -184,6 +192,10 @@ export default Ember.Service.extend({
return this.get('ajax').request(url, {
method: "GET",
dataType: 'text'
}).then((response) => {
return response;
}).catch((error) => {
return "";
});
},

View file

@ -3,6 +3,7 @@
@import "edit-tools.scss";
@import "editor.scss";
@import "files.scss";
@import "history.scss";
@import "sidebar.scss";
@import "toolbar.scss";
@import "wizard.scss";

View file

@ -0,0 +1,72 @@
.document-history {
position: relative;
> .sidebar {
position: relative;
overflow-y: scroll;
> .heading {
font-size: 1.2rem;
font-family: $font-light;
color: $color-off-black;
}
> .line {
width: 2px;
background-color: $color-stroke;
height: 100%;
position: absolute;
left: 17px;
z-index: -1;
}
> .items {
list-style-type: none;
margin: 0;
padding: 20px 0 10px;
white-space: nowrap;
> .item {
margin: 0;
padding: 10px 0;
width: 100%;
color: $color-off-black;
@include ease-in();
cursor: pointer;
> .avatar-box {
display: inline-block;
}
> .date {
display: inline-block;
font-size: 1rem;
font-family: $font-light;
margin-left: 25px;
}
> .detail {
display: block;
font-size: 1.2rem;
font-family: $font-semibold;
margin-left: 65px;
overflow: hidden;
text-overflow: ellipsis;
}
&:hover {
color: $color-link;
}
}
> .deleted {
color: $color-red;
cursor: not-allowed;
&:hover {
color: $color-red;
}
}
}
}
}

View file

@ -88,5 +88,27 @@
color: $color-primary;
}
}
> a {
display: inline-block;
> .option {
list-style-type: none;
display: inline-block;
margin: 0 0 0 20px;
padding: 0;
color: $color-gray;
cursor: pointer;
@include ease-in();
&:hover {
color: $color-link;
}
> .pinned {
color: $color-primary;
}
}
}
}
}

View file

@ -0,0 +1,40 @@
<div class="container width-100">
<div class="row">
<div class="col-xs-9 col-sm-9 col-md-9 col-lg-9">
{{#if hasDiff}}
<div id="restore-history-button" class="regular-button button-green pull-right margin-right-40">
Restore
</div>
{{#dropdown-dialog target="restore-history-button" position="bottom right" button="Restore" color="flat-green" onAction=(action 'rollback')}}
<p>Are you sure you want to roll back to this version?</p>
{{/dropdown-dialog}}
<div class="clearfix" />
<div class="doc-layout">
{{{diff}}}
</div>
{{/if}}
</div>
<div class="col-xs-3 col-sm-3 col-md-3 col-lg-3">
<div class="document-history">
<div class="sidebar">
<p class="heading">{{revisions.length}} changes</p>
<div class="line" />
<ul class="items">
{{#each revisions as |r|}}
<li class="item {{if r.deleted 'deleted'}}" {{action 'getDiff' r}}>
<div class="avatar-box">
{{ui/ui-avatar refId=r.id firstname=r.firstname lastname=r.lastname}}
</div>
<div class="date">{{time-ago r.created}}</div>
<div class="detail">{{r.title}}</div>
</li>
{{/each}}
</ul>
</div>
</div>
</div>
</div>
</div>

View file

@ -36,6 +36,11 @@
<li class="option" id="pin-document-button"><i class="material-icons">star_border</i></li>
{{/if}}
{{/if}}
{{#if session.authenticated}}
{{#link-to 'document.history'}}
<li class="option"><i class="material-icons">history</i></li>
{{/link-to}}
{{/if}}
{{#if isEditor}}
<li class="option" id="save-template-button"><i class="material-icons">content_copy</i></li>
<li class="option" id="set-meta-button"><i class="material-icons">settings</i></li>

View file

@ -0,0 +1,3 @@
<div class="avatar" id="avatar-{{refId}}" data-tooltip="{{firstname}} {{lastname}}" data-tooltip-position="top center">
{{user-initials firstname lastname}}
</div>

View file

@ -1,6 +1,6 @@
{
"name": "documize",
"version": "0.34.1",
"version": "0.35.0",
"description": "The Document IDE",
"private": true,
"directories": {

View file

@ -26,6 +26,7 @@ import (
"github.com/documize/community/core/log"
"github.com/documize/community/core/section/provider"
"github.com/documize/community/core/utility"
htmldiff "github.com/documize/html-diff"
"github.com/gorilla/mux"
)
@ -645,3 +646,232 @@ func GetDocumentPageMeta(w http.ResponseWriter, r *http.Request) {
writeSuccessBytes(w, json)
}
/********************
* Page Revisions
********************/
// GetDocumentRevisions returns all changes for a document.
func GetDocumentRevisions(w http.ResponseWriter, r *http.Request) {
method := "GetDocumentPageRevisions"
p := request.GetPersister(r)
params := mux.Vars(r)
documentID := params["documentID"]
if len(documentID) == 0 {
writeMissingDataError(w, method, "documentID")
return
}
if !p.CanViewDocument(documentID) {
writeForbiddenError(w)
return
}
revisions, err := p.GetDocumentRevisions(documentID)
payload, err := json.Marshal(revisions)
if err != nil {
writeJSONMarshalError(w, method, "revision", err)
return
}
writeSuccessBytes(w, payload)
}
// GetDocumentPageRevisions returns all changes for a given page.
func GetDocumentPageRevisions(w http.ResponseWriter, r *http.Request) {
method := "GetDocumentPageRevisions"
p := request.GetPersister(r)
params := mux.Vars(r)
documentID := params["documentID"]
if len(documentID) == 0 {
writeMissingDataError(w, method, "documentID")
return
}
if !p.CanViewDocument(documentID) {
writeForbiddenError(w)
return
}
pageID := params["pageID"]
if len(pageID) == 0 {
writeMissingDataError(w, method, "pageID")
return
}
revisions, err := p.GetPageRevisions(pageID)
payload, err := json.Marshal(revisions)
if err != nil {
writeJSONMarshalError(w, method, "revision", err)
return
}
writeSuccessBytes(w, payload)
}
// GetDocumentPageDiff returns HTML diff between two revisions of a given page.
func GetDocumentPageDiff(w http.ResponseWriter, r *http.Request) {
method := "GetDocumentPageDiff"
p := request.GetPersister(r)
params := mux.Vars(r)
documentID := params["documentID"]
if len(documentID) == 0 {
writeMissingDataError(w, method, "documentID")
return
}
if !p.CanViewDocument(documentID) {
writeForbiddenError(w)
return
}
pageID := params["pageID"]
if len(pageID) == 0 {
writeMissingDataError(w, method, "pageID")
return
}
revisionID := params["revisionID"]
if len(revisionID) == 0 {
writeMissingDataError(w, method, "revisionID")
return
}
page, err := p.GetPage(pageID)
if err == sql.ErrNoRows {
writeNotFoundError(w, method, revisionID)
return
}
revision, err := p.GetPageRevision(revisionID)
latestHTML := page.Body
previousHTML := revision.Body
var result []byte
var cfg = &htmldiff.Config{
Granularity: 5,
InsertedSpan: []htmldiff.Attribute{{Key: "style", Val: "background-color: palegreen;"}},
DeletedSpan: []htmldiff.Attribute{{Key: "style", Val: "background-color: lightpink; text-decoration: line-through;"}},
ReplacedSpan: []htmldiff.Attribute{{Key: "style", Val: "background-color: lightskyblue;"}},
CleanTags: []string{"documize"},
}
res, err := cfg.HTMLdiff([]string{latestHTML, previousHTML})
if err != nil {
writeServerError(w, method, err)
return
}
result = []byte(res[0])
_, err = w.Write(result)
log.IfErr(err)
}
// RollbackDocumentPage rolls-back to a specific page revision.
func RollbackDocumentPage(w http.ResponseWriter, r *http.Request) {
method := "RollbackDocumentPage"
p := request.GetPersister(r)
params := mux.Vars(r)
documentID := params["documentID"]
if len(documentID) == 0 {
writeMissingDataError(w, method, "documentID")
return
}
pageID := params["pageID"]
if len(pageID) == 0 {
writeMissingDataError(w, method, "pageID")
return
}
revisionID := params["revisionID"]
if len(revisionID) == 0 {
writeMissingDataError(w, method, "revisionID")
return
}
if !p.CanChangeDocument(documentID) {
writeForbiddenError(w)
return
}
tx, err := request.Db.Beginx()
if err != nil {
writeTransactionError(w, method, err)
return
}
p.Context.Transaction = tx
// fetch page
page, err := p.GetPage(pageID)
if err != nil {
writeGeneralSQLError(w, method, err)
return
}
// fetch page meta
meta, err := p.GetPageMeta(pageID)
if err != nil {
writeGeneralSQLError(w, method, err)
return
}
// fetch revision
revision, err := p.GetPageRevision(revisionID)
if err != nil {
writeGeneralSQLError(w, method, err)
return
}
// roll back page
page.Body = revision.Body
refID := util.UniqueID()
err = p.UpdatePage(page, refID, p.Context.UserID, false)
if err != nil {
log.IfErr(tx.Rollback())
writeGeneralSQLError(w, method, err)
return
}
// roll back page meta
meta.Config = revision.Config
meta.RawBody = revision.RawBody
err = p.UpdatePageMeta(meta, false)
if err != nil {
log.IfErr(tx.Rollback())
writeGeneralSQLError(w, method, err)
return
}
log.IfErr(tx.Commit())
payload, err := json.Marshal(page)
if err != nil {
writeJSONMarshalError(w, method, "revision", err)
return
}
writeSuccessBytes(w, payload)
}

View file

@ -159,6 +159,11 @@ func init() {
log.IfErr(Add(RoutePrefixPrivate, "documents/{documentID}/pages/level", []string{"POST", "OPTIONS"}, nil, ChangeDocumentPageLevel))
log.IfErr(Add(RoutePrefixPrivate, "documents/{documentID}/pages/sequence", []string{"POST", "OPTIONS"}, nil, ChangeDocumentPageSequence))
log.IfErr(Add(RoutePrefixPrivate, "documents/{documentID}/pages/batch", []string{"POST", "OPTIONS"}, nil, GetDocumentPagesBatch))
log.IfErr(Add(RoutePrefixPrivate, "documents/{documentID}/pages/{pageID}/revisions", []string{"GET", "OPTIONS"}, nil, GetDocumentPageRevisions))
log.IfErr(Add(RoutePrefixPrivate, "documents/{documentID}/pages/{pageID}/revisions/{revisionID}", []string{"GET", "OPTIONS"}, nil, GetDocumentPageDiff))
log.IfErr(Add(RoutePrefixPrivate, "documents/{documentID}/pages/{pageID}/revisions/{revisionID}", []string{"POST", "OPTIONS"}, nil, RollbackDocumentPage))
log.IfErr(Add(RoutePrefixPrivate, "documents/{documentID}/revisions", []string{"GET", "OPTIONS"}, nil, GetDocumentRevisions))
log.IfErr(Add(RoutePrefixPrivate, "documents/{documentID}/pages", []string{"GET", "OPTIONS"}, nil, GetDocumentPages))
log.IfErr(Add(RoutePrefixPrivate, "documents/{documentID}/pages/{pageID}", []string{"PUT", "OPTIONS"}, nil, UpdateDocumentPage))
log.IfErr(Add(RoutePrefixPrivate, "documents/{documentID}/pages/{pageID}", []string{"DELETE", "OPTIONS"}, nil, DeleteDocumentPage))

View file

@ -238,6 +238,27 @@ func (p *PageMeta) SetDefaults() {
}
}
// Revision holds the previous version of a Page.
type Revision struct {
BaseEntity
OrgID string `json:"orgId"`
DocumentID string `json:"documentId"`
PageID string `json:"pageId"`
OwnerID string `json:"ownerId"`
UserID string `json:"userId"`
ContentType string `json:"contentType"`
PageType string `json:"pageType"`
Title string `json:"title"`
Body string `json:"body"`
RawBody string `json:"rawBody"`
Config string `json:"config"`
Email string `json:"email"`
Firstname string `json:"firstname"`
Lastname string `json:"lastname"`
Initials string `json:"initials"`
Revisions int `json:"revisions"`
}
// DocumentMeta details who viewed the document.
type DocumentMeta struct {
Viewers []DocumentMetaViewer `json:"viewers"`

View file

@ -258,36 +258,6 @@ func (p *Persister) UpdatePage(page entity.Page, refID, userID string, skipRevis
}
}
//if page.Level == 1 { // may need to update the document name
//var doc entity.Document
//stmt4, err := p.Context.Transaction.Preparex("SELECT id, refid, orgid, labelid, job, location, title, excerpt, slug, tags, template, created, revised FROM document WHERE refid=?")
//defer utility.Close(stmt4)
//if err != nil {
//log.Error(fmt.Sprintf("Unable to prepare pagemanager doc query for Id %s", page.DocumentID), err)
//return err
//}
//err = stmt4.Get(&doc, page.DocumentID)
//if err != nil {
//log.Error(fmt.Sprintf("Unable to execute pagemanager document query for Id %s", page.DocumentID), err)
//return err
//}
//if doc.Title != page.Title {
//doc.Title = page.Title
//doc.Revised = page.Revised
//err = p.UpdateDocument(doc)
//if err != nil {
//log.Error(fmt.Sprintf("Unable to update document when page 1 altered DocumentId %s", page.DocumentID), err)
//return err
//}
//}
//}
// find any content links in the HTML
links := util.GetContentLinks(page.Body)
@ -336,6 +306,7 @@ func (p *Persister) UpdatePage(page entity.Page, refID, userID string, skipRevis
func (p *Persister) UpdatePageMeta(meta entity.PageMeta, updateUserID bool) (err error) {
err = nil
meta.Revised = time.Now().UTC()
if updateUserID {
meta.UserID = p.Context.UserID
}
@ -424,6 +395,9 @@ func (p *Persister) DeletePage(documentID, pageID string) (rows int64, err error
// mark as orphan links to this page
err = p.MarkOrphanPageLink(pageID)
// nuke revisions
_, err = p.DeletePageRevisions(pageID)
p.Base.Audit(p.Context, "remove-page", documentID, pageID)
}
@ -469,3 +443,68 @@ func (p *Persister) GetDocumentPageMeta(documentID string, externalSourceOnly bo
return
}
/********************
* Page Revisions
********************/
// GetPageRevision returns the revisionID page revision record.
func (p *Persister) GetPageRevision(revisionID string) (revision entity.Revision, err error) {
stmt, err := Db.Preparex("SELECT id, refid, orgid, documentid, ownerid, pageid, userid, contenttype, pagetype, title, body, rawbody, coalesce(config,JSON_UNQUOTE('{}')) as config, created, revised FROM revision WHERE orgid=? and refid=?")
defer utility.Close(stmt)
if err != nil {
log.Error(fmt.Sprintf("Unable to prepare select for revision %s", revisionID), err)
return
}
err = stmt.Get(&revision, p.Context.OrgID, revisionID)
if err != nil {
log.Error(fmt.Sprintf("Unable to execute select for revision %s", revisionID), err)
return
}
return
}
// GetDocumentRevisions returns a slice of page revision records for a given document, in the order they were created.
// Then audits that the get-page-revisions action has occurred.
func (p *Persister) GetDocumentRevisions(documentID string) (revisions []entity.Revision, err error) {
err = Db.Select(&revisions, "SELECT a.id, a.refid, a.orgid, a.documentid, a.ownerid, a.pageid, a.userid, a.contenttype, a.pagetype, a.title, /*a.body, a.rawbody, a.config,*/ a.created, a.revised, coalesce(b.email,'') as email, coalesce(b.firstname,'') as firstname, coalesce(b.lastname,'') as lastname, coalesce(b.initials,'') as initials, coalesce(p.revisions, 0) as revisions FROM revision a LEFT JOIN user b ON a.userid=b.refid LEFT JOIN page p ON a.pageid=p.refid WHERE a.orgid=? AND a.documentid=? AND a.pagetype='section' ORDER BY a.id DESC", p.Context.OrgID, documentID)
if err != nil {
log.Error(fmt.Sprintf("Unable to execute select revisions for org %s and document %s", p.Context.OrgID, documentID), err)
return
}
if len(revisions) == 0 {
revisions = []entity.Revision{}
}
p.Base.Audit(p.Context, "get-document-revisions", documentID, "")
return
}
// GetPageRevisions returns a slice of page revision records for a given pageID, in the order they were created.
// Then audits that the get-page-revisions action has occurred.
func (p *Persister) GetPageRevisions(pageID string) (revisions []entity.Revision, err error) {
err = Db.Select(&revisions, "SELECT a.id, a.refid, a.orgid, a.documentid, a.ownerid, a.pageid, a.userid, a.contenttype, a.pagetype, a.title, /*a.body, a.rawbody, a.config,*/ a.created, a.revised, coalesce(b.email,'') as email, coalesce(b.firstname,'') as firstname, coalesce(b.lastname,'') as lastname, coalesce(b.initials,'') as initials FROM revision a LEFT JOIN user b ON a.userid=b.refid WHERE a.orgid=? AND a.pageid=? AND a.pagetype='section' ORDER BY a.id DESC", p.Context.OrgID, pageID)
if err != nil {
log.Error(fmt.Sprintf("Unable to execute select revisions for org %s and page %s", p.Context.OrgID, pageID), err)
return
}
p.Base.Audit(p.Context, "get-page-revisions", "", pageID)
return
}
// DeletePageRevisions deletes all of the page revision records for a given pageID.
func (p *Persister) DeletePageRevisions(pageID string) (rows int64, err error) {
rows, err = p.Base.DeleteWhere(p.Context.Transaction, fmt.Sprintf("DELETE FROM revision WHERE orgid='%s' AND pageid='%s'", p.Context.OrgID, pageID))
return
}

View file

@ -26,8 +26,8 @@ type ProdInfo struct {
// Product returns product edition details
func Product() (p ProdInfo) {
p.Major = "0"
p.Minor = "34"
p.Patch = "1"
p.Minor = "35"
p.Patch = "0"
p.Version = fmt.Sprintf("%s.%s.%s", p.Major, p.Minor, p.Patch)
p.Edition = "Community"
p.Title = fmt.Sprintf("%s Edition", p.Edition)

30
vendor/github.com/documize/html-diff/.gitignore generated vendored Normal file
View file

@ -0,0 +1,30 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
testout
fuzz
.vscode
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof
*.out
*.pprof
*.svg

21
vendor/github.com/documize/html-diff/LICENSE generated vendored Normal file
View file

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2016 Documize
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

27
vendor/github.com/documize/html-diff/README.md generated vendored Normal file
View file

@ -0,0 +1,27 @@
# html-diff
Calculate difference between two HTML snippets and return those differences as a merged HTML snippet showing the changes.
Usage:
```
var cfg = &htmldiff.Config{
Granularity: 5,
InsertedSpan: []htmldiff.Attribute{{Key: "style", Val: "background-color: palegreen;"}},
DeletedSpan: []htmldiff.Attribute{{Key: "style", Val: "background-color: lightpink;"}},
ReplacedSpan: []htmldiff.Attribute{{Key: "style", Val: "background-color: lightskyblue;"}},
CleanTags: []string{""},
}
res, err := cfg.HTMLdiff([]string{previousHTML, latestHTML})
mergedHTML := res[0]
```
![see example_test.go](example_test.png)
Only deals with body HTML, so no headers, only what is within the body element.
Requires Go1.5+, with vendoring support. Vendors "github.com/mb0/diff", "golang.org/x/net/html" and "golang.org/x/net/html/atom".
Running the tests will create output files in testout/*.html.
For fuzz-testing using https://github.com/dvyukov/go-fuzz , the Fuzz() function is in fuzz.go (as at Feb'16 you need to rename the ```vendor``` directory while you fuzz, and go get the dependencies - an issue with "go/importer", see https://github.com/golang/go/issues/13756).
Pull requests welcome.

221
vendor/github.com/documize/html-diff/append.go generated vendored Normal file
View file

@ -0,0 +1,221 @@
package htmldiff
import (
"sort"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// the things we need to know while appending.
type appendContext struct {
c *Config
target, targetBody, lastProto *html.Node
lastText string
lastAction rune
lastPos posT
editList []editEntry
}
// an individual edit action.
type editEntry struct {
action rune
text string
proto *html.Node
pos posT
origSeq int
}
// Len is part of sort.Interface.
func (ap *appendContext) Len() int {
return len(ap.editList)
}
// Swap is part of sort.Interface.
func (ap *appendContext) Swap(i, j int) {
ap.editList[i], ap.editList[j] = ap.editList[j], ap.editList[i]
}
// Less is part of sort.Interface.
func (ap *appendContext) Less(i, j int) bool {
if len(ap.editList[i].pos) > 0 && len(ap.editList[j].pos) > 0 { // if both are in containers
ii := len(ap.editList[i].pos) - 1
jj := len(ap.editList[j].pos) - 1
for ii > 0 && jj > 0 {
if ap.editList[i].pos[ii].nodesBefore < ap.editList[j].pos[jj].nodesBefore {
return true
}
if ap.editList[i].pos[ii].nodesBefore > ap.editList[j].pos[jj].nodesBefore {
return false
}
ii--
jj--
}
}
return ap.editList[i].origSeq < ap.editList[j].origSeq
}
// append a treeRune at location idx to the output, group similar runes together to before calling append0().
func (ap *appendContext) append(action rune, trs []treeRune, idx int) {
if idx >= len(trs) { // defending error found by fuzz testing
return
}
tr := trs[idx]
if tr.leaf == nil {
return
}
// return if we should not be appending this type of node
switch tr.leaf.Type {
case html.DocumentNode:
return
case html.ElementNode:
switch tr.leaf.DataAtom {
case atom.Html:
return
}
}
var text string
if tr.letter > 0 {
text = string(tr.letter)
}
if ap.lastProto == tr.leaf && ap.lastAction == action && tr.leaf.Type == html.TextNode && text != "" && posEqual(ap.lastPos, tr.pos) {
ap.lastText += text
return
}
ap.flush0(action, tr.leaf, tr.pos)
if tr.leaf.Type == html.TextNode { // reload the buffer
ap.lastText = text
return
}
ap.append0(action, "", tr.leaf, tr.pos)
}
func (ap *appendContext) flush() {
ap.flush0(0, nil, nil)
}
func (ap *appendContext) flush0(action rune, proto *html.Node, pos posT) {
if ap.lastText != "" {
ap.append0(ap.lastAction, ap.lastText, ap.lastProto, ap.lastPos) // flush the buffer
}
// reset the buffer
ap.lastProto = proto
ap.lastAction = action
ap.lastPos = pos
ap.lastText = ""
}
// append0 builds up the editList of things to do.
func (ap *appendContext) append0(action rune, text string, proto *html.Node, pos posT) {
os := len(ap.editList)
ap.editList = append(ap.editList, editEntry{action, text, proto, pos, os})
}
// Sort the editList before using append1 on all the sorted edits.
// Sorting is required in order to get edits inside containers in the right order.
func (ap *appendContext) sortAndWrite() {
sort.Stable(ap)
for _, e := range ap.editList {
ap.append1(e.action, e.text, e.proto, e.pos)
}
}
// append1 actually appends to the merged HTML node tree.
func (ap *appendContext) append1(action rune, text string, proto *html.Node, pos posT) {
if proto == nil {
return
}
appendPoint, protoAncestor := ap.lastMatchingLeaf(proto, action, pos)
if appendPoint == nil || protoAncestor == nil {
return
}
if appendPoint.DataAtom != protoAncestor.DataAtom {
return
}
newLeaf := new(html.Node)
copyNode(newLeaf, proto)
if proto.Type == html.TextNode {
newLeaf.Data = text
}
if action != '=' {
insertNode := &html.Node{
Type: html.ElementNode,
DataAtom: atom.Span,
Data: "span",
}
switch action {
case '+':
insertNode.Attr = convertAttributes(ap.c.InsertedSpan)
case '-':
insertNode.Attr = convertAttributes(ap.c.DeletedSpan)
case '~':
insertNode.Attr = convertAttributes(ap.c.ReplacedSpan)
}
insertNode.AppendChild(newLeaf)
newLeaf = insertNode
}
for proto = proto.Parent; proto != nil && proto != protoAncestor; proto = proto.Parent {
above := new(html.Node)
copyNode(above, proto)
above.AppendChild(newLeaf)
newLeaf = above
}
appendPoint.AppendChild(newLeaf)
}
// find the append point in the merged HTML and from where to copy in the source.
func (ap *appendContext) lastMatchingLeaf(proto *html.Node, action rune, pos posT) (appendPoint, protoAncestor *html.Node) {
if ap.targetBody == nil {
ap.targetBody = findBody(ap.target)
}
candidates := []*html.Node{}
for cand := ap.target; cand != nil; cand = cand.LastChild {
candidates = append([]*html.Node{cand}, candidates...)
}
candidates = append(candidates, ap.targetBody) // longstop
for cni, can := range candidates {
_ = cni
gpa := getPos(can) // what we are building
for anc := proto; anc.Parent != nil; anc = anc.Parent {
if anc.Type == html.ElementNode && anc.DataAtom == atom.Html {
break
}
gpb := getPos(anc) // what we are adding in
if ap.leavesEqual(can, anc, action, gpa, gpb) {
return can, anc
}
}
}
return ap.targetBody, proto
}
// are two leaves of a node-tree equal?
func (ap *appendContext) leavesEqual(a, b *html.Node, action rune, gpa, gpb posT) bool {
if a == b {
return true
}
if a == nil || b == nil {
return false
}
if a.Type != html.ElementNode || b.Type != html.ElementNode {
return false // they must both be element nodes to do a comparison
}
if a.DataAtom == atom.Body && b.DataAtom == atom.Body {
return true // body nodes are always equal
}
if !nodeEqual(a, b) {
return false
}
if len(gpa) != len(gpb) {
return false
}
for i := 0; i < len(gpb); i++ {
if gpa[i].nodesBefore < gpb[i].nodesBefore {
return false
}
if gpa[i].node.DataAtom != gpb[i].node.DataAtom {
return false
}
}
return true
}

28
vendor/github.com/documize/html-diff/benchmark_test.go generated vendored Normal file
View file

@ -0,0 +1,28 @@
package htmldiff_test
import (
"strings"
"testing"
"github.com/documize/html-diff"
)
var cfgBench = &htmldiff.Config{
Granularity: 6,
InsertedSpan: []htmldiff.Attribute{{Key: "style", Val: "background-color: palegreen; text-decoration: underline;"}},
DeletedSpan: []htmldiff.Attribute{{Key: "style", Val: "background-color: lightpink; text-decoration: line-through;"}},
ReplacedSpan: []htmldiff.Attribute{{Key: "style", Val: "background-color: lightskyblue; text-decoration: overline;"}},
CleanTags: []string{"documize"},
}
func BenchmarkHTMLdiff(b *testing.B) {
bbc := bbcNews1 + bbcNews2
bbclc := strings.ToLower(bbc)
args := []string{bbc, bbclc}
for n := 0; n < b.N; n++ {
_, err := cfgBench.HTMLdiff(args) // don't care about the result as we are looking at speed
if err != nil {
b.Errorf("comparing BBC news with its lower-case self error: %s", err)
}
}
}

67
vendor/github.com/documize/html-diff/clean.go generated vendored Normal file
View file

@ -0,0 +1,67 @@
package htmldiff
import (
htm "html"
"strings"
"unicode/utf8"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
// delAttr() deletes an unwanted attribute.
func delAttr(attr []html.Attribute, ai int) (ret []html.Attribute) {
if len(attr) <= 1 || ai >= len(attr) {
return nil
}
return append(attr[:ai], attr[ai+1:]...)
}
// clean normalises styles/colspan and removes any CleanTags specified, along with newlines;
// but also makes all the character handling (for example "&#160;" as utf-8) the same.
// It returns the estimated number of treeRunes that will be used.
// TODO more cleaning of the input HTML, as required.
func (c *Config) clean(n *html.Node) int {
size := 1
switch n.Type {
case html.ElementNode:
for ai := 0; ai < len(n.Attr); ai++ {
a := n.Attr[ai]
switch {
case strings.ToLower(a.Key) == "style":
if strings.TrimSpace(a.Val) == "" { // delete empty styles
n.Attr = delAttr(n.Attr, ai)
ai--
} else { // tidy non-empty styles
// TODO there could be more here to make sure the style entries are in the same order etc.
n.Attr[ai].Val = strings.Replace(a.Val, " ", "", -1)
if !strings.HasSuffix(n.Attr[ai].Val, ";") {
n.Attr[ai].Val += ";"
}
}
case n.DataAtom == atom.Td &&
strings.ToLower(a.Key) == "colspan" &&
strings.TrimSpace(a.Val) == "1":
n.Attr = delAttr(n.Attr, ai)
ai--
}
}
case html.TextNode:
n.Data = htm.UnescapeString(n.Data)
size += utf8.RuneCountInString(n.Data) - 1 // len(n.Data) would be faster, but use more memory
}
searchChildren:
for ch := n.FirstChild; ch != nil; ch = ch.NextSibling {
switch ch.Type {
case html.ElementNode:
for _, rr := range c.CleanTags {
if rr == ch.Data {
n.RemoveChild(ch)
goto searchChildren
}
}
}
size += c.clean(ch)
}
return size
}

1520
vendor/github.com/documize/html-diff/datafor_test.go generated vendored Normal file

File diff suppressed because one or more lines are too long

29
vendor/github.com/documize/html-diff/example_test.go generated vendored Normal file
View file

@ -0,0 +1,29 @@
package htmldiff_test
import (
"fmt"
"github.com/documize/html-diff"
)
func ExampleHTMLdiff() {
previousHTML := `<p>Bullet list:</p><ul><li>first item</li><li>第二</li><li>3rd</li></ul>`
latestHTML := `<p>Bullet <b>list:</b></p><ul><li>first item</li><li>number two</li><li>3rd</li></ul>`
var cfg = &htmldiff.Config{
Granularity: 5,
InsertedSpan: []htmldiff.Attribute{{Key: "style", Val: "background-color: palegreen;"}},
DeletedSpan: []htmldiff.Attribute{{Key: "style", Val: "background-color: lightpink;"}},
ReplacedSpan: []htmldiff.Attribute{{Key: "style", Val: "background-color: lightskyblue;"}},
CleanTags: []string{""},
}
res, err := cfg.HTMLdiff([]string{previousHTML, latestHTML})
if err != nil {
fmt.Println(err)
}
mergedHTML := res[0]
fmt.Println(mergedHTML)
// Output:
// <p>Bullet <b><span style="background-color: lightskyblue;">list:</span></b></p><ul><li>first item</li><li><span style="background-color: lightpink;">第二</span><span style="background-color: palegreen;">number two</span></li><li>3rd</li></ul>
}

45
vendor/github.com/documize/html-diff/example_test.html generated vendored Normal file
View file

@ -0,0 +1,45 @@
<html>
<head></head>
<body>
<table border=1 style="font: 15px arial, sans-serif;">
<tr><th colspan=3>
<b>variables from example_test.go</b>
</th></tr>
<tr>
<th>previousHTML</th>
<th>latestHTML</th>
<th>mergedHTML
</th>
</tr>
<tr>
<td>
<p>Bullet list:</p>
<ul>
<li>first item</li>
<li>第二</li>
<li>3rd</li>
</ul>
</td>
<td>
<p>Bullet <b>list:</b></p>
<ul>
<li>first item</li>
<li>number two</li>
<li>3rd</li>
</ul>
</td>
<td>
<p>Bullet <b><span style="background-color: lightskyblue;">list:</span></b></p>
<ul>
<li>first item</li>
<li><span style="background-color: lightpink;">第二</span><span style="background-color: palegreen;">number two</span></li>
<li>3rd</li>
</ul>
</td>
</tr>
</table>
</body>
</html>

BIN
vendor/github.com/documize/html-diff/example_test.png generated vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 47 KiB

212
vendor/github.com/documize/html-diff/fuzz.go generated vendored Normal file
View file

@ -0,0 +1,212 @@
// +build gofuzz
package htmldiff
import (
"strings"
"unicode/utf8"
"golang.org/x/net/html"
)
type fuzzCfg struct {
cfg *Config
cmp []string
}
func baseFuzzCfg() *fuzzCfg {
return &fuzzCfg{
cfg: &Config{
Granularity: 0,
InsertedSpan: []html.Attribute{{Key: "style", Val: "background-color: palegreen; text-decoration: underline;"}},
DeletedSpan: []html.Attribute{{Key: "style", Val: "background-color: lightpink; text-decoration: line-through;"}},
ReplacedSpan: []html.Attribute{{Key: "style", Val: "background-color: lightskyblue; text-decoration: overline;"}},
CleanTags: nil,
},
}
}
type fuzzer func(*fuzzCfg, []byte, uint)
var fuzzers = []fuzzer{fuz0, fuz1, fuz2, fuz3, fuz4, fuz5, fuz6 /*, fuz7 */}
// Fuzz is the test function for https://github.com/dvyukov/go-fuzz
func Fuzz(data []byte) int {
fcfg := baseFuzzCfg()
seed := uint1(&data)
fuzzers[int(seed)%len(fuzzers)](fcfg, data, seed/uint(len(fuzzers)))
if _, err := fcfg.cfg.HTMLdiff(fcfg.cmp); err != nil {
return 0
}
return 1
}
func fuz0(fcfg *fuzzCfg, data []byte, seed uint) {
raw := string(data)
var x0, x1 string
for i, r := range raw {
if r != utf8.RuneError {
x0 += string(r)
switch i % 4 {
case 0:
case 1:
x1 += string(r)
x1 += string(r)
default:
x1 += string(r)
}
}
}
fcfg.cmp = []string{x0, x1}
}
func fuz1(fcfg *fuzzCfg, data []byte, seed uint) {
fcfg.cfg.Granularity = int(seed)
fcfg.cmp = []string{string(data), strings.ToUpper(string(data))}
}
func fuz2(fcfg *fuzzCfg, data []byte, seed uint) {
fcfg.cfg.Granularity = int(seed)
for ct := int(seed) / 2; ct > 0; ct-- {
fcfg.cfg.CleanTags = append(fcfg.cfg.CleanTags, str5(&data))
}
n := int(seed) + 1
h := make([]string, 0, n)
xx := ""
l := len(data) / int(n)
for n--; n >= 0; n-- {
x := strMax(&data, l)
h = append(h, x)
if len(x) > 0 {
xx = x
}
}
fcfg.cmp = append([]string{xx}, h...)
}
func fuz3(fcfg *fuzzCfg, data []byte, seed uint) {
fcfg.cfg.Granularity = int(seed)
raw := string(data)
var x0, x1 string
for i, r := range raw {
if r != utf8.RuneError {
x0 += string(r)
switch i % 2 {
case 0:
x0 += string(r)
case 1:
x1 += string(r)
}
}
}
fcfg.cmp = []string{x0, x1}
}
func fuz4(fcfg *fuzzCfg, data []byte, seed uint) {
raw := string(data)
x, y := "", ""
for _, r := range raw {
if r != utf8.RuneError {
x += string(r)
y = string(r) + y
}
}
fcfg.cfg.Granularity = len(x) % int(seed+1)
fcfg.cmp = []string{x, y}
}
func fuz5(fcfg *fuzzCfg, data []byte, seed uint) {
raw := string(data)
x := ""
for _, r := range raw {
if r != utf8.RuneError {
x += string(r)
}
}
fcfg.cfg.Granularity = len(x) % int(seed+1)
fcfg.cmp = []string{x, strings.ToLower(x)}
}
func fuz6(fcfg *fuzzCfg, data []byte, seed uint) {
raw := string(data)
x := ""
for _, r := range raw {
if r != utf8.RuneError {
x += string(r)
}
}
fcfg.cmp = []string{x, strings.ToTitle(x)}
}
/* file decode time is too long
func fuz7(fcfg *fuzzCfg, data []byte, seed uint) {
dir := ".." + string(os.PathSeparator) + "testin" // should be running in the fuzz directory
files, err := ioutil.ReadDir(dir)
if err != nil {
return
}
tryAgain:
file := files[int(seed)%len(files)]
baseHTML := ""
fn := file.Name()
var dat []byte
if !strings.HasSuffix(fn, ".html") || fn == "google.html" {
seed++
goto tryAgain
}
ffn := dir + string(os.PathSeparator) + fn
dat, err = ioutil.ReadFile(ffn)
if err != nil {
return
}
baseHTML = string(dat)
ptr := (int(seed) + len(dat)/2) % len(dat)
dat[ptr] = byte(seed) & 0x7f
for x, xx := range dat {
ptr = (ptr + x + int(xx)) % len(dat)
dat[ptr] = xx & 0x7f
}
fcfg.cmp = []string{baseHTML, string(dat)}
}
*/
func uint1(data *[]byte) uint {
if len(*data) < 1 {
return 0
}
r := (*data)[0]
*data = (*data)[1:]
return uint(r)
}
func str5(data *[]byte) string {
if len(*data) < 1 {
return ""
}
l := int((*data)[0])%5 + 1
if len(*data) <= l {
return ""
}
raw := string((*data)[1 : l+1])
*data = (*data)[l:]
s := ""
for _, r := range raw {
if r != utf8.RuneError {
s += string(r)
}
}
return s
}
var tags = []string{"u", "p", "i", "b", "div"}
func strMax(data *[]byte, limit int) string {
s := ""
for max := limit; len(s) < limit && max > 0; max-- {
txt := str5(data)
tag := tags[uint1(data)%uint(len(tags))]
s += "<" + tag + ">" + txt + "</" + tag + ">"
}
return s
}

193
vendor/github.com/documize/html-diff/htmldiff.go generated vendored Normal file
View file

@ -0,0 +1,193 @@
package htmldiff
import (
"bytes"
"errors"
"strings"
"time"
"github.com/mb0/diff"
"golang.org/x/net/html"
)
// Attribute exists so that this package does not export html.Attribute, to allow vendoring of "golang.org/x/net/html".
type Attribute struct {
Namespace, Key, Val string
}
// return the "golang.org/x/net/html" version of a slice of Attribute
func convertAttributes(atts []Attribute) []html.Attribute {
ret := make([]html.Attribute, 0, len(atts))
for _, a := range atts {
ret = append(ret, html.Attribute{
Namespace: a.Namespace,
Key: a.Key,
Val: a.Val,
})
}
return ret
}
// Config describes the way that HTMLdiff works.
type Config struct {
Granularity int // how many letters to put together for a change, if possible
InsertedSpan, DeletedSpan, ReplacedSpan []Attribute // the attributes for the span tags wrapping changes
CleanTags []string // HTML tags to clean from the input
}
// HTMLdiff finds all the differences in the versions of HTML snippits,
// versions[0] is the original, all other versions are the edits to be compared.
// The resulting merged HTML snippits are as many as there are edits to compare.
func (c *Config) HTMLdiff(versions []string) ([]string, error) {
if len(versions) < 2 {
return nil, errors.New("there must be at least two versions to diff, the 0th element is the base")
}
parallelErrors := make(chan error, len(versions))
sourceTrees := make([]*html.Node, len(versions))
sourceTreeRunes := make([]*[]treeRune, len(versions))
firstLeaves := make([]int, len(versions))
for v, vv := range versions {
go func(v int, vv string) {
var err error
sourceTrees[v], err = html.Parse(strings.NewReader(vv))
if err == nil {
tr := make([]treeRune, 0, c.clean(sourceTrees[v]))
sourceTreeRunes[v] = &tr
renderTreeRunes(sourceTrees[v], &tr)
leaf1, ok := firstLeaf(findBody(sourceTrees[v]))
if leaf1 == nil || !ok {
firstLeaves[v] = 0 // could be wrong, but correct for simple examples
} else {
for x, y := range tr {
if y.leaf == leaf1 {
firstLeaves[v] = x
break
}
}
}
}
parallelErrors <- err
}(v, vv)
}
for range versions {
if err := <-parallelErrors; err != nil {
return nil, err
}
}
// now all the input trees are buit, we can do the merge
mergedHTMLs := make([]string, len(versions)-1)
for m := range mergedHTMLs {
go func(m int) {
treeRuneLimit := 250000 // from initial testing
if len(*sourceTreeRunes[0]) > treeRuneLimit || len(*sourceTreeRunes[m+1]) > treeRuneLimit {
parallelErrors <- errors.New("input data too large")
return
}
dd := diffData{a: sourceTreeRunes[0], b: sourceTreeRunes[m+1]}
var changes []diff.Change
ch := make(chan []diff.Change)
go func(ch chan []diff.Change) {
ch <- diff.Diff(len(*sourceTreeRunes[0]), len(*sourceTreeRunes[m+1]), dd)
}(ch)
to := time.After(time.Second * 3)
select {
case <-to:
parallelErrors <- errors.New("diff.Diff() took too long")
go func(ch chan []diff.Change) {
<-ch // make sure the timed-out diff cleans-up
}(ch)
return
case changes = <-ch:
// we have the diff
go func(to <-chan time.Time) {
<-to // make sure we don't leak the timer goroutine
}(to)
}
changes = granular(c.Granularity, dd, changes)
mergedTree, err := c.walkChanges(changes, sourceTreeRunes[0], sourceTreeRunes[m+1], firstLeaves[0], firstLeaves[m+1])
if err != nil {
parallelErrors <- err
return
}
var mergedHTMLbuff bytes.Buffer
err = html.Render(&mergedHTMLbuff, mergedTree)
if err != nil {
parallelErrors <- err
return
}
mergedHTML := mergedHTMLbuff.Bytes()
pfx := []byte("<html><head></head><body>")
sfx := []byte("</body></html>")
if bytes.HasPrefix(mergedHTML, pfx) && bytes.HasSuffix(mergedHTML, sfx) {
mergedHTML = bytes.TrimSuffix(bytes.TrimPrefix(mergedHTML, pfx), sfx)
mergedHTMLs[m] = string(mergedHTML)
parallelErrors <- nil
return
}
parallelErrors <- errors.New("correct render wrapper HTML not found: " + string(mergedHTML))
}(m)
}
for range mergedHTMLs {
if err := <-parallelErrors; err != nil {
return nil, err
}
}
return mergedHTMLs, nil
}
// walkChanges goes through the changes identified by diff, identifies where a change is a repacement,
// then appends the changes to the output set. Once that set is complete, after ctx.flush(),
// they are finally resorted (to re-order those in containers) and written out using ctx.sortAndWrite().
func (c *Config) walkChanges(changes []diff.Change, ap, bp *[]treeRune, aIdx, bIdx int) (*html.Node, error) {
mergedTree, err := html.Parse(strings.NewReader("<html><head></head><body></body></html>"))
if err != nil {
return nil, err
}
a := *ap
b := *bp
ctx := &appendContext{c: c, target: mergedTree}
for _, change := range changes {
for aIdx < change.A && bIdx < change.B {
ctx.append('=', a, aIdx)
aIdx++
bIdx++
}
if change.Del == change.Ins && change.Del > 0 {
for i := 0; i < change.Del; i++ {
if aIdx+i >= len(a) || bIdx+i >= len(b) {
goto textDifferent // defensive after fuzz testing
}
if a[aIdx+i].letter != b[bIdx+i].letter {
goto textDifferent
}
}
for i := 0; i < change.Del; i++ {
ctx.append('~', b, bIdx)
aIdx++
bIdx++
}
goto textSame
}
textDifferent:
for i := 0; i < change.Del; i++ {
ctx.append('-', a, aIdx)
aIdx++
}
for i := 0; i < change.Ins; i++ {
ctx.append('+', b, bIdx)
bIdx++
}
textSame:
}
for aIdx < len(a) && bIdx < len(b) {
ctx.append('=', a, aIdx)
aIdx++
bIdx++
}
ctx.flush()
ctx.sortAndWrite()
return mergedTree, nil
}

369
vendor/github.com/documize/html-diff/htmldiff_test.go generated vendored Normal file
View file

@ -0,0 +1,369 @@
package htmldiff_test
import (
"fmt"
"io/ioutil"
"os"
"runtime"
"strings"
"testing"
"time"
"github.com/documize/html-diff"
)
var cfg = &htmldiff.Config{
Granularity: 6,
InsertedSpan: []htmldiff.Attribute{{Key: "style", Val: "background-color: palegreen; text-decoration: underline;"}},
DeletedSpan: []htmldiff.Attribute{{Key: "style", Val: "background-color: lightpink; text-decoration: line-through;"}},
ReplacedSpan: []htmldiff.Attribute{{Key: "style", Val: "background-color: lightskyblue; text-decoration: overline;"}},
CleanTags: []string{"documize"},
}
type simpleTest struct {
versions, diffs []string
}
var simpleTests = []simpleTest{
{[]string{"chinese中文", `chinese<documize type="field-start"></documize>中文`, "中文", "chinese"},
[]string{"chinese中文",
`<span style="background-color: lightpink; text-decoration: line-through;">chinese</span>中文`,
`chinese<span style="background-color: lightpink; text-decoration: line-through;">中文</span>`}},
{[]string{"hElLo is that documize!", "Hello is that Documize?"},
[]string{`<span style="background-color: lightpink; text-decoration: line-through;">hE</span><span style="background-color: palegreen; text-decoration: underline;">Hel</span>l<span style="background-color: lightpink; text-decoration: line-through;">L</span>o is that <span style="background-color: lightpink; text-decoration: line-through;">d</span><span style="background-color: palegreen; text-decoration: underline;">D</span>ocumize<span style="background-color: lightpink; text-decoration: line-through;">!</span><span style="background-color: palegreen; text-decoration: underline;">?</span>`}},
{[]string{"abc", "<i>abc</i>", "<h1><i>abc</i></h1>"},
[]string{`<i><span style="` + cfg.ReplacedSpan[0].Val + `">abc</span></i>`,
`<h1><i><span style="` + cfg.ReplacedSpan[0].Val + `">abc</span></i></h1>`}},
{[]string{"<p><span>def</span></p>", "def"},
[]string{`<span style="` + cfg.ReplacedSpan[0].Val + `">def</span>`}},
{[]string{`Documize Logo:<img src="http://documize.com/img/documize-logo.png" alt="Documize">`,
"Documize Logo:", `<img src="http://documize.com/img/documize-logo.png" alt="Documize">`},
[]string{`Documize Logo:<span style="background-color: lightpink; text-decoration: line-through;"><img src="http://documize.com/img/documize-logo.png" alt="Documize"/></span>`,
`<span style="background-color: lightpink; text-decoration: line-through;">Documize Logo:</span><img src="http://documize.com/img/documize-logo.png" alt="Documize"/>`}},
{[]string{"<ul><li>1</li><li>2</li><li>3</li></ul>",
"<ul><li>one</li><li>two</li><li>three</li></ul>",
"<ul><li>1</li><li><i>2</i></li><li>3</li><li>4</li></ul>"},
[]string{`<ul><li><span style="background-color: lightpink; text-decoration: line-through;">1</span><span style="background-color: palegreen; text-decoration: underline;">one</span></li><li><span style="background-color: lightpink; text-decoration: line-through;">2</span><span style="background-color: palegreen; text-decoration: underline;">two</span></li><li><span style="background-color: lightpink; text-decoration: line-through;">3</span><span style="background-color: palegreen; text-decoration: underline;">three</span></li></ul>`,
`<ul><li>1</li><li><i><span style="background-color: lightskyblue; text-decoration: overline;">2</span></i></li><li>3</li><li><span style="background-color: palegreen; text-decoration: underline;">4</span></li></ul>`}},
{[]string{doc1 + doc2 + doc3 + doc4, doc1 + doc2 + doc3 + doc4, doc1 + doc3 + doc4, doc1 + "<i>" + doc2 + "</i>" + doc3 + doc4,
doc1 + doc2 + "inserted" + doc3 + doc4, doc1 + doc2 + doc3 + "<div><p>New Div</p></div>" + doc4},
[]string{``,
`<li><span style="background-color: lightpink; text-decoration: line-through;">Automated document formatting</span></li>`,
`<span style="background-color: lightskyblue; text-decoration: overline;">Automated document formatting</span>`,
`<span style="background-color: palegreen; text-decoration: underline;">inserted</span>`,
``}},
{[]string{bbcNews1 + bbcNews2, bbcNews1 + "<div><i>HTML-Diff-Inserted</i></div>" + bbcNews2},
[]string{`<div><i><span style="` + cfg.InsertedSpan[0].Val + `">HTML-Diff-Inserted</span></i></div>`}},
{[]string{`<table border="1" style="width:100%">
<tr>
<td>Jack</td>
<td>and</td>
<td>Jill</td>
</tr>
<tr>
<td>Derby</td>
<td>and</td>
<td>Joan</td>
</tr>
</table>`,
`<table border="1" style="width:100%">
<tr>
<td colspan="1">Jack</td>
<td><b>and</b></td>
<td>Vera</td>
</tr>
<tr>
<td>Derby</td>
<td><i>locomotive</i></td>
<td>works</td>
</tr>
</table>`,
`<table border="1" style="width:100%">
<tr>
<td>Jack</td>
<td>and</td>
<td>Jill</td>
</tr>
<tr>
<td>Samson</td>
<td>and</td>
<td>Delilah</td>
</tr>
<tr>
<td>Derby</td>
<td>and</td>
<td>Joan</td>
</tr>
</table>`,
`<table border="1" style="width:100%">
<tr>
<td>Jack</td>
<td>and</td>
<td>Jill</td>
</tr>
<tr>
<td>Samson</td>
<td>and</td>
<td>Delilah</td>
</tr>
<tr>
<td>Derby</td>
<td>and</td>
<td>Joan</td>
</tr>
<tr>
<td>Tweedledum</td>
<td>and</td>
<td>Tweedledee</td>
</tr>
</table>`, `<div><b><i>...and now for something completely different.</i></b></div>`},
[]string{`<table border="1" style="width:100%;">
<tbody><tr>
<td>Jack</td>
<td><b><span style="background-color: lightskyblue; text-decoration: overline;">and</span></b></td>
<td><span style="background-color: lightpink; text-decoration: line-through;">Jill</span><span style="background-color: palegreen; text-decoration: underline;">Vera</span></td>
</tr>
<tr>
<td>Derby</td>
<td><span style="background-color: lightpink; text-decoration: line-through;">and</span><i><span style="background-color: palegreen; text-decoration: underline;">locomotive</span></i></td>
<td><span style="background-color: lightpink; text-decoration: line-through;">J</span><span style="background-color: palegreen; text-decoration: underline;">w</span>o<span style="background-color: lightpink; text-decoration: line-through;">an</span><span style="background-color: palegreen; text-decoration: underline;">rks</span></td>
</tr>
</tbody></table>`,
`<table border="1" style="width:100%;">
<tbody><tr>
<td>Jack</td>
<td>and</td>
<td>Jill</td>
</tr>
<tr>
<td><span style="background-color: lightpink; text-decoration: line-through;">Derby</span><span style="background-color: palegreen; text-decoration: underline;">Samson</span></td>
<td>and</td>
<td><span style="background-color: lightpink; text-decoration: line-through;">Jo</span><span style="background-color: palegreen; text-decoration: underline;">Delil</span>a<span style="background-color: lightpink; text-decoration: line-through;">n</span><span style="background-color: palegreen; text-decoration: underline;">h</span></td>
</tr>
<span style="background-color: palegreen; text-decoration: underline;"> </span><tr><span style="background-color: palegreen; text-decoration: underline;">
</span><td><span style="background-color: palegreen; text-decoration: underline;">Derby</span></td><span style="background-color: palegreen; text-decoration: underline;">
</span><td><span style="background-color: palegreen; text-decoration: underline;">and</span></td><span style="background-color: palegreen; text-decoration: underline;">
</span><td><span style="background-color: palegreen; text-decoration: underline;">Joan</span></td><span style="background-color: palegreen; text-decoration: underline;">
</span></tr><span style="background-color: palegreen; text-decoration: underline;">
</span></tbody></table>`,
`<table border="1" style="width:100%;">
<tbody><tr>
<td>Jack</td>
<td>and</td>
<td>Jill</td>
</tr>
<tr>
<td><span style="background-color: lightpink; text-decoration: line-through;">Derby</span><span style="background-color: palegreen; text-decoration: underline;">Samson</span></td>
<td>and</td>
<td><span style="background-color: lightpink; text-decoration: line-through;">Jo</span><span style="background-color: palegreen; text-decoration: underline;">Delil</span>a<span style="background-color: lightpink; text-decoration: line-through;">n</span><span style="background-color: palegreen; text-decoration: underline;">h</span></td>
</tr>
<span style="background-color: palegreen; text-decoration: underline;"> </span><tr><span style="background-color: palegreen; text-decoration: underline;">
</span><td><span style="background-color: palegreen; text-decoration: underline;">Derby</span></td><span style="background-color: palegreen; text-decoration: underline;">
</span><td><span style="background-color: palegreen; text-decoration: underline;">and</span></td><span style="background-color: palegreen; text-decoration: underline;">
</span><td><span style="background-color: palegreen; text-decoration: underline;">Joan</span></td><span style="background-color: palegreen; text-decoration: underline;">
</span></tr><span style="background-color: palegreen; text-decoration: underline;">
</span><tr><span style="background-color: palegreen; text-decoration: underline;">
</span><td><span style="background-color: palegreen; text-decoration: underline;">Tweedledum</span></td><span style="background-color: palegreen; text-decoration: underline;">
</span><td><span style="background-color: palegreen; text-decoration: underline;">and</span></td><span style="background-color: palegreen; text-decoration: underline;">
</span><td><span style="background-color: palegreen; text-decoration: underline;">Tweedledee</span></td><span style="background-color: palegreen; text-decoration: underline;">
</span></tr><span style="background-color: palegreen; text-decoration: underline;">
</span></tbody></table>`,
`<table border="1" style="width:100%;"><span style="background-color: lightpink; text-decoration: line-through;">
</span><tbody><tr><span style="background-color: lightpink; text-decoration: line-through;">
</span><td><span style="background-color: lightpink; text-decoration: line-through;">Jack</span></td><span style="background-color: lightpink; text-decoration: line-through;">
</span><td><span style="background-color: lightpink; text-decoration: line-through;">and</span></td><span style="background-color: lightpink; text-decoration: line-through;">
</span><td><span style="background-color: lightpink; text-decoration: line-through;">Jill</span></td><span style="background-color: lightpink; text-decoration: line-through;">
</span></tr><span style="background-color: lightpink; text-decoration: line-through;">
</span><tr><span style="background-color: lightpink; text-decoration: line-through;">
</span><td><span style="background-color: lightpink; text-decoration: line-through;">Derby</span></td><span style="background-color: lightpink; text-decoration: line-through;">
</span><td><span style="background-color: lightpink; text-decoration: line-through;">and</span></td><span style="background-color: lightpink; text-decoration: line-through;">
</span><td><span style="background-color: lightpink; text-decoration: line-through;">Joan</span></td><span style="background-color: lightpink; text-decoration: line-through;">
</span></tr><span style="background-color: lightpink; text-decoration: line-through;">
</span></tbody></table><div><b><i><span style="background-color: palegreen; text-decoration: underline;">...and now for something completely different.</span></i></b></div>`}},
{[]string{"", `<ul><li>A</li><li>B</li><li>C</li></ul>`},
[]string{`<ul><li><span style="background-color: palegreen; text-decoration: underline;">A</span></li><li><span style="background-color: palegreen; text-decoration: underline;">B</span></li><li><span style="background-color: palegreen; text-decoration: underline;">C</span></li></ul>`}},
{[]string{`<p style="">The following typographical conventions are used in this Standard:</p><div style="padding-left:30px;text-indent:-10px">&bull; The first occurrence of a new term is written in italics. [<i>Example</i>: &#8230; is considered <i>normative</i>. <i>end example</i>]</div><div style="padding-left:30px;text-indent:-10px">&bull; A term defined as a basic definition is written in bold. [<i>Example</i>: <b>behavior</b> &#8212; External &#8230; <i>end example</i>]</div><div style="padding-left:30px;text-indent:-10px">&bull; The name of an XML element is written using an Element style. [<i>Example</i>: The root element is document.<i> end example</i>]</div><div style="padding-left:30px;text-indent:-10px">&bull; The name of an XML element attribute is written using an Attribute style. [<i>Example</i>: &#8230; an id attribute.<i> end example</i>]</div><div style="padding-left:30px;text-indent:-10px">&bull; An XML element attribute value is written using a constant-width style. [<i>Example</i>: &#8230; value of CommentReference.<i> end example</i>]</div><div style="padding-left:30px;text-indent:-10px">&bull; An XML element type name is written using a Type style. [<i>Example</i>: &#8230; as values of the xsd:anyURI data type.<i> end example</i>]</div>`,
`<p>The following typographical conventions are used in this Standard:</p>
<div style="padding-left: 30px; text-indent: -10px;"> The first occurrence of a new term is written in italics. [<i>Example</i>: is considered <i>normative</i>. <i>end example</i>]</div>
<div style="padding-left: 30px; text-indent: -10px;"> A term defined as a basic definition is written in bold. [<i>Example</i>: <b>behavior</b> <b>External</b> <i>end example</i>]</div>
<div style="padding-left: 30px; text-indent: -10px;"> The name of an XML element attribute is written using an Attribute style. [<i>Example</i>: an id attribute.<i> end example</i>]</div>
<div style="padding-left: 30px; text-indent: -10px;"> And here is another entry in the list!</div>
<div style="padding-left: 30px; text-indent: -10px;"> An XML element attribute value is written using a constant-width style. [<i>Example</i>: value of CommentReference.<i> end example</i>]</div>
<div style="padding-left: 30px; text-indent: -10px;"> An XML element type name is written using a Type style. [<i>Example</i>: as values of the xsd:anyURI data type.<i> end example</i>]</div>
<div style="padding-left: 30px; text-indent: -10px;"> </div>
<div style="padding-left: 30px; text-indent: -10px;">elephant.</div>`},
[]string{`<p>The following typographical conventions are used in this Standard:</p><span style="background-color: palegreen; text-decoration: underline;">
</span><div style="padding-left:30px;text-indent:-10px;"> The first occurrence of a new term is written in italics. [<i>Example</i>: is considered <i>normative</i>. <i>end example</i>]</div><span style="background-color: palegreen; text-decoration: underline;">
</span><div style="padding-left:30px;text-indent:-10px;"> A term defined as a basic definition is written in bold. [<i>Example</i>: <b>behavior</b> <b><span style="background-color: lightskyblue; text-decoration: overline;">External</span></b> <i>end example</i>]</div><span style="background-color: palegreen; text-decoration: underline;">
</span><div style="padding-left:30px;text-indent:-10px;"> The name of an XML element <span style="background-color: lightpink; text-decoration: line-through;">is written using </span>a<span style="background-color: lightpink; text-decoration: line-through;">n Element style. [</span><i><span style="background-color: lightpink; text-decoration: line-through;">Example</span></i><span style="background-color: lightpink; text-decoration: line-through;">: The </span><span style="background-color: palegreen; text-decoration: underline;">tt</span>r<span style="background-color: lightpink; text-decoration: line-through;">oot element </span>i<span style="background-color: lightpink; text-decoration: line-through;">s document.</span><i><span style="background-color: lightpink; text-decoration: line-through;"> end example</span></i><span style="background-color: lightpink; text-decoration: line-through;">]</span><span style="background-color: lightpink; text-decoration: line-through;"> The name of an XML element attri</span>bute is written using an Attribute style. [<i>Example</i>: an id attribute.<i> end example</i>]</div><span style="background-color: palegreen; text-decoration: underline;">
</span><div style="padding-left:30px;text-indent:-10px;"> An<span style="background-color: palegreen; text-decoration: underline;">d</span> <span style="background-color: palegreen; text-decoration: underline;">here is another entry in the list!</span></div><span style="background-color: palegreen; text-decoration: underline;">
</span><div style="padding-left:30px;text-indent:-10px;"><span style="background-color: palegreen; text-decoration: underline;"> An </span>XML element attribute value is written using a constant-width style. [<i>Example</i>: value of CommentReference.<i> end example</i>]</div><span style="background-color: palegreen; text-decoration: underline;">
</span><div style="padding-left:30px;text-indent:-10px;"> An XML element type name is written using a Type style. [<i>Example</i>: as values of the xsd:anyURI data type.<i> end example</i>]</div><span style="background-color: palegreen; text-decoration: underline;">
</span><div style="padding-left:30px;text-indent:-10px;"><span style="background-color: palegreen; text-decoration: underline;"> </span></div><span style="background-color: palegreen; text-decoration: underline;">
</span><div style="padding-left:30px;text-indent:-10px;"><span style="background-color: palegreen; text-decoration: underline;">elephant.</span></div>`}},
{[]string{`
<p>The Graph that Generates Stories</p>
<p>StoryGraph is a graph designed to generate and narrate the causal interactions between things in a world. The graph can be populated with entities and expressive rules about the interactions between specific entities or different classes of entities. General rules can create new entities in the graph populated with the specific entities that triggered the rule and attributes defined by those entities. Entities have lifetimes and (coming soon) behaviors that trigger events over time.</p>
<p>Story graph is inspired by <a href="http://www.cs.cmu.edu/~cmartens/thesis/">progamming interactive worlds with linear logic</a> by <a href="http://www.cs.cmu.edu/~cmartens/index.html">Chris Martens</a> although it doesn&#8217;t realize any of the specific principles she develops in that thesis.</p>
<p>There is a more or less fleshed out example in ./example.js that produces sometimes surreal interactions in a snowy forest. To run that example, clone the repo and run the example directly with node.js:</p>
<pre><code class="language-shell">$ node example.js
</code></pre>
<p>You will see output something like this:</p>
<pre><code>The river joins with the shadow for a moment. The river does a whirling dance with the shadow. A bluejay discovers the river dancing with the shadow. A bluejay observes the patterns of the river dancing with the shadow. A bluejay dwells in the stillness of life. A duck approaches the whisper. A duck and the whisper pass eachother quietly.
</code></pre>
<p>This project is licensed under the terms of the MIT license.</p>
<p>##Types</p>
<p>The first step is to define types. While it is possible to make rules that apply to specific things, you&#8217;ll probably want to make general rules that apply to classes of things. First the basics:</p>
`, `<p>The Graph that Generates Stories</p>
<p>StoryGraph is a graph designed to generate and narrate the causal interactions between things in a world. The graph can be populated with entities and expressive rules about the interactions between specific entities or different classes of entities. General rules can create new entities in the graph populated with the specific entities that triggered the rule and attributes defined by those entities. Entities have lifetimes and (coming soon) behaviors that trigger events over time.</p>
<p>Story graph is inspired by <a href="http://www.cs.cmu.edu/~cmartens/thesis/">progamming interactive worlds with linear logic</a> by <a href="http://www.cs.cmu.edu/~cmartens/index.html">Chris Martens</a> although it doesnt realize any of the specific principles she develops in that thesis.</p>
<p>There is a more or less fleshed out example in ./example.js that produces sometimes surreal interactions in a snowy forest. To run that example, clone the repo and run the example directly with node.js:</p>
<pre><code class="language-shell">$ node example.js
</code></pre>
<p>Elliott has input another line.</p>
<p>You will see output something like this:</p>
<pre><code>The river joins with the shadow for a moment. The river does a whirling dance with the shadow. A bluejay discovers the river dancing with the shadow. A bluejay observes the patterns of the river dancing with the shadow. A bluejay dwells in the stillness of life. A duck approaches the whisper. A duck and the whisper pass eachother quietly.
</code></pre>
<p>This project is licensed under the terms of the MIT license.</p>
<p>##Types</p>
<p>The first step is to define types. While it is possible to make rules that apply to specific things, youll probably want to make general rules that apply to classes of things. First the basics:</p>`},
[]string{`<p>The Graph that Generates Stories</p>
<span style="background-color: lightpink; text-decoration: line-through;">
</span><p>StoryGraph is a graph designed to generate and narrate the causal interactions between things in a world. The graph can be populated with entities and expressive rules about the interactions between specific entities or different classes of entities. General rules can create new entities in the graph populated with the specific entities that triggered the rule and attributes defined by those entities. Entities have lifetimes and (coming soon) behaviors that trigger events over time.</p><span style="background-color: lightpink; text-decoration: line-through;">
</span>
<p>Story graph is inspired by <a href="http://www.cs.cmu.edu/~cmartens/thesis/">progamming interactive worlds with linear logic</a> by <a href="http://www.cs.cmu.edu/~cmartens/index.html">Chris Martens</a> although it doesnt realize any of the specific principles she develops in that thesis.</p><span style="background-color: lightpink; text-decoration: line-through;">
</span>
<p>There is a more or less fleshed out example in ./example.js that produces sometimes surreal interactions in a snowy forest. To run that example, clone the repo and run the example directly with node.js:</p>
<span style="background-color: lightpink; text-decoration: line-through;">
</span><pre><code class="language-shell">$ node example.js
</code></pre>
<p><span style="background-color: palegreen; text-decoration: underline;">Elliott has input another line.</span></p>
<p>You will see output something like this:</p><span style="background-color: lightpink; text-decoration: line-through;">
</span>
<pre><code>The river joins with the shadow for a moment. The river does a whirling dance with the shadow. A bluejay discovers the river dancing with the shadow. A bluejay observes the patterns of the river dancing with the shadow. A bluejay dwells in the stillness of life. A duck approaches the whisper. A duck and the whisper pass eachother quietly.
</code></pre>
<span style="background-color: lightpink; text-decoration: line-through;">
</span><p>This project is licensed under the terms of the MIT license.</p><span style="background-color: lightpink; text-decoration: line-through;">
</span>
<p>##Types</p><span style="background-color: lightpink; text-decoration: line-through;">
</span>
<p>The first step is to define types. While it is possible to make rules that apply to specific things, youll probably want to make general rules that apply to classes of things. First the basics:</p><span style="background-color: lightpink; text-decoration: line-through;">
</span>`}},
}
func TestSimple(t *testing.T) {
for s, st := range simpleTests {
res, err := cfg.HTMLdiff(st.versions)
if err != nil {
t.Errorf("Simple test %d had error %v", s, err)
}
for d := range st.diffs {
if d < len(res) {
fn := fmt.Sprintf("testout/simple%d-%d.html", s, d)
err := ioutil.WriteFile(fn, []byte(res[d]), 0777)
if err != nil {
t.Error(err)
}
if !strings.Contains(res[d], st.diffs[d]) {
if len(res[d]) < 1024 {
t.Errorf("Simple test %d diff %d wanted: `%s` got: `%s`", s, d, st.diffs[d], res[d])
} else {
t.Errorf("Simple test %d diff %d failed see file: `%s`", s, d, fn)
}
}
}
}
}
}
func TestTimeoutAndMemory(t *testing.T) {
dir := "." + string(os.PathSeparator) + "testin"
files, err := ioutil.ReadDir(dir)
if err != nil {
t.Fatal(err)
}
testHTML := make([]string, 0, len(files))
names := make([]string, 0, len(files))
for _, file := range files {
fn := file.Name()
if strings.HasSuffix(fn, ".html") {
ffn := dir + string(os.PathSeparator) + fn
dat, err := ioutil.ReadFile(ffn)
if err != nil {
t.Fatal(err)
}
testHTML = append(testHTML, string(dat))
names = append(names, fn)
}
}
fmt.Println("NOTE: this test may take a few minutes to run")
var ms runtime.MemStats
var alloc1 uint64
goroutineCount1 := 2 // the number of goroutines in a quiet state, more if test flags are used
for i := 0; i < 2; i++ {
testToMem(testHTML, names, t)
limit := 60
var goroutineCount, secs int
for secs = 1; secs <= limit; secs++ {
time.Sleep(time.Second) // wait for the timeout goroutines to finish
goroutineCount, _ = runtime.GoroutineProfile(nil)
if goroutineCount == goroutineCount1 {
goto correctGoroutines
}
}
t.Error(fmt.Sprintln("after ", secs, "seconds, num goroutines", goroutineCount, "when should be", goroutineCount1))
correctGoroutines:
runtime.GC()
runtime.ReadMemStats(&ms)
if alloc1 == 0 {
alloc1 = ms.Alloc // this is set here to allow for static data set-up by 1st pass through
fmt.Println("NOTE: base case established in", secs, "seconds. Memory used=", alloc1)
} else {
increase := (100.0 * float64(ms.Alloc) / float64(alloc1)) - 100.0
if increase > 0.2 { // %
t.Error(fmt.Sprintln("run", i, "memory usage changed from", alloc1, "to", ms.Alloc, "increase:", ms.Alloc-alloc1, "which is", increase, "%"))
}
}
}
}
func testToMem(testHTML, names []string, t *testing.T) {
for f := range testHTML {
args := []string{testHTML[f], strings.ToLower(testHTML[f])}
_, err := cfg.HTMLdiff(args) // don't care about the result as we are looking for crashes and time-outs
if err != nil {
if names[f] != "google.html" && names[f] != "bing.html" { // we expect errors on these two
t.Errorf("comparing %s with its lower-case self error: %s", names[f], err)
}
}
}
}

68
vendor/github.com/documize/html-diff/nodes.go generated vendored Normal file
View file

@ -0,0 +1,68 @@
package htmldiff
import (
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
func copyNode(to, from *html.Node) {
to.Attr = from.Attr
to.Data = from.Data
to.DataAtom = from.DataAtom
to.Namespace = from.Namespace
to.Type = from.Type
}
func nodeEqual(base, comp *html.Node) bool {
return base.Data == comp.Data && nodeEqualExText(base, comp)
}
// findBody finds the first body HTML node if it exists in the tree. Required to bypass the page title text.
func findBody(n *html.Node) *html.Node {
if n.Type == html.ElementNode && n.DataAtom == atom.Body {
return n
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
r := findBody(c)
if r != nil {
return r
}
}
return nil
}
// find the first leaf in the tree that is a text node.
func firstLeaf(n *html.Node) (*html.Node, bool) {
if n != nil {
switch n.Type {
case html.TextNode:
return n, true
}
// no valid node found
for c := n.FirstChild; c != nil; c = c.NextSibling {
r, ok := firstLeaf(c)
if ok {
return r, ok
}
}
}
return nil, false
}
// find if this or any parent is a container element where position is important like a list or table.
func inContainer(n *html.Node) bool {
if n == nil {
return false
}
if n.Type == html.ElementNode {
switch n.DataAtom {
case atom.Body:
return false
case atom.Td, atom.Th, atom.Tr, atom.Tbody, atom.Thead, atom.Tfoot,
atom.Table, atom.Caption, atom.Colgroup, atom.Col, // tables
atom.Li, atom.Ul, atom.Ol: // lists
return true
}
}
return inContainer(n.Parent)
}

47
vendor/github.com/documize/html-diff/pos.go generated vendored Normal file
View file

@ -0,0 +1,47 @@
package htmldiff
import "golang.org/x/net/html"
// posTT gives the relative position within one level of a nested container.
type posTT struct {
nodesBefore int
node *html.Node
}
// posT gives the relative position within a nested set of containers.
type posT []posTT
// getPos returns the relative posion of this node within the enclosing containers, if there are any.
func getPos(n *html.Node) posT {
if n == nil {
return nil
}
depth := 0
for root := n; inContainer(root); root = root.Parent {
depth++
}
ret := make([]posTT, 0, depth) // for speed
for root := n; depth > 0; root = root.Parent {
var before int
for sib := root.Parent.FirstChild; sib != root; sib = sib.NextSibling {
if sib.Type == html.ElementNode {
before++
}
}
ret = append(ret, posTT{before, root})
depth--
}
return ret
}
func posEqual(a, b posT) bool {
if len(a) != len(b) {
return false
}
for i, aa := range a {
if aa.nodesBefore != b[i].nodesBefore {
return false
}
}
return true
}

2589
vendor/github.com/documize/html-diff/testin/bbc.html generated vendored Normal file

File diff suppressed because one or more lines are too long

57
vendor/github.com/documize/html-diff/testin/bing.html generated vendored Normal file

File diff suppressed because one or more lines are too long

1261
vendor/github.com/documize/html-diff/testin/google.html generated vendored Normal file

File diff suppressed because one or more lines are too long

1749
vendor/github.com/documize/html-diff/testin/reuters.html generated vendored Normal file

File diff suppressed because it is too large Load diff

124
vendor/github.com/documize/html-diff/treerunes.go generated vendored Normal file
View file

@ -0,0 +1,124 @@
package htmldiff
import (
"github.com/mb0/diff"
"golang.org/x/net/html"
)
// treeRune holds an individual rune in the HTML along with the node it is in and, for convienience, its position (if in a container).
type treeRune struct {
leaf *html.Node
letter rune
pos posT
}
// diffData is a type that exists in order to provide a diff.Data interface. It holds the two sets of treeRunes to difference.
type diffData struct {
a, b *[]treeRune
}
// Equal exists to fulfill the diff.Data interface.
// NOTE: this is usually the most called function in the package!
func (dd diffData) Equal(i, j int) bool {
if (*dd.a)[i].letter != (*dd.b)[j].letter {
return false
}
if !posEqual((*dd.a)[i].pos, (*dd.b)[j].pos) {
return false
}
return nodeBranchesEqual((*dd.a)[i].leaf, (*dd.b)[j].leaf)
}
// nodeBranchesEqual checks that two leaves come from branches that can be compared.
func nodeBranchesEqual(leafA, leafB *html.Node) bool {
if !nodeEqualExText(leafA, leafB) {
return false
}
if leafA.Parent == nil && leafB.Parent == nil {
return true // at the top of the tree
}
if leafA.Parent != nil && leafB.Parent != nil {
return nodeEqualExText(leafA.Parent, leafB.Parent) // go up to the next level
}
return false // one of the leaves has a parent, the other does not
}
// attrEqual checks that the attributes of two nodes are the same.
func attrEqual(base, comp *html.Node) bool {
if len(comp.Attr) != len(base.Attr) {
return false
}
for a := range comp.Attr {
if comp.Attr[a].Key != base.Attr[a].Key ||
comp.Attr[a].Namespace != base.Attr[a].Namespace ||
comp.Attr[a].Val != base.Attr[a].Val {
return false
}
}
return true
}
// compares nodes excluding their text
func nodeEqualExText(base, comp *html.Node) bool {
if comp.DataAtom != base.DataAtom ||
comp.Namespace != base.Namespace ||
comp.Type != base.Type {
return false
}
return attrEqual(base, comp)
}
// renders a tree of nodes into a slice of treeRunes.
func renderTreeRunes(n *html.Node, tr *[]treeRune) {
p := getPos(n)
if n.FirstChild == nil { // it is a leaf node
switch n.Type {
case html.TextNode:
if len(n.Data) == 0 {
*tr = append(*tr, treeRune{leaf: n, letter: '\u200b' /* zero-width space */, pos: p}) // make sure we catch the node, even if no data
} else {
for _, r := range []rune(n.Data) {
*tr = append(*tr, treeRune{leaf: n, letter: r, pos: p})
}
}
default:
*tr = append(*tr, treeRune{leaf: n, letter: 0, pos: p})
}
} else {
for c := n.FirstChild; c != nil; c = c.NextSibling {
renderTreeRunes(c, tr)
}
}
}
// wrapper for diff.Granular() -- should only concatanate changes for similar text nodes
func granular(gran int, dd diffData, changes []diff.Change) []diff.Change {
ret := make([]diff.Change, 0, len(changes))
startSame := 0
changeCount := 0
lastAleaf, lastBleaf := (*dd.a)[0].leaf, (*dd.b)[0].leaf
for c, cc := range changes {
if cc.A < len(*dd.a) && cc.B < len(*dd.b) &&
lastAleaf.Type == html.TextNode && lastBleaf.Type == html.TextNode &&
(*dd.a)[cc.A].leaf == lastAleaf && (*dd.b)[cc.B].leaf == lastBleaf &&
nodeEqualExText(lastAleaf, lastBleaf) { // TODO is this last constraint required?
// do nothing yet, queue it up until there is a difference
changeCount++
} else { // no match
if changeCount > 0 { // flush
ret = append(ret, diff.Granular(gran, changes[startSame:startSame+changeCount])...)
}
ret = append(ret, cc)
startSame = c + 1 // the one after this
changeCount = 0
if cc.A < len(*dd.a) && cc.B < len(*dd.b) {
lastAleaf, lastBleaf = (*dd.a)[cc.A].leaf, (*dd.b)[cc.B].leaf
}
}
}
if changeCount > 0 { // flush
ret = append(ret, diff.Granular(gran, changes[startSame:])...)
}
return ret
}

View file

@ -0,0 +1,22 @@
Copyright (c) 2012 Martin Schnabel. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,34 @@
diff
====
A difference algorithm package for go.
The algorithm is described by Eugene Myers in
["An O(ND) Difference Algorithm and its Variations"](http://www.xmailserver.org/diff2.pdf).
Example
-------
You can use diff.Ints, diff.Runes, diff.ByteStrings, and diff.Bytes
diff.Runes([]rune("sögen"), []rune("mögen")) // returns []Changes{{0,0,1,1}}
or you can implement diff.Data
type MixedInput struct {
A []int
B []string
}
func (m *MixedInput) Equal(i, j int) bool {
return m.A[i] == len(m.B[j])
}
and call
m := &MixedInput{..}
diff.Diff(len(m.A), len(m.B), m)
Also has granularity functions to merge changes that are close by.
diff.Granular(1, diff.ByteStrings("emtire", "umpire")) // returns []Changes{{0,0,3,3}}
Documentation at http://godoc.org/github.com/mb0/diff

View file

@ -0,0 +1,222 @@
// Copyright 2012 Martin Schnabel. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package diff implements a difference algorithm.
// The algorithm is described in "An O(ND) Difference Algorithm and its Variations", Eugene Myers, Algorithmica Vol. 1 No. 2, 1986, pp. 251-266.
package diff
// A type that satisfies diff.Data can be diffed by this package.
// It typically has two sequences A and B of comparable elements.
type Data interface {
// Equal returns whether the elements at i and j are considered equal.
Equal(i, j int) bool
}
// ByteStrings returns the differences of two strings in bytes.
func ByteStrings(a, b string) []Change {
return Diff(len(a), len(b), &strings{a, b})
}
type strings struct{ a, b string }
func (d *strings) Equal(i, j int) bool { return d.a[i] == d.b[j] }
// Bytes returns the difference of two byte slices
func Bytes(a, b []byte) []Change {
return Diff(len(a), len(b), &bytes{a, b})
}
type bytes struct{ a, b []byte }
func (d *bytes) Equal(i, j int) bool { return d.a[i] == d.b[j] }
// Ints returns the difference of two int slices
func Ints(a, b []int) []Change {
return Diff(len(a), len(b), &ints{a, b})
}
type ints struct{ a, b []int }
func (d *ints) Equal(i, j int) bool { return d.a[i] == d.b[j] }
// Runes returns the difference of two rune slices
func Runes(a, b []rune) []Change {
return Diff(len(a), len(b), &runes{a, b})
}
type runes struct{ a, b []rune }
func (d *runes) Equal(i, j int) bool { return d.a[i] == d.b[j] }
// Granular merges neighboring changes smaller than the specified granularity.
// The changes must be ordered by ascending positions as returned by this package.
func Granular(granularity int, changes []Change) []Change {
if len(changes) == 0 {
return changes
}
gap := 0
for i := 1; i < len(changes); i++ {
curr := changes[i]
prev := changes[i-gap-1]
// same as curr.B-(prev.B+prev.Ins); consistency is key
if curr.A-(prev.A+prev.Del) <= granularity {
// merge changes:
curr = Change{
A: prev.A, B: prev.B, // start at same spot
Del: curr.A - prev.A + curr.Del, // from first to end of second
Ins: curr.B - prev.B + curr.Ins, // from first to end of second
}
gap++
}
changes[i-gap] = curr
}
return changes[:len(changes)-gap]
}
// Diff returns the differences of data.
// data.Equal is called repeatedly with 0<=i<n and 0<=j<m
func Diff(n, m int, data Data) []Change {
c := &context{data: data}
if n > m {
c.flags = make([]byte, n)
} else {
c.flags = make([]byte, m)
}
c.max = n + m + 1
c.compare(0, 0, n, m)
return c.result(n, m)
}
// A Change contains one or more deletions or inserts
// at one position in two sequences.
type Change struct {
A, B int // position in input a and b
Del int // delete Del elements from input a
Ins int // insert Ins elements from input b
}
type context struct {
data Data
flags []byte // element bits 1 delete, 2 insert
max int
// forward and reverse d-path endpoint x components
forward, reverse []int
}
func (c *context) compare(aoffset, boffset, alimit, blimit int) {
// eat common prefix
for aoffset < alimit && boffset < blimit && c.data.Equal(aoffset, boffset) {
aoffset++
boffset++
}
// eat common suffix
for alimit > aoffset && blimit > boffset && c.data.Equal(alimit-1, blimit-1) {
alimit--
blimit--
}
// both equal or b inserts
if aoffset == alimit {
for boffset < blimit {
c.flags[boffset] |= 2
boffset++
}
return
}
// a deletes
if boffset == blimit {
for aoffset < alimit {
c.flags[aoffset] |= 1
aoffset++
}
return
}
x, y := c.findMiddleSnake(aoffset, boffset, alimit, blimit)
c.compare(aoffset, boffset, x, y)
c.compare(x, y, alimit, blimit)
}
func (c *context) findMiddleSnake(aoffset, boffset, alimit, blimit int) (int, int) {
// midpoints
fmid := aoffset - boffset
rmid := alimit - blimit
// correct offset in d-path slices
foff := c.max - fmid
roff := c.max - rmid
isodd := (rmid-fmid)&1 != 0
maxd := (alimit - aoffset + blimit - boffset + 2) / 2
// allocate when first used
if c.forward == nil {
c.forward = make([]int, 2*c.max)
c.reverse = make([]int, 2*c.max)
}
c.forward[c.max+1] = aoffset
c.reverse[c.max-1] = alimit
var x, y int
for d := 0; d <= maxd; d++ {
// forward search
for k := fmid - d; k <= fmid+d; k += 2 {
if k == fmid-d || k != fmid+d && c.forward[foff+k+1] > c.forward[foff+k-1] {
x = c.forward[foff+k+1] // down
} else {
x = c.forward[foff+k-1] + 1 // right
}
y = x - k
for x < alimit && y < blimit && c.data.Equal(x, y) {
x++
y++
}
c.forward[foff+k] = x
if isodd && k > rmid-d && k < rmid+d {
if c.reverse[roff+k] <= c.forward[foff+k] {
return x, x - k
}
}
}
// reverse search x,y correspond to u,v
for k := rmid - d; k <= rmid+d; k += 2 {
if k == rmid+d || k != rmid-d && c.reverse[roff+k-1] < c.reverse[roff+k+1] {
x = c.reverse[roff+k-1] // up
} else {
x = c.reverse[roff+k+1] - 1 // left
}
y = x - k
for x > aoffset && y > boffset && c.data.Equal(x-1, y-1) {
x--
y--
}
c.reverse[roff+k] = x
if !isodd && k >= fmid-d && k <= fmid+d {
if c.reverse[roff+k] <= c.forward[foff+k] {
// lookup opposite end
x = c.forward[foff+k]
return x, x - k
}
}
}
}
panic("should never be reached")
}
func (c *context) result(n, m int) (res []Change) {
var x, y int
for x < n || y < m {
if x < n && y < m && c.flags[x]&1 == 0 && c.flags[y]&2 == 0 {
x++
y++
} else {
a := x
b := y
for x < n && (y >= m || c.flags[x]&1 != 0) {
x++
}
for y < m && (x >= n || c.flags[y]&2 != 0) {
y++
}
if a < x || b < y {
res = append(res, Change{a, b, x - a, y - b})
}
}
}
return
}

View file

@ -0,0 +1,239 @@
// Copyright 2012 Martin Schnabel. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package diff_test
import (
"github.com/mb0/diff"
"testing"
)
type testcase struct {
name string
a, b []int
res []diff.Change
}
var tests = []testcase{
{"shift",
[]int{1, 2, 3},
[]int{0, 1, 2, 3},
[]diff.Change{{0, 0, 0, 1}},
},
{"push",
[]int{1, 2, 3},
[]int{1, 2, 3, 4},
[]diff.Change{{3, 3, 0, 1}},
},
{"unshift",
[]int{0, 1, 2, 3},
[]int{1, 2, 3},
[]diff.Change{{0, 0, 1, 0}},
},
{"pop",
[]int{1, 2, 3, 4},
[]int{1, 2, 3},
[]diff.Change{{3, 3, 1, 0}},
},
{"all changed",
[]int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
[]int{10, 11, 12, 13, 14},
[]diff.Change{
{0, 0, 10, 5},
},
},
{"all same",
[]int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
[]int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
[]diff.Change{},
},
{"wrap",
[]int{1},
[]int{0, 1, 2, 3},
[]diff.Change{
{0, 0, 0, 1},
{1, 2, 0, 2},
},
},
{"snake",
[]int{0, 1, 2, 3, 4, 5},
[]int{1, 2, 3, 4, 5, 6},
[]diff.Change{
{0, 0, 1, 0},
{6, 5, 0, 1},
},
},
// note: input is ambiguous
// first two traces differ from fig.1
// it still is a lcs and ses path
{"paper fig. 1",
[]int{1, 2, 3, 1, 2, 2, 1},
[]int{3, 2, 1, 2, 1, 3},
[]diff.Change{
{0, 0, 1, 1},
{2, 2, 1, 0},
{5, 4, 1, 0},
{7, 5, 0, 1},
},
},
}
func TestDiffAB(t *testing.T) {
for _, test := range tests {
res := diff.Ints(test.a, test.b)
if len(res) != len(test.res) {
t.Error(test.name, "expected length", len(test.res), "for", res)
continue
}
for i, c := range test.res {
if c != res[i] {
t.Error(test.name, "expected ", c, "got", res[i])
}
}
}
}
func TestDiffBA(t *testing.T) {
// interesting: fig.1 Diff(b, a) results in the same path as `diff -d a b`
tests[len(tests)-1].res = []diff.Change{
{0, 0, 2, 0},
{3, 1, 1, 0},
{5, 2, 0, 1},
{7, 5, 0, 1},
}
for _, test := range tests {
res := diff.Ints(test.b, test.a)
if len(res) != len(test.res) {
t.Error(test.name, "expected length", len(test.res), "for", res)
continue
}
for i, c := range test.res {
// flip change data also
rc := diff.Change{c.B, c.A, c.Ins, c.Del}
if rc != res[i] {
t.Error(test.name, "expected ", rc, "got", res[i])
}
}
}
}
func diffsEqual(a, b []diff.Change) bool {
if len(a) != len(b) {
return false
}
for i := 0; i < len(a); i++ {
if a[i] != b[i] {
return false
}
}
return true
}
func TestGranularStrings(t *testing.T) {
a := "abcdefghijklmnopqrstuvwxyza"
b := "AbCdeFghiJklmnOpqrstUvwxyzab"
// each iteration of i increases granularity and will absorb one more lower-letter-followed-by-upper-letters sequence
changesI := [][]diff.Change{
{{0, 0, 1, 1}, {2, 2, 1, 1}, {5, 5, 1, 1}, {9, 9, 1, 1}, {14, 14, 1, 1}, {20, 20, 1, 1}, {27, 27, 0, 1}},
{{0, 0, 3, 3}, {5, 5, 1, 1}, {9, 9, 1, 1}, {14, 14, 1, 1}, {20, 20, 1, 1}, {27, 27, 0, 1}},
{{0, 0, 6, 6}, {9, 9, 1, 1}, {14, 14, 1, 1}, {20, 20, 1, 1}, {27, 27, 0, 1}},
{{0, 0, 10, 10}, {14, 14, 1, 1}, {20, 20, 1, 1}, {27, 27, 0, 1}},
{{0, 0, 15, 15}, {20, 20, 1, 1}, {27, 27, 0, 1}},
{{0, 0, 21, 21}, {27, 27, 0, 1}},
{{0, 0, 27, 28}},
}
for i := 0; i < len(changesI); i++ {
diffs := diff.Granular(i, diff.ByteStrings(a, b))
if !diffsEqual(diffs, changesI[i]) {
t.Errorf("expected %v, got %v", diffs, changesI[i])
}
}
}
func TestDiffRunes(t *testing.T) {
a := []rune("brown fox jumps over the lazy dog")
b := []rune("brwn faax junps ovver the lay dago")
res := diff.Runes(a, b)
echange := []diff.Change{
{2, 2, 1, 0},
{7, 6, 1, 2},
{12, 12, 1, 1},
{18, 18, 0, 1},
{27, 28, 1, 0},
{31, 31, 0, 2},
{32, 34, 1, 0},
}
for i, c := range res {
t.Log(c)
if c != echange[i] {
t.Error("expected", echange[i], "got", c)
}
}
}
func TestDiffByteStrings(t *testing.T) {
a := "brown fox jumps over the lazy dog"
b := "brwn faax junps ovver the lay dago"
res := diff.ByteStrings(a, b)
echange := []diff.Change{
{2, 2, 1, 0},
{7, 6, 1, 2},
{12, 12, 1, 1},
{18, 18, 0, 1},
{27, 28, 1, 0},
{31, 31, 0, 2},
{32, 34, 1, 0},
}
for i, c := range res {
t.Log(c)
if c != echange[i] {
t.Error("expected", echange[i], "got", c)
}
}
}
type ints struct{ a, b []int }
func (d *ints) Equal(i, j int) bool { return d.a[i] == d.b[j] }
func BenchmarkDiff(b *testing.B) {
t := tests[len(tests)-1]
d := &ints{t.a, t.b}
n, m := len(d.a), len(d.b)
for i := 0; i < b.N; i++ {
diff.Diff(n, m, d)
}
}
func BenchmarkInts(b *testing.B) {
t := tests[len(tests)-1]
d1 := t.a
d2 := t.b
for i := 0; i < b.N; i++ {
diff.Ints(d1, d2)
}
}
func BenchmarkDiffRunes(b *testing.B) {
d1 := []rune("1231221")
d2 := []rune("321213")
for i := 0; i < b.N; i++ {
diff.Runes(d1, d2)
}
}
func BenchmarkDiffBytes(b *testing.B) {
d1 := []byte("lorem ipsum dolor sit amet consectetur")
d2 := []byte("lorem lovesum daenerys targaryen ami consecteture")
for i := 0; i < b.N; i++ {
diff.Bytes(d1, d2)
}
}
func BenchmarkDiffByteStrings(b *testing.B) {
d1 := "lorem ipsum dolor sit amet consectetur"
d2 := "lorem lovesum daenerys targaryen ami consecteture"
for i := 0; i < b.N; i++ {
diff.ByteStrings(d1, d2)
}
}

View file

@ -0,0 +1,57 @@
// Copyright 2012 Martin Schnabel. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package diff_test
import (
"fmt"
"github.com/mb0/diff"
)
// Diff on inputs with different representations
type MixedInput struct {
A []int
B []string
}
var names map[string]int
func (m *MixedInput) Equal(a, b int) bool {
return m.A[a] == names[m.B[b]]
}
func ExampleDiff() {
names = map[string]int{
"one": 1,
"two": 2,
"three": 3,
}
m := &MixedInput{
[]int{1, 2, 3, 1, 2, 2, 1},
[]string{"three", "two", "one", "two", "one", "three"},
}
changes := diff.Diff(len(m.A), len(m.B), m)
for _, c := range changes {
fmt.Println("change at", c.A, c.B)
}
// Output:
// change at 0 0
// change at 2 2
// change at 5 4
// change at 7 5
}
func ExampleGranular() {
a := "hElLo!"
b := "hello!"
changes := diff.Granular(5, diff.ByteStrings(a, b)) // ignore small gaps in differences
for l := len(changes) - 1; l >= 0; l-- {
change := changes[l]
b = b[:change.B] + "|" + b[change.B:change.B+change.Ins] + "|" + b[change.B+change.Ins:]
}
fmt.Println(b)
// Output:
// h|ell|o!
}

View file

@ -0,0 +1,78 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package atom provides integer codes (also known as atoms) for a fixed set of
// frequently occurring HTML strings: tag names and attribute keys such as "p"
// and "id".
//
// Sharing an atom's name between all elements with the same tag can result in
// fewer string allocations when tokenizing and parsing HTML. Integer
// comparisons are also generally faster than string comparisons.
//
// The value of an atom's particular code is not guaranteed to stay the same
// between versions of this package. Neither is any ordering guaranteed:
// whether atom.H1 < atom.H2 may also change. The codes are not guaranteed to
// be dense. The only guarantees are that e.g. looking up "div" will yield
// atom.Div, calling atom.Div.String will return "div", and atom.Div != 0.
package atom // import "golang.org/x/net/html/atom"
// Atom is an integer code for a string. The zero value maps to "".
type Atom uint32
// String returns the atom's name.
func (a Atom) String() string {
start := uint32(a >> 8)
n := uint32(a & 0xff)
if start+n > uint32(len(atomText)) {
return ""
}
return atomText[start : start+n]
}
func (a Atom) string() string {
return atomText[a>>8 : a>>8+a&0xff]
}
// fnv computes the FNV hash with an arbitrary starting value h.
func fnv(h uint32, s []byte) uint32 {
for i := range s {
h ^= uint32(s[i])
h *= 16777619
}
return h
}
func match(s string, t []byte) bool {
for i, c := range t {
if s[i] != c {
return false
}
}
return true
}
// Lookup returns the atom whose name is s. It returns zero if there is no
// such atom. The lookup is case sensitive.
func Lookup(s []byte) Atom {
if len(s) == 0 || len(s) > maxAtomLen {
return 0
}
h := fnv(hash0, s)
if a := table[h&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
return a
}
if a := table[(h>>16)&uint32(len(table)-1)]; int(a&0xff) == len(s) && match(a.string(), s) {
return a
}
return 0
}
// String returns a string whose contents are equal to s. In that sense, it is
// equivalent to string(s) but may be more efficient.
func String(s []byte) string {
if a := Lookup(s); a != 0 {
return a.String()
}
return string(s)
}

View file

@ -0,0 +1,109 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package atom
import (
"sort"
"testing"
)
func TestKnown(t *testing.T) {
for _, s := range testAtomList {
if atom := Lookup([]byte(s)); atom.String() != s {
t.Errorf("Lookup(%q) = %#x (%q)", s, uint32(atom), atom.String())
}
}
}
func TestHits(t *testing.T) {
for _, a := range table {
if a == 0 {
continue
}
got := Lookup([]byte(a.String()))
if got != a {
t.Errorf("Lookup(%q) = %#x, want %#x", a.String(), uint32(got), uint32(a))
}
}
}
func TestMisses(t *testing.T) {
testCases := []string{
"",
"\x00",
"\xff",
"A",
"DIV",
"Div",
"dIV",
"aa",
"a\x00",
"ab",
"abb",
"abbr0",
"abbr ",
" abbr",
" a",
"acceptcharset",
"acceptCharset",
"accept_charset",
"h0",
"h1h2",
"h7",
"onClick",
"λ",
// The following string has the same hash (0xa1d7fab7) as "onmouseover".
"\x00\x00\x00\x00\x00\x50\x18\xae\x38\xd0\xb7",
}
for _, tc := range testCases {
got := Lookup([]byte(tc))
if got != 0 {
t.Errorf("Lookup(%q): got %d, want 0", tc, got)
}
}
}
func TestForeignObject(t *testing.T) {
const (
afo = Foreignobject
afO = ForeignObject
sfo = "foreignobject"
sfO = "foreignObject"
)
if got := Lookup([]byte(sfo)); got != afo {
t.Errorf("Lookup(%q): got %#v, want %#v", sfo, got, afo)
}
if got := Lookup([]byte(sfO)); got != afO {
t.Errorf("Lookup(%q): got %#v, want %#v", sfO, got, afO)
}
if got := afo.String(); got != sfo {
t.Errorf("Atom(%#v).String(): got %q, want %q", afo, got, sfo)
}
if got := afO.String(); got != sfO {
t.Errorf("Atom(%#v).String(): got %q, want %q", afO, got, sfO)
}
}
func BenchmarkLookup(b *testing.B) {
sortedTable := make([]string, 0, len(table))
for _, a := range table {
if a != 0 {
sortedTable = append(sortedTable, a.String())
}
}
sort.Strings(sortedTable)
x := make([][]byte, 1000)
for i := range x {
x[i] = []byte(sortedTable[i%len(sortedTable)])
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
for _, s := range x {
Lookup(s)
}
}
}

View file

@ -0,0 +1,648 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ignore
package main
// This program generates table.go and table_test.go.
// Invoke as
//
// go run gen.go |gofmt >table.go
// go run gen.go -test |gofmt >table_test.go
import (
"flag"
"fmt"
"math/rand"
"os"
"sort"
"strings"
)
// identifier converts s to a Go exported identifier.
// It converts "div" to "Div" and "accept-charset" to "AcceptCharset".
func identifier(s string) string {
b := make([]byte, 0, len(s))
cap := true
for _, c := range s {
if c == '-' {
cap = true
continue
}
if cap && 'a' <= c && c <= 'z' {
c -= 'a' - 'A'
}
cap = false
b = append(b, byte(c))
}
return string(b)
}
var test = flag.Bool("test", false, "generate table_test.go")
func main() {
flag.Parse()
var all []string
all = append(all, elements...)
all = append(all, attributes...)
all = append(all, eventHandlers...)
all = append(all, extra...)
sort.Strings(all)
if *test {
fmt.Printf("// generated by go run gen.go -test; DO NOT EDIT\n\n")
fmt.Printf("package atom\n\n")
fmt.Printf("var testAtomList = []string{\n")
for _, s := range all {
fmt.Printf("\t%q,\n", s)
}
fmt.Printf("}\n")
return
}
// uniq - lists have dups
// compute max len too
maxLen := 0
w := 0
for _, s := range all {
if w == 0 || all[w-1] != s {
if maxLen < len(s) {
maxLen = len(s)
}
all[w] = s
w++
}
}
all = all[:w]
// Find hash that minimizes table size.
var best *table
for i := 0; i < 1000000; i++ {
if best != nil && 1<<(best.k-1) < len(all) {
break
}
h := rand.Uint32()
for k := uint(0); k <= 16; k++ {
if best != nil && k >= best.k {
break
}
var t table
if t.init(h, k, all) {
best = &t
break
}
}
}
if best == nil {
fmt.Fprintf(os.Stderr, "failed to construct string table\n")
os.Exit(1)
}
// Lay out strings, using overlaps when possible.
layout := append([]string{}, all...)
// Remove strings that are substrings of other strings
for changed := true; changed; {
changed = false
for i, s := range layout {
if s == "" {
continue
}
for j, t := range layout {
if i != j && t != "" && strings.Contains(s, t) {
changed = true
layout[j] = ""
}
}
}
}
// Join strings where one suffix matches another prefix.
for {
// Find best i, j, k such that layout[i][len-k:] == layout[j][:k],
// maximizing overlap length k.
besti := -1
bestj := -1
bestk := 0
for i, s := range layout {
if s == "" {
continue
}
for j, t := range layout {
if i == j {
continue
}
for k := bestk + 1; k <= len(s) && k <= len(t); k++ {
if s[len(s)-k:] == t[:k] {
besti = i
bestj = j
bestk = k
}
}
}
}
if bestk > 0 {
layout[besti] += layout[bestj][bestk:]
layout[bestj] = ""
continue
}
break
}
text := strings.Join(layout, "")
atom := map[string]uint32{}
for _, s := range all {
off := strings.Index(text, s)
if off < 0 {
panic("lost string " + s)
}
atom[s] = uint32(off<<8 | len(s))
}
// Generate the Go code.
fmt.Printf("// generated by go run gen.go; DO NOT EDIT\n\n")
fmt.Printf("package atom\n\nconst (\n")
for _, s := range all {
fmt.Printf("\t%s Atom = %#x\n", identifier(s), atom[s])
}
fmt.Printf(")\n\n")
fmt.Printf("const hash0 = %#x\n\n", best.h0)
fmt.Printf("const maxAtomLen = %d\n\n", maxLen)
fmt.Printf("var table = [1<<%d]Atom{\n", best.k)
for i, s := range best.tab {
if s == "" {
continue
}
fmt.Printf("\t%#x: %#x, // %s\n", i, atom[s], s)
}
fmt.Printf("}\n")
datasize := (1 << best.k) * 4
fmt.Printf("const atomText =\n")
textsize := len(text)
for len(text) > 60 {
fmt.Printf("\t%q +\n", text[:60])
text = text[60:]
}
fmt.Printf("\t%q\n\n", text)
fmt.Fprintf(os.Stderr, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize)
}
type byLen []string
func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) }
func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] }
func (x byLen) Len() int { return len(x) }
// fnv computes the FNV hash with an arbitrary starting value h.
func fnv(h uint32, s string) uint32 {
for i := 0; i < len(s); i++ {
h ^= uint32(s[i])
h *= 16777619
}
return h
}
// A table represents an attempt at constructing the lookup table.
// The lookup table uses cuckoo hashing, meaning that each string
// can be found in one of two positions.
type table struct {
h0 uint32
k uint
mask uint32
tab []string
}
// hash returns the two hashes for s.
func (t *table) hash(s string) (h1, h2 uint32) {
h := fnv(t.h0, s)
h1 = h & t.mask
h2 = (h >> 16) & t.mask
return
}
// init initializes the table with the given parameters.
// h0 is the initial hash value,
// k is the number of bits of hash value to use, and
// x is the list of strings to store in the table.
// init returns false if the table cannot be constructed.
func (t *table) init(h0 uint32, k uint, x []string) bool {
t.h0 = h0
t.k = k
t.tab = make([]string, 1<<k)
t.mask = 1<<k - 1
for _, s := range x {
if !t.insert(s) {
return false
}
}
return true
}
// insert inserts s in the table.
func (t *table) insert(s string) bool {
h1, h2 := t.hash(s)
if t.tab[h1] == "" {
t.tab[h1] = s
return true
}
if t.tab[h2] == "" {
t.tab[h2] = s
return true
}
if t.push(h1, 0) {
t.tab[h1] = s
return true
}
if t.push(h2, 0) {
t.tab[h2] = s
return true
}
return false
}
// push attempts to push aside the entry in slot i.
func (t *table) push(i uint32, depth int) bool {
if depth > len(t.tab) {
return false
}
s := t.tab[i]
h1, h2 := t.hash(s)
j := h1 + h2 - i
if t.tab[j] != "" && !t.push(j, depth+1) {
return false
}
t.tab[j] = s
return true
}
// The lists of element names and attribute keys were taken from
// https://html.spec.whatwg.org/multipage/indices.html#index
// as of the "HTML Living Standard - Last Updated 21 February 2015" version.
var elements = []string{
"a",
"abbr",
"address",
"area",
"article",
"aside",
"audio",
"b",
"base",
"bdi",
"bdo",
"blockquote",
"body",
"br",
"button",
"canvas",
"caption",
"cite",
"code",
"col",
"colgroup",
"command",
"data",
"datalist",
"dd",
"del",
"details",
"dfn",
"dialog",
"div",
"dl",
"dt",
"em",
"embed",
"fieldset",
"figcaption",
"figure",
"footer",
"form",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"head",
"header",
"hgroup",
"hr",
"html",
"i",
"iframe",
"img",
"input",
"ins",
"kbd",
"keygen",
"label",
"legend",
"li",
"link",
"map",
"mark",
"menu",
"menuitem",
"meta",
"meter",
"nav",
"noscript",
"object",
"ol",
"optgroup",
"option",
"output",
"p",
"param",
"pre",
"progress",
"q",
"rp",
"rt",
"ruby",
"s",
"samp",
"script",
"section",
"select",
"small",
"source",
"span",
"strong",
"style",
"sub",
"summary",
"sup",
"table",
"tbody",
"td",
"template",
"textarea",
"tfoot",
"th",
"thead",
"time",
"title",
"tr",
"track",
"u",
"ul",
"var",
"video",
"wbr",
}
// https://html.spec.whatwg.org/multipage/indices.html#attributes-3
var attributes = []string{
"abbr",
"accept",
"accept-charset",
"accesskey",
"action",
"alt",
"async",
"autocomplete",
"autofocus",
"autoplay",
"challenge",
"charset",
"checked",
"cite",
"class",
"cols",
"colspan",
"command",
"content",
"contenteditable",
"contextmenu",
"controls",
"coords",
"crossorigin",
"data",
"datetime",
"default",
"defer",
"dir",
"dirname",
"disabled",
"download",
"draggable",
"dropzone",
"enctype",
"for",
"form",
"formaction",
"formenctype",
"formmethod",
"formnovalidate",
"formtarget",
"headers",
"height",
"hidden",
"high",
"href",
"hreflang",
"http-equiv",
"icon",
"id",
"inputmode",
"ismap",
"itemid",
"itemprop",
"itemref",
"itemscope",
"itemtype",
"keytype",
"kind",
"label",
"lang",
"list",
"loop",
"low",
"manifest",
"max",
"maxlength",
"media",
"mediagroup",
"method",
"min",
"minlength",
"multiple",
"muted",
"name",
"novalidate",
"open",
"optimum",
"pattern",
"ping",
"placeholder",
"poster",
"preload",
"radiogroup",
"readonly",
"rel",
"required",
"reversed",
"rows",
"rowspan",
"sandbox",
"spellcheck",
"scope",
"scoped",
"seamless",
"selected",
"shape",
"size",
"sizes",
"sortable",
"sorted",
"span",
"src",
"srcdoc",
"srclang",
"start",
"step",
"style",
"tabindex",
"target",
"title",
"translate",
"type",
"typemustmatch",
"usemap",
"value",
"width",
"wrap",
}
var eventHandlers = []string{
"onabort",
"onautocomplete",
"onautocompleteerror",
"onafterprint",
"onbeforeprint",
"onbeforeunload",
"onblur",
"oncancel",
"oncanplay",
"oncanplaythrough",
"onchange",
"onclick",
"onclose",
"oncontextmenu",
"oncuechange",
"ondblclick",
"ondrag",
"ondragend",
"ondragenter",
"ondragleave",
"ondragover",
"ondragstart",
"ondrop",
"ondurationchange",
"onemptied",
"onended",
"onerror",
"onfocus",
"onhashchange",
"oninput",
"oninvalid",
"onkeydown",
"onkeypress",
"onkeyup",
"onlanguagechange",
"onload",
"onloadeddata",
"onloadedmetadata",
"onloadstart",
"onmessage",
"onmousedown",
"onmousemove",
"onmouseout",
"onmouseover",
"onmouseup",
"onmousewheel",
"onoffline",
"ononline",
"onpagehide",
"onpageshow",
"onpause",
"onplay",
"onplaying",
"onpopstate",
"onprogress",
"onratechange",
"onreset",
"onresize",
"onscroll",
"onseeked",
"onseeking",
"onselect",
"onshow",
"onsort",
"onstalled",
"onstorage",
"onsubmit",
"onsuspend",
"ontimeupdate",
"ontoggle",
"onunload",
"onvolumechange",
"onwaiting",
}
// extra are ad-hoc values not covered by any of the lists above.
var extra = []string{
"align",
"annotation",
"annotation-xml",
"applet",
"basefont",
"bgsound",
"big",
"blink",
"center",
"color",
"desc",
"face",
"font",
"foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive.
"foreignobject",
"frame",
"frameset",
"image",
"isindex",
"listing",
"malignmark",
"marquee",
"math",
"mglyph",
"mi",
"mn",
"mo",
"ms",
"mtext",
"nobr",
"noembed",
"noframes",
"plaintext",
"prompt",
"public",
"spacer",
"strike",
"svg",
"system",
"tt",
"xmp",
}

View file

@ -0,0 +1,713 @@
// generated by go run gen.go; DO NOT EDIT
package atom
const (
A Atom = 0x1
Abbr Atom = 0x4
Accept Atom = 0x2106
AcceptCharset Atom = 0x210e
Accesskey Atom = 0x3309
Action Atom = 0x1f606
Address Atom = 0x4f307
Align Atom = 0x1105
Alt Atom = 0x4503
Annotation Atom = 0x1670a
AnnotationXml Atom = 0x1670e
Applet Atom = 0x2b306
Area Atom = 0x2fa04
Article Atom = 0x38807
Aside Atom = 0x8305
Async Atom = 0x7b05
Audio Atom = 0xa605
Autocomplete Atom = 0x1fc0c
Autofocus Atom = 0xb309
Autoplay Atom = 0xce08
B Atom = 0x101
Base Atom = 0xd604
Basefont Atom = 0xd608
Bdi Atom = 0x1a03
Bdo Atom = 0xe703
Bgsound Atom = 0x11807
Big Atom = 0x12403
Blink Atom = 0x12705
Blockquote Atom = 0x12c0a
Body Atom = 0x2f04
Br Atom = 0x202
Button Atom = 0x13606
Canvas Atom = 0x7f06
Caption Atom = 0x1bb07
Center Atom = 0x5b506
Challenge Atom = 0x21f09
Charset Atom = 0x2807
Checked Atom = 0x32807
Cite Atom = 0x3c804
Class Atom = 0x4de05
Code Atom = 0x14904
Col Atom = 0x15003
Colgroup Atom = 0x15008
Color Atom = 0x15d05
Cols Atom = 0x16204
Colspan Atom = 0x16207
Command Atom = 0x17507
Content Atom = 0x42307
Contenteditable Atom = 0x4230f
Contextmenu Atom = 0x3310b
Controls Atom = 0x18808
Coords Atom = 0x19406
Crossorigin Atom = 0x19f0b
Data Atom = 0x44a04
Datalist Atom = 0x44a08
Datetime Atom = 0x23c08
Dd Atom = 0x26702
Default Atom = 0x8607
Defer Atom = 0x14b05
Del Atom = 0x3ef03
Desc Atom = 0x4db04
Details Atom = 0x4807
Dfn Atom = 0x6103
Dialog Atom = 0x1b06
Dir Atom = 0x6903
Dirname Atom = 0x6907
Disabled Atom = 0x10c08
Div Atom = 0x11303
Dl Atom = 0x11e02
Download Atom = 0x40008
Draggable Atom = 0x17b09
Dropzone Atom = 0x39108
Dt Atom = 0x50902
Em Atom = 0x6502
Embed Atom = 0x6505
Enctype Atom = 0x21107
Face Atom = 0x5b304
Fieldset Atom = 0x1b008
Figcaption Atom = 0x1b80a
Figure Atom = 0x1cc06
Font Atom = 0xda04
Footer Atom = 0x8d06
For Atom = 0x1d803
ForeignObject Atom = 0x1d80d
Foreignobject Atom = 0x1e50d
Form Atom = 0x1f204
Formaction Atom = 0x1f20a
Formenctype Atom = 0x20d0b
Formmethod Atom = 0x2280a
Formnovalidate Atom = 0x2320e
Formtarget Atom = 0x2470a
Frame Atom = 0x9a05
Frameset Atom = 0x9a08
H1 Atom = 0x26e02
H2 Atom = 0x29402
H3 Atom = 0x2a702
H4 Atom = 0x2e902
H5 Atom = 0x2f302
H6 Atom = 0x50b02
Head Atom = 0x2d504
Header Atom = 0x2d506
Headers Atom = 0x2d507
Height Atom = 0x25106
Hgroup Atom = 0x25906
Hidden Atom = 0x26506
High Atom = 0x26b04
Hr Atom = 0x27002
Href Atom = 0x27004
Hreflang Atom = 0x27008
Html Atom = 0x25504
HttpEquiv Atom = 0x2780a
I Atom = 0x601
Icon Atom = 0x42204
Id Atom = 0x8502
Iframe Atom = 0x29606
Image Atom = 0x29c05
Img Atom = 0x2a103
Input Atom = 0x3e805
Inputmode Atom = 0x3e809
Ins Atom = 0x1a803
Isindex Atom = 0x2a907
Ismap Atom = 0x2b005
Itemid Atom = 0x33c06
Itemprop Atom = 0x3c908
Itemref Atom = 0x5ad07
Itemscope Atom = 0x2b909
Itemtype Atom = 0x2c308
Kbd Atom = 0x1903
Keygen Atom = 0x3906
Keytype Atom = 0x53707
Kind Atom = 0x10904
Label Atom = 0xf005
Lang Atom = 0x27404
Legend Atom = 0x18206
Li Atom = 0x1202
Link Atom = 0x12804
List Atom = 0x44e04
Listing Atom = 0x44e07
Loop Atom = 0xf404
Low Atom = 0x11f03
Malignmark Atom = 0x100a
Manifest Atom = 0x5f108
Map Atom = 0x2b203
Mark Atom = 0x1604
Marquee Atom = 0x2cb07
Math Atom = 0x2d204
Max Atom = 0x2e103
Maxlength Atom = 0x2e109
Media Atom = 0x6e05
Mediagroup Atom = 0x6e0a
Menu Atom = 0x33804
Menuitem Atom = 0x33808
Meta Atom = 0x45d04
Meter Atom = 0x24205
Method Atom = 0x22c06
Mglyph Atom = 0x2a206
Mi Atom = 0x2eb02
Min Atom = 0x2eb03
Minlength Atom = 0x2eb09
Mn Atom = 0x23502
Mo Atom = 0x3ed02
Ms Atom = 0x2bc02
Mtext Atom = 0x2f505
Multiple Atom = 0x30308
Muted Atom = 0x30b05
Name Atom = 0x6c04
Nav Atom = 0x3e03
Nobr Atom = 0x5704
Noembed Atom = 0x6307
Noframes Atom = 0x9808
Noscript Atom = 0x3d208
Novalidate Atom = 0x2360a
Object Atom = 0x1ec06
Ol Atom = 0xc902
Onabort Atom = 0x13a07
Onafterprint Atom = 0x1c00c
Onautocomplete Atom = 0x1fa0e
Onautocompleteerror Atom = 0x1fa13
Onbeforeprint Atom = 0x6040d
Onbeforeunload Atom = 0x4e70e
Onblur Atom = 0xaa06
Oncancel Atom = 0xe908
Oncanplay Atom = 0x28509
Oncanplaythrough Atom = 0x28510
Onchange Atom = 0x3a708
Onclick Atom = 0x31007
Onclose Atom = 0x31707
Oncontextmenu Atom = 0x32f0d
Oncuechange Atom = 0x3420b
Ondblclick Atom = 0x34d0a
Ondrag Atom = 0x35706
Ondragend Atom = 0x35709
Ondragenter Atom = 0x3600b
Ondragleave Atom = 0x36b0b
Ondragover Atom = 0x3760a
Ondragstart Atom = 0x3800b
Ondrop Atom = 0x38f06
Ondurationchange Atom = 0x39f10
Onemptied Atom = 0x39609
Onended Atom = 0x3af07
Onerror Atom = 0x3b607
Onfocus Atom = 0x3bd07
Onhashchange Atom = 0x3da0c
Oninput Atom = 0x3e607
Oninvalid Atom = 0x3f209
Onkeydown Atom = 0x3fb09
Onkeypress Atom = 0x4080a
Onkeyup Atom = 0x41807
Onlanguagechange Atom = 0x43210
Onload Atom = 0x44206
Onloadeddata Atom = 0x4420c
Onloadedmetadata Atom = 0x45510
Onloadstart Atom = 0x46b0b
Onmessage Atom = 0x47609
Onmousedown Atom = 0x47f0b
Onmousemove Atom = 0x48a0b
Onmouseout Atom = 0x4950a
Onmouseover Atom = 0x4a20b
Onmouseup Atom = 0x4ad09
Onmousewheel Atom = 0x4b60c
Onoffline Atom = 0x4c209
Ononline Atom = 0x4cb08
Onpagehide Atom = 0x4d30a
Onpageshow Atom = 0x4fe0a
Onpause Atom = 0x50d07
Onplay Atom = 0x51706
Onplaying Atom = 0x51709
Onpopstate Atom = 0x5200a
Onprogress Atom = 0x52a0a
Onratechange Atom = 0x53e0c
Onreset Atom = 0x54a07
Onresize Atom = 0x55108
Onscroll Atom = 0x55f08
Onseeked Atom = 0x56708
Onseeking Atom = 0x56f09
Onselect Atom = 0x57808
Onshow Atom = 0x58206
Onsort Atom = 0x58b06
Onstalled Atom = 0x59509
Onstorage Atom = 0x59e09
Onsubmit Atom = 0x5a708
Onsuspend Atom = 0x5bb09
Ontimeupdate Atom = 0xdb0c
Ontoggle Atom = 0x5c408
Onunload Atom = 0x5cc08
Onvolumechange Atom = 0x5d40e
Onwaiting Atom = 0x5e209
Open Atom = 0x3cf04
Optgroup Atom = 0xf608
Optimum Atom = 0x5eb07
Option Atom = 0x60006
Output Atom = 0x49c06
P Atom = 0xc01
Param Atom = 0xc05
Pattern Atom = 0x5107
Ping Atom = 0x7704
Placeholder Atom = 0xc30b
Plaintext Atom = 0xfd09
Poster Atom = 0x15706
Pre Atom = 0x25e03
Preload Atom = 0x25e07
Progress Atom = 0x52c08
Prompt Atom = 0x5fa06
Public Atom = 0x41e06
Q Atom = 0x13101
Radiogroup Atom = 0x30a
Readonly Atom = 0x2fb08
Rel Atom = 0x25f03
Required Atom = 0x1d008
Reversed Atom = 0x5a08
Rows Atom = 0x9204
Rowspan Atom = 0x9207
Rp Atom = 0x1c602
Rt Atom = 0x13f02
Ruby Atom = 0xaf04
S Atom = 0x2c01
Samp Atom = 0x4e04
Sandbox Atom = 0xbb07
Scope Atom = 0x2bd05
Scoped Atom = 0x2bd06
Script Atom = 0x3d406
Seamless Atom = 0x31c08
Section Atom = 0x4e207
Select Atom = 0x57a06
Selected Atom = 0x57a08
Shape Atom = 0x4f905
Size Atom = 0x55504
Sizes Atom = 0x55505
Small Atom = 0x18f05
Sortable Atom = 0x58d08
Sorted Atom = 0x19906
Source Atom = 0x1aa06
Spacer Atom = 0x2db06
Span Atom = 0x9504
Spellcheck Atom = 0x3230a
Src Atom = 0x3c303
Srcdoc Atom = 0x3c306
Srclang Atom = 0x41107
Start Atom = 0x38605
Step Atom = 0x5f704
Strike Atom = 0x53306
Strong Atom = 0x55906
Style Atom = 0x61105
Sub Atom = 0x5a903
Summary Atom = 0x61607
Sup Atom = 0x61d03
Svg Atom = 0x62003
System Atom = 0x62306
Tabindex Atom = 0x46308
Table Atom = 0x42d05
Target Atom = 0x24b06
Tbody Atom = 0x2e05
Td Atom = 0x4702
Template Atom = 0x62608
Textarea Atom = 0x2f608
Tfoot Atom = 0x8c05
Th Atom = 0x22e02
Thead Atom = 0x2d405
Time Atom = 0xdd04
Title Atom = 0xa105
Tr Atom = 0x10502
Track Atom = 0x10505
Translate Atom = 0x14009
Tt Atom = 0x5302
Type Atom = 0x21404
Typemustmatch Atom = 0x2140d
U Atom = 0xb01
Ul Atom = 0x8a02
Usemap Atom = 0x51106
Value Atom = 0x4005
Var Atom = 0x11503
Video Atom = 0x28105
Wbr Atom = 0x12103
Width Atom = 0x50705
Wrap Atom = 0x58704
Xmp Atom = 0xc103
)
const hash0 = 0xc17da63e
const maxAtomLen = 19
var table = [1 << 9]Atom{
0x1: 0x48a0b, // onmousemove
0x2: 0x5e209, // onwaiting
0x3: 0x1fa13, // onautocompleteerror
0x4: 0x5fa06, // prompt
0x7: 0x5eb07, // optimum
0x8: 0x1604, // mark
0xa: 0x5ad07, // itemref
0xb: 0x4fe0a, // onpageshow
0xc: 0x57a06, // select
0xd: 0x17b09, // draggable
0xe: 0x3e03, // nav
0xf: 0x17507, // command
0x11: 0xb01, // u
0x14: 0x2d507, // headers
0x15: 0x44a08, // datalist
0x17: 0x4e04, // samp
0x1a: 0x3fb09, // onkeydown
0x1b: 0x55f08, // onscroll
0x1c: 0x15003, // col
0x20: 0x3c908, // itemprop
0x21: 0x2780a, // http-equiv
0x22: 0x61d03, // sup
0x24: 0x1d008, // required
0x2b: 0x25e07, // preload
0x2c: 0x6040d, // onbeforeprint
0x2d: 0x3600b, // ondragenter
0x2e: 0x50902, // dt
0x2f: 0x5a708, // onsubmit
0x30: 0x27002, // hr
0x31: 0x32f0d, // oncontextmenu
0x33: 0x29c05, // image
0x34: 0x50d07, // onpause
0x35: 0x25906, // hgroup
0x36: 0x7704, // ping
0x37: 0x57808, // onselect
0x3a: 0x11303, // div
0x3b: 0x1fa0e, // onautocomplete
0x40: 0x2eb02, // mi
0x41: 0x31c08, // seamless
0x42: 0x2807, // charset
0x43: 0x8502, // id
0x44: 0x5200a, // onpopstate
0x45: 0x3ef03, // del
0x46: 0x2cb07, // marquee
0x47: 0x3309, // accesskey
0x49: 0x8d06, // footer
0x4a: 0x44e04, // list
0x4b: 0x2b005, // ismap
0x51: 0x33804, // menu
0x52: 0x2f04, // body
0x55: 0x9a08, // frameset
0x56: 0x54a07, // onreset
0x57: 0x12705, // blink
0x58: 0xa105, // title
0x59: 0x38807, // article
0x5b: 0x22e02, // th
0x5d: 0x13101, // q
0x5e: 0x3cf04, // open
0x5f: 0x2fa04, // area
0x61: 0x44206, // onload
0x62: 0xda04, // font
0x63: 0xd604, // base
0x64: 0x16207, // colspan
0x65: 0x53707, // keytype
0x66: 0x11e02, // dl
0x68: 0x1b008, // fieldset
0x6a: 0x2eb03, // min
0x6b: 0x11503, // var
0x6f: 0x2d506, // header
0x70: 0x13f02, // rt
0x71: 0x15008, // colgroup
0x72: 0x23502, // mn
0x74: 0x13a07, // onabort
0x75: 0x3906, // keygen
0x76: 0x4c209, // onoffline
0x77: 0x21f09, // challenge
0x78: 0x2b203, // map
0x7a: 0x2e902, // h4
0x7b: 0x3b607, // onerror
0x7c: 0x2e109, // maxlength
0x7d: 0x2f505, // mtext
0x7e: 0xbb07, // sandbox
0x7f: 0x58b06, // onsort
0x80: 0x100a, // malignmark
0x81: 0x45d04, // meta
0x82: 0x7b05, // async
0x83: 0x2a702, // h3
0x84: 0x26702, // dd
0x85: 0x27004, // href
0x86: 0x6e0a, // mediagroup
0x87: 0x19406, // coords
0x88: 0x41107, // srclang
0x89: 0x34d0a, // ondblclick
0x8a: 0x4005, // value
0x8c: 0xe908, // oncancel
0x8e: 0x3230a, // spellcheck
0x8f: 0x9a05, // frame
0x91: 0x12403, // big
0x94: 0x1f606, // action
0x95: 0x6903, // dir
0x97: 0x2fb08, // readonly
0x99: 0x42d05, // table
0x9a: 0x61607, // summary
0x9b: 0x12103, // wbr
0x9c: 0x30a, // radiogroup
0x9d: 0x6c04, // name
0x9f: 0x62306, // system
0xa1: 0x15d05, // color
0xa2: 0x7f06, // canvas
0xa3: 0x25504, // html
0xa5: 0x56f09, // onseeking
0xac: 0x4f905, // shape
0xad: 0x25f03, // rel
0xae: 0x28510, // oncanplaythrough
0xaf: 0x3760a, // ondragover
0xb0: 0x62608, // template
0xb1: 0x1d80d, // foreignObject
0xb3: 0x9204, // rows
0xb6: 0x44e07, // listing
0xb7: 0x49c06, // output
0xb9: 0x3310b, // contextmenu
0xbb: 0x11f03, // low
0xbc: 0x1c602, // rp
0xbd: 0x5bb09, // onsuspend
0xbe: 0x13606, // button
0xbf: 0x4db04, // desc
0xc1: 0x4e207, // section
0xc2: 0x52a0a, // onprogress
0xc3: 0x59e09, // onstorage
0xc4: 0x2d204, // math
0xc5: 0x4503, // alt
0xc7: 0x8a02, // ul
0xc8: 0x5107, // pattern
0xc9: 0x4b60c, // onmousewheel
0xca: 0x35709, // ondragend
0xcb: 0xaf04, // ruby
0xcc: 0xc01, // p
0xcd: 0x31707, // onclose
0xce: 0x24205, // meter
0xcf: 0x11807, // bgsound
0xd2: 0x25106, // height
0xd4: 0x101, // b
0xd5: 0x2c308, // itemtype
0xd8: 0x1bb07, // caption
0xd9: 0x10c08, // disabled
0xdb: 0x33808, // menuitem
0xdc: 0x62003, // svg
0xdd: 0x18f05, // small
0xde: 0x44a04, // data
0xe0: 0x4cb08, // ononline
0xe1: 0x2a206, // mglyph
0xe3: 0x6505, // embed
0xe4: 0x10502, // tr
0xe5: 0x46b0b, // onloadstart
0xe7: 0x3c306, // srcdoc
0xeb: 0x5c408, // ontoggle
0xed: 0xe703, // bdo
0xee: 0x4702, // td
0xef: 0x8305, // aside
0xf0: 0x29402, // h2
0xf1: 0x52c08, // progress
0xf2: 0x12c0a, // blockquote
0xf4: 0xf005, // label
0xf5: 0x601, // i
0xf7: 0x9207, // rowspan
0xfb: 0x51709, // onplaying
0xfd: 0x2a103, // img
0xfe: 0xf608, // optgroup
0xff: 0x42307, // content
0x101: 0x53e0c, // onratechange
0x103: 0x3da0c, // onhashchange
0x104: 0x4807, // details
0x106: 0x40008, // download
0x109: 0x14009, // translate
0x10b: 0x4230f, // contenteditable
0x10d: 0x36b0b, // ondragleave
0x10e: 0x2106, // accept
0x10f: 0x57a08, // selected
0x112: 0x1f20a, // formaction
0x113: 0x5b506, // center
0x115: 0x45510, // onloadedmetadata
0x116: 0x12804, // link
0x117: 0xdd04, // time
0x118: 0x19f0b, // crossorigin
0x119: 0x3bd07, // onfocus
0x11a: 0x58704, // wrap
0x11b: 0x42204, // icon
0x11d: 0x28105, // video
0x11e: 0x4de05, // class
0x121: 0x5d40e, // onvolumechange
0x122: 0xaa06, // onblur
0x123: 0x2b909, // itemscope
0x124: 0x61105, // style
0x127: 0x41e06, // public
0x129: 0x2320e, // formnovalidate
0x12a: 0x58206, // onshow
0x12c: 0x51706, // onplay
0x12d: 0x3c804, // cite
0x12e: 0x2bc02, // ms
0x12f: 0xdb0c, // ontimeupdate
0x130: 0x10904, // kind
0x131: 0x2470a, // formtarget
0x135: 0x3af07, // onended
0x136: 0x26506, // hidden
0x137: 0x2c01, // s
0x139: 0x2280a, // formmethod
0x13a: 0x3e805, // input
0x13c: 0x50b02, // h6
0x13d: 0xc902, // ol
0x13e: 0x3420b, // oncuechange
0x13f: 0x1e50d, // foreignobject
0x143: 0x4e70e, // onbeforeunload
0x144: 0x2bd05, // scope
0x145: 0x39609, // onemptied
0x146: 0x14b05, // defer
0x147: 0xc103, // xmp
0x148: 0x39f10, // ondurationchange
0x149: 0x1903, // kbd
0x14c: 0x47609, // onmessage
0x14d: 0x60006, // option
0x14e: 0x2eb09, // minlength
0x14f: 0x32807, // checked
0x150: 0xce08, // autoplay
0x152: 0x202, // br
0x153: 0x2360a, // novalidate
0x156: 0x6307, // noembed
0x159: 0x31007, // onclick
0x15a: 0x47f0b, // onmousedown
0x15b: 0x3a708, // onchange
0x15e: 0x3f209, // oninvalid
0x15f: 0x2bd06, // scoped
0x160: 0x18808, // controls
0x161: 0x30b05, // muted
0x162: 0x58d08, // sortable
0x163: 0x51106, // usemap
0x164: 0x1b80a, // figcaption
0x165: 0x35706, // ondrag
0x166: 0x26b04, // high
0x168: 0x3c303, // src
0x169: 0x15706, // poster
0x16b: 0x1670e, // annotation-xml
0x16c: 0x5f704, // step
0x16d: 0x4, // abbr
0x16e: 0x1b06, // dialog
0x170: 0x1202, // li
0x172: 0x3ed02, // mo
0x175: 0x1d803, // for
0x176: 0x1a803, // ins
0x178: 0x55504, // size
0x179: 0x43210, // onlanguagechange
0x17a: 0x8607, // default
0x17b: 0x1a03, // bdi
0x17c: 0x4d30a, // onpagehide
0x17d: 0x6907, // dirname
0x17e: 0x21404, // type
0x17f: 0x1f204, // form
0x181: 0x28509, // oncanplay
0x182: 0x6103, // dfn
0x183: 0x46308, // tabindex
0x186: 0x6502, // em
0x187: 0x27404, // lang
0x189: 0x39108, // dropzone
0x18a: 0x4080a, // onkeypress
0x18b: 0x23c08, // datetime
0x18c: 0x16204, // cols
0x18d: 0x1, // a
0x18e: 0x4420c, // onloadeddata
0x190: 0xa605, // audio
0x192: 0x2e05, // tbody
0x193: 0x22c06, // method
0x195: 0xf404, // loop
0x196: 0x29606, // iframe
0x198: 0x2d504, // head
0x19e: 0x5f108, // manifest
0x19f: 0xb309, // autofocus
0x1a0: 0x14904, // code
0x1a1: 0x55906, // strong
0x1a2: 0x30308, // multiple
0x1a3: 0xc05, // param
0x1a6: 0x21107, // enctype
0x1a7: 0x5b304, // face
0x1a8: 0xfd09, // plaintext
0x1a9: 0x26e02, // h1
0x1aa: 0x59509, // onstalled
0x1ad: 0x3d406, // script
0x1ae: 0x2db06, // spacer
0x1af: 0x55108, // onresize
0x1b0: 0x4a20b, // onmouseover
0x1b1: 0x5cc08, // onunload
0x1b2: 0x56708, // onseeked
0x1b4: 0x2140d, // typemustmatch
0x1b5: 0x1cc06, // figure
0x1b6: 0x4950a, // onmouseout
0x1b7: 0x25e03, // pre
0x1b8: 0x50705, // width
0x1b9: 0x19906, // sorted
0x1bb: 0x5704, // nobr
0x1be: 0x5302, // tt
0x1bf: 0x1105, // align
0x1c0: 0x3e607, // oninput
0x1c3: 0x41807, // onkeyup
0x1c6: 0x1c00c, // onafterprint
0x1c7: 0x210e, // accept-charset
0x1c8: 0x33c06, // itemid
0x1c9: 0x3e809, // inputmode
0x1cb: 0x53306, // strike
0x1cc: 0x5a903, // sub
0x1cd: 0x10505, // track
0x1ce: 0x38605, // start
0x1d0: 0xd608, // basefont
0x1d6: 0x1aa06, // source
0x1d7: 0x18206, // legend
0x1d8: 0x2d405, // thead
0x1da: 0x8c05, // tfoot
0x1dd: 0x1ec06, // object
0x1de: 0x6e05, // media
0x1df: 0x1670a, // annotation
0x1e0: 0x20d0b, // formenctype
0x1e2: 0x3d208, // noscript
0x1e4: 0x55505, // sizes
0x1e5: 0x1fc0c, // autocomplete
0x1e6: 0x9504, // span
0x1e7: 0x9808, // noframes
0x1e8: 0x24b06, // target
0x1e9: 0x38f06, // ondrop
0x1ea: 0x2b306, // applet
0x1ec: 0x5a08, // reversed
0x1f0: 0x2a907, // isindex
0x1f3: 0x27008, // hreflang
0x1f5: 0x2f302, // h5
0x1f6: 0x4f307, // address
0x1fa: 0x2e103, // max
0x1fb: 0xc30b, // placeholder
0x1fc: 0x2f608, // textarea
0x1fe: 0x4ad09, // onmouseup
0x1ff: 0x3800b, // ondragstart
}
const atomText = "abbradiogrouparamalignmarkbdialogaccept-charsetbodyaccesskey" +
"genavaluealtdetailsampatternobreversedfnoembedirnamediagroup" +
"ingasyncanvasidefaultfooterowspanoframesetitleaudionblurubya" +
"utofocusandboxmplaceholderautoplaybasefontimeupdatebdoncance" +
"labelooptgrouplaintextrackindisabledivarbgsoundlowbrbigblink" +
"blockquotebuttonabortranslatecodefercolgroupostercolorcolspa" +
"nnotation-xmlcommandraggablegendcontrolsmallcoordsortedcross" +
"originsourcefieldsetfigcaptionafterprintfigurequiredforeignO" +
"bjectforeignobjectformactionautocompleteerrorformenctypemust" +
"matchallengeformmethodformnovalidatetimeterformtargetheightm" +
"lhgroupreloadhiddenhigh1hreflanghttp-equivideoncanplaythroug" +
"h2iframeimageimglyph3isindexismappletitemscopeditemtypemarqu" +
"eematheaderspacermaxlength4minlength5mtextareadonlymultiplem" +
"utedonclickoncloseamlesspellcheckedoncontextmenuitemidoncuec" +
"hangeondblclickondragendondragenterondragleaveondragoverondr" +
"agstarticleondropzonemptiedondurationchangeonendedonerroronf" +
"ocusrcdocitempropenoscriptonhashchangeoninputmodeloninvalido" +
"nkeydownloadonkeypressrclangonkeyupublicontenteditableonlang" +
"uagechangeonloadeddatalistingonloadedmetadatabindexonloadsta" +
"rtonmessageonmousedownonmousemoveonmouseoutputonmouseoveronm" +
"ouseuponmousewheelonofflineononlineonpagehidesclassectionbef" +
"oreunloaddresshapeonpageshowidth6onpausemaponplayingonpopsta" +
"teonprogresstrikeytypeonratechangeonresetonresizestrongonscr" +
"ollonseekedonseekingonselectedonshowraponsortableonstalledon" +
"storageonsubmitemrefacenteronsuspendontoggleonunloadonvolume" +
"changeonwaitingoptimumanifestepromptoptionbeforeprintstylesu" +
"mmarysupsvgsystemplate"

View file

@ -0,0 +1,351 @@
// generated by go run gen.go -test; DO NOT EDIT
package atom
var testAtomList = []string{
"a",
"abbr",
"abbr",
"accept",
"accept-charset",
"accesskey",
"action",
"address",
"align",
"alt",
"annotation",
"annotation-xml",
"applet",
"area",
"article",
"aside",
"async",
"audio",
"autocomplete",
"autofocus",
"autoplay",
"b",
"base",
"basefont",
"bdi",
"bdo",
"bgsound",
"big",
"blink",
"blockquote",
"body",
"br",
"button",
"canvas",
"caption",
"center",
"challenge",
"charset",
"checked",
"cite",
"cite",
"class",
"code",
"col",
"colgroup",
"color",
"cols",
"colspan",
"command",
"command",
"content",
"contenteditable",
"contextmenu",
"controls",
"coords",
"crossorigin",
"data",
"data",
"datalist",
"datetime",
"dd",
"default",
"defer",
"del",
"desc",
"details",
"dfn",
"dialog",
"dir",
"dirname",
"disabled",
"div",
"dl",
"download",
"draggable",
"dropzone",
"dt",
"em",
"embed",
"enctype",
"face",
"fieldset",
"figcaption",
"figure",
"font",
"footer",
"for",
"foreignObject",
"foreignobject",
"form",
"form",
"formaction",
"formenctype",
"formmethod",
"formnovalidate",
"formtarget",
"frame",
"frameset",
"h1",
"h2",
"h3",
"h4",
"h5",
"h6",
"head",
"header",
"headers",
"height",
"hgroup",
"hidden",
"high",
"hr",
"href",
"hreflang",
"html",
"http-equiv",
"i",
"icon",
"id",
"iframe",
"image",
"img",
"input",
"inputmode",
"ins",
"isindex",
"ismap",
"itemid",
"itemprop",
"itemref",
"itemscope",
"itemtype",
"kbd",
"keygen",
"keytype",
"kind",
"label",
"label",
"lang",
"legend",
"li",
"link",
"list",
"listing",
"loop",
"low",
"malignmark",
"manifest",
"map",
"mark",
"marquee",
"math",
"max",
"maxlength",
"media",
"mediagroup",
"menu",
"menuitem",
"meta",
"meter",
"method",
"mglyph",
"mi",
"min",
"minlength",
"mn",
"mo",
"ms",
"mtext",
"multiple",
"muted",
"name",
"nav",
"nobr",
"noembed",
"noframes",
"noscript",
"novalidate",
"object",
"ol",
"onabort",
"onafterprint",
"onautocomplete",
"onautocompleteerror",
"onbeforeprint",
"onbeforeunload",
"onblur",
"oncancel",
"oncanplay",
"oncanplaythrough",
"onchange",
"onclick",
"onclose",
"oncontextmenu",
"oncuechange",
"ondblclick",
"ondrag",
"ondragend",
"ondragenter",
"ondragleave",
"ondragover",
"ondragstart",
"ondrop",
"ondurationchange",
"onemptied",
"onended",
"onerror",
"onfocus",
"onhashchange",
"oninput",
"oninvalid",
"onkeydown",
"onkeypress",
"onkeyup",
"onlanguagechange",
"onload",
"onloadeddata",
"onloadedmetadata",
"onloadstart",
"onmessage",
"onmousedown",
"onmousemove",
"onmouseout",
"onmouseover",
"onmouseup",
"onmousewheel",
"onoffline",
"ononline",
"onpagehide",
"onpageshow",
"onpause",
"onplay",
"onplaying",
"onpopstate",
"onprogress",
"onratechange",
"onreset",
"onresize",
"onscroll",
"onseeked",
"onseeking",
"onselect",
"onshow",
"onsort",
"onstalled",
"onstorage",
"onsubmit",
"onsuspend",
"ontimeupdate",
"ontoggle",
"onunload",
"onvolumechange",
"onwaiting",
"open",
"optgroup",
"optimum",
"option",
"output",
"p",
"param",
"pattern",
"ping",
"placeholder",
"plaintext",
"poster",
"pre",
"preload",
"progress",
"prompt",
"public",
"q",
"radiogroup",
"readonly",
"rel",
"required",
"reversed",
"rows",
"rowspan",
"rp",
"rt",
"ruby",
"s",
"samp",
"sandbox",
"scope",
"scoped",
"script",
"seamless",
"section",
"select",
"selected",
"shape",
"size",
"sizes",
"small",
"sortable",
"sorted",
"source",
"spacer",
"span",
"span",
"spellcheck",
"src",
"srcdoc",
"srclang",
"start",
"step",
"strike",
"strong",
"style",
"style",
"sub",
"summary",
"sup",
"svg",
"system",
"tabindex",
"table",
"target",
"tbody",
"td",
"template",
"textarea",
"tfoot",
"th",
"thead",
"time",
"title",
"title",
"tr",
"track",
"translate",
"tt",
"type",
"typemustmatch",
"u",
"ul",
"usemap",
"value",
"var",
"video",
"wbr",
"width",
"wrap",
"xmp",
}

View file

@ -0,0 +1,257 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package charset provides common text encodings for HTML documents.
//
// The mapping from encoding labels to encodings is defined at
// https://encoding.spec.whatwg.org/.
package charset // import "golang.org/x/net/html/charset"
import (
"bytes"
"fmt"
"io"
"mime"
"strings"
"unicode/utf8"
"golang.org/x/net/html"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/htmlindex"
"golang.org/x/text/transform"
)
// Lookup returns the encoding with the specified label, and its canonical
// name. It returns nil and the empty string if label is not one of the
// standard encodings for HTML. Matching is case-insensitive and ignores
// leading and trailing whitespace. Encoders will use HTML escape sequences for
// runes that are not supported by the character set.
func Lookup(label string) (e encoding.Encoding, name string) {
e, err := htmlindex.Get(label)
if err != nil {
return nil, ""
}
name, _ = htmlindex.Name(e)
return &htmlEncoding{e}, name
}
type htmlEncoding struct{ encoding.Encoding }
func (h *htmlEncoding) NewEncoder() *encoding.Encoder {
// HTML requires a non-terminating legacy encoder. We use HTML escapes to
// substitute unsupported code points.
return encoding.HTMLEscapeUnsupported(h.Encoding.NewEncoder())
}
// DetermineEncoding determines the encoding of an HTML document by examining
// up to the first 1024 bytes of content and the declared Content-Type.
//
// See http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#determining-the-character-encoding
func DetermineEncoding(content []byte, contentType string) (e encoding.Encoding, name string, certain bool) {
if len(content) > 1024 {
content = content[:1024]
}
for _, b := range boms {
if bytes.HasPrefix(content, b.bom) {
e, name = Lookup(b.enc)
return e, name, true
}
}
if _, params, err := mime.ParseMediaType(contentType); err == nil {
if cs, ok := params["charset"]; ok {
if e, name = Lookup(cs); e != nil {
return e, name, true
}
}
}
if len(content) > 0 {
e, name = prescan(content)
if e != nil {
return e, name, false
}
}
// Try to detect UTF-8.
// First eliminate any partial rune at the end.
for i := len(content) - 1; i >= 0 && i > len(content)-4; i-- {
b := content[i]
if b < 0x80 {
break
}
if utf8.RuneStart(b) {
content = content[:i]
break
}
}
hasHighBit := false
for _, c := range content {
if c >= 0x80 {
hasHighBit = true
break
}
}
if hasHighBit && utf8.Valid(content) {
return encoding.Nop, "utf-8", false
}
// TODO: change default depending on user's locale?
return charmap.Windows1252, "windows-1252", false
}
// NewReader returns an io.Reader that converts the content of r to UTF-8.
// It calls DetermineEncoding to find out what r's encoding is.
func NewReader(r io.Reader, contentType string) (io.Reader, error) {
preview := make([]byte, 1024)
n, err := io.ReadFull(r, preview)
switch {
case err == io.ErrUnexpectedEOF:
preview = preview[:n]
r = bytes.NewReader(preview)
case err != nil:
return nil, err
default:
r = io.MultiReader(bytes.NewReader(preview), r)
}
if e, _, _ := DetermineEncoding(preview, contentType); e != encoding.Nop {
r = transform.NewReader(r, e.NewDecoder())
}
return r, nil
}
// NewReaderLabel returns a reader that converts from the specified charset to
// UTF-8. It uses Lookup to find the encoding that corresponds to label, and
// returns an error if Lookup returns nil. It is suitable for use as
// encoding/xml.Decoder's CharsetReader function.
func NewReaderLabel(label string, input io.Reader) (io.Reader, error) {
e, _ := Lookup(label)
if e == nil {
return nil, fmt.Errorf("unsupported charset: %q", label)
}
return transform.NewReader(input, e.NewDecoder()), nil
}
func prescan(content []byte) (e encoding.Encoding, name string) {
z := html.NewTokenizer(bytes.NewReader(content))
for {
switch z.Next() {
case html.ErrorToken:
return nil, ""
case html.StartTagToken, html.SelfClosingTagToken:
tagName, hasAttr := z.TagName()
if !bytes.Equal(tagName, []byte("meta")) {
continue
}
attrList := make(map[string]bool)
gotPragma := false
const (
dontKnow = iota
doNeedPragma
doNotNeedPragma
)
needPragma := dontKnow
name = ""
e = nil
for hasAttr {
var key, val []byte
key, val, hasAttr = z.TagAttr()
ks := string(key)
if attrList[ks] {
continue
}
attrList[ks] = true
for i, c := range val {
if 'A' <= c && c <= 'Z' {
val[i] = c + 0x20
}
}
switch ks {
case "http-equiv":
if bytes.Equal(val, []byte("content-type")) {
gotPragma = true
}
case "content":
if e == nil {
name = fromMetaElement(string(val))
if name != "" {
e, name = Lookup(name)
if e != nil {
needPragma = doNeedPragma
}
}
}
case "charset":
e, name = Lookup(string(val))
needPragma = doNotNeedPragma
}
}
if needPragma == dontKnow || needPragma == doNeedPragma && !gotPragma {
continue
}
if strings.HasPrefix(name, "utf-16") {
name = "utf-8"
e = encoding.Nop
}
if e != nil {
return e, name
}
}
}
}
func fromMetaElement(s string) string {
for s != "" {
csLoc := strings.Index(s, "charset")
if csLoc == -1 {
return ""
}
s = s[csLoc+len("charset"):]
s = strings.TrimLeft(s, " \t\n\f\r")
if !strings.HasPrefix(s, "=") {
continue
}
s = s[1:]
s = strings.TrimLeft(s, " \t\n\f\r")
if s == "" {
return ""
}
if q := s[0]; q == '"' || q == '\'' {
s = s[1:]
closeQuote := strings.IndexRune(s, rune(q))
if closeQuote == -1 {
return ""
}
return s[:closeQuote]
}
end := strings.IndexAny(s, "; \t\n\f\r")
if end == -1 {
end = len(s)
}
return s[:end]
}
return ""
}
var boms = []struct {
bom []byte
enc string
}{
{[]byte{0xfe, 0xff}, "utf-16be"},
{[]byte{0xff, 0xfe}, "utf-16le"},
{[]byte{0xef, 0xbb, 0xbf}, "utf-8"},
}

View file

@ -0,0 +1,237 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package charset
import (
"bytes"
"encoding/xml"
"io/ioutil"
"runtime"
"strings"
"testing"
"golang.org/x/text/transform"
)
func transformString(t transform.Transformer, s string) (string, error) {
r := transform.NewReader(strings.NewReader(s), t)
b, err := ioutil.ReadAll(r)
return string(b), err
}
type testCase struct {
utf8, other, otherEncoding string
}
// testCases for encoding and decoding.
var testCases = []testCase{
{"Résumé", "Résumé", "utf8"},
{"Résumé", "R\xe9sum\xe9", "latin1"},
{"これは漢字です。", "S0\x8c0o0\"oW[g0Y0\x020", "UTF-16LE"},
{"これは漢字です。", "0S0\x8c0oo\"[W0g0Y0\x02", "UTF-16BE"},
{"Hello, world", "Hello, world", "ASCII"},
{"Gdańsk", "Gda\xf1sk", "ISO-8859-2"},
{"Ââ Čč Đđ Ŋŋ Õõ Šš Žž Åå Ää", "\xc2\xe2 \xc8\xe8 \xa9\xb9 \xaf\xbf \xd5\xf5 \xaa\xba \xac\xbc \xc5\xe5 \xc4\xe4", "ISO-8859-10"},
{"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "ISO-8859-11"},
{"latviešu", "latvie\xf0u", "ISO-8859-13"},
{"Seònaid", "Se\xf2naid", "ISO-8859-14"},
{"€1 is cheap", "\xa41 is cheap", "ISO-8859-15"},
{"românește", "rom\xe2ne\xbate", "ISO-8859-16"},
{"nutraĵo", "nutra\xbco", "ISO-8859-3"},
{"Kalâdlit", "Kal\xe2dlit", "ISO-8859-4"},
{"русский", "\xe0\xe3\xe1\xe1\xda\xd8\xd9", "ISO-8859-5"},
{"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "ISO-8859-7"},
{"Kağan", "Ka\xf0an", "ISO-8859-9"},
{"Résumé", "R\x8esum\x8e", "macintosh"},
{"Gdańsk", "Gda\xf1sk", "windows-1250"},
{"русский", "\xf0\xf3\xf1\xf1\xea\xe8\xe9", "windows-1251"},
{"Résumé", "R\xe9sum\xe9", "windows-1252"},
{"ελληνικά", "\xe5\xeb\xeb\xe7\xed\xe9\xea\xdc", "windows-1253"},
{"Kağan", "Ka\xf0an", "windows-1254"},
{"עִבְרִית", "\xf2\xc4\xe1\xc0\xf8\xc4\xe9\xfa", "windows-1255"},
{"العربية", "\xc7\xe1\xda\xd1\xc8\xed\xc9", "windows-1256"},
{"latviešu", "latvie\xf0u", "windows-1257"},
{"Việt", "Vi\xea\xf2t", "windows-1258"},
{"สำหรับ", "\xca\xd3\xcb\xc3\u047a", "windows-874"},
{"русский", "\xd2\xd5\xd3\xd3\xcb\xc9\xca", "KOI8-R"},
{"українська", "\xd5\xcb\xd2\xc1\xa7\xce\xd3\xd8\xcb\xc1", "KOI8-U"},
{"Hello 常用國字標準字體表", "Hello \xb1`\xa5\u03b0\xea\xa6r\xbc\u0437\u01e6r\xc5\xe9\xaa\xed", "big5"},
{"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gbk"},
{"Hello 常用國字標準字體表", "Hello \xb3\xa3\xd3\xc3\x87\xf8\xd7\xd6\x98\xcb\x9c\xca\xd7\xd6\xf3\x77\xb1\xed", "gb18030"},
{"עִבְרִית", "\x81\x30\xfb\x30\x81\x30\xf6\x34\x81\x30\xf9\x33\x81\x30\xf6\x30\x81\x30\xfb\x36\x81\x30\xf6\x34\x81\x30\xfa\x31\x81\x30\xfb\x38", "gb18030"},
{"㧯", "\x82\x31\x89\x38", "gb18030"},
{"これは漢字です。", "\x82\xb1\x82\xea\x82\xcd\x8a\xbf\x8e\x9a\x82\xc5\x82\xb7\x81B", "SJIS"},
{"Hello, 世界!", "Hello, \x90\xa2\x8aE!", "SJIS"},
{"イウエオカ", "\xb2\xb3\xb4\xb5\xb6", "SJIS"},
{"これは漢字です。", "\xa4\xb3\xa4\xec\xa4\u03f4\xc1\xbb\xfa\xa4\u01e4\xb9\xa1\xa3", "EUC-JP"},
{"Hello, 世界!", "Hello, \x1b$B@$3&\x1b(B!", "ISO-2022-JP"},
{"다음과 같은 조건을 따라야 합니다: 저작자표시", "\xb4\xd9\xc0\xbd\xb0\xfa \xb0\xb0\xc0\xba \xc1\xb6\xb0\xc7\xc0\xbb \xb5\xfb\xb6\xf3\xbe\xdf \xc7մϴ\xd9: \xc0\xfa\xc0\xdb\xc0\xdaǥ\xbd\xc3", "EUC-KR"},
}
func TestDecode(t *testing.T) {
testCases := append(testCases, []testCase{
// Replace multi-byte maximum subpart of ill-formed subsequence with
// single replacement character (WhatWG requirement).
{"Rés\ufffdumé", "Rés\xe1\x80umé", "utf8"},
}...)
for _, tc := range testCases {
e, _ := Lookup(tc.otherEncoding)
if e == nil {
t.Errorf("%s: not found", tc.otherEncoding)
continue
}
s, err := transformString(e.NewDecoder(), tc.other)
if err != nil {
t.Errorf("%s: decode %q: %v", tc.otherEncoding, tc.other, err)
continue
}
if s != tc.utf8 {
t.Errorf("%s: got %q, want %q", tc.otherEncoding, s, tc.utf8)
}
}
}
func TestEncode(t *testing.T) {
testCases := append(testCases, []testCase{
// Use Go-style replacement.
{"Rés\xe1\x80umé", "Rés\ufffd\ufffdumé", "utf8"},
// U+0144 LATIN SMALL LETTER N WITH ACUTE not supported by encoding.
{"Gdańsk", "Gda&#324;sk", "ISO-8859-11"},
{"\ufffd", "&#65533;", "ISO-8859-11"},
{"a\xe1\x80b", "a&#65533;&#65533;b", "ISO-8859-11"},
}...)
for _, tc := range testCases {
e, _ := Lookup(tc.otherEncoding)
if e == nil {
t.Errorf("%s: not found", tc.otherEncoding)
continue
}
s, err := transformString(e.NewEncoder(), tc.utf8)
if err != nil {
t.Errorf("%s: encode %q: %s", tc.otherEncoding, tc.utf8, err)
continue
}
if s != tc.other {
t.Errorf("%s: got %q, want %q", tc.otherEncoding, s, tc.other)
}
}
}
var sniffTestCases = []struct {
filename, declared, want string
}{
{"HTTP-charset.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
{"UTF-16LE-BOM.html", "", "utf-16le"},
{"UTF-16BE-BOM.html", "", "utf-16be"},
{"meta-content-attribute.html", "text/html", "iso-8859-15"},
{"meta-charset-attribute.html", "text/html", "iso-8859-15"},
{"No-encoding-declaration.html", "text/html", "utf-8"},
{"HTTP-vs-UTF-8-BOM.html", "text/html; charset=iso-8859-15", "utf-8"},
{"HTTP-vs-meta-content.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
{"HTTP-vs-meta-charset.html", "text/html; charset=iso-8859-15", "iso-8859-15"},
{"UTF-8-BOM-vs-meta-content.html", "text/html", "utf-8"},
{"UTF-8-BOM-vs-meta-charset.html", "text/html", "utf-8"},
}
func TestSniff(t *testing.T) {
switch runtime.GOOS {
case "nacl": // platforms that don't permit direct file system access
t.Skipf("not supported on %q", runtime.GOOS)
}
for _, tc := range sniffTestCases {
content, err := ioutil.ReadFile("testdata/" + tc.filename)
if err != nil {
t.Errorf("%s: error reading file: %v", tc.filename, err)
continue
}
_, name, _ := DetermineEncoding(content, tc.declared)
if name != tc.want {
t.Errorf("%s: got %q, want %q", tc.filename, name, tc.want)
continue
}
}
}
func TestReader(t *testing.T) {
switch runtime.GOOS {
case "nacl": // platforms that don't permit direct file system access
t.Skipf("not supported on %q", runtime.GOOS)
}
for _, tc := range sniffTestCases {
content, err := ioutil.ReadFile("testdata/" + tc.filename)
if err != nil {
t.Errorf("%s: error reading file: %v", tc.filename, err)
continue
}
r, err := NewReader(bytes.NewReader(content), tc.declared)
if err != nil {
t.Errorf("%s: error creating reader: %v", tc.filename, err)
continue
}
got, err := ioutil.ReadAll(r)
if err != nil {
t.Errorf("%s: error reading from charset.NewReader: %v", tc.filename, err)
continue
}
e, _ := Lookup(tc.want)
want, err := ioutil.ReadAll(transform.NewReader(bytes.NewReader(content), e.NewDecoder()))
if err != nil {
t.Errorf("%s: error decoding with hard-coded charset name: %v", tc.filename, err)
continue
}
if !bytes.Equal(got, want) {
t.Errorf("%s: got %q, want %q", tc.filename, got, want)
continue
}
}
}
var metaTestCases = []struct {
meta, want string
}{
{"", ""},
{"text/html", ""},
{"text/html; charset utf-8", ""},
{"text/html; charset=latin-2", "latin-2"},
{"text/html; charset; charset = utf-8", "utf-8"},
{`charset="big5"`, "big5"},
{"charset='shift_jis'", "shift_jis"},
}
func TestFromMeta(t *testing.T) {
for _, tc := range metaTestCases {
got := fromMetaElement(tc.meta)
if got != tc.want {
t.Errorf("%q: got %q, want %q", tc.meta, got, tc.want)
}
}
}
func TestXML(t *testing.T) {
const s = "<?xml version=\"1.0\" encoding=\"windows-1252\"?><a><Word>r\xe9sum\xe9</Word></a>"
d := xml.NewDecoder(strings.NewReader(s))
d.CharsetReader = NewReaderLabel
var a struct {
Word string
}
err := d.Decode(&a)
if err != nil {
t.Fatalf("Decode: %v", err)
}
want := "résumé"
if a.Word != want {
t.Errorf("got %q, want %q", a.Word, want)
}
}

View file

@ -0,0 +1,48 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<title>HTTP charset</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="The character encoding of a page can be set using the HTTP header charset declaration.">
<style type='text/css'>
.test div { width: 50px; }</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
</head>
<body>
<p class='title'>HTTP charset</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">The character encoding of a page can be set using the HTTP header charset declaration.</p>
<div class="notes"><p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p><p>The only character encoding declaration for this HTML file is in the HTTP header, which sets the encoding to ISO 8859-15.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-003">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-001<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-001" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

View file

@ -0,0 +1,48 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<title>HTTP vs UTF-8 BOM</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="A character encoding set in the HTTP header has lower precedence than the UTF-8 signature.">
<style type='text/css'>
.test div { width: 50px; }</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
</head>
<body>
<p class='title'>HTTP vs UTF-8 BOM</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">A character encoding set in the HTTP header has lower precedence than the UTF-8 signature.</p>
<div class="notes"><p><p>The HTTP header attempts to set the character encoding to ISO 8859-15. The page starts with a UTF-8 signature.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p><p>If the test is unsuccessful, the characters &#x00EF;&#x00BB;&#x00BF; should appear at the top of the page. These represent the bytes that make up the UTF-8 signature when encountered in the ISO 8859-15 encoding.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-022">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-034<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-034" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

View file

@ -0,0 +1,49 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<meta charset="iso-8859-1" > <title>HTTP vs meta charset</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute.">
<style type='text/css'>
.test div { width: 50px; }.test div { width: 90px; }
</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
</head>
<body>
<p class='title'>HTTP vs meta charset</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">The HTTP header has a higher precedence than an encoding declaration in a meta charset attribute.</p>
<div class="notes"><p><p>The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-1.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-037">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-018<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-018" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

View file

@ -0,0 +1,49 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<meta http-equiv="content-type" content="text/html;charset=iso-8859-1" > <title>HTTP vs meta content</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="The HTTP header has a higher precedence than an encoding declaration in a meta content attribute.">
<style type='text/css'>
.test div { width: 50px; }.test div { width: 90px; }
</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
</head>
<body>
<p class='title'>HTTP vs meta content</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">The HTTP header has a higher precedence than an encoding declaration in a meta content attribute.</p>
<div class="notes"><p><p>The HTTP header attempts to set the character encoding to ISO 8859-15. The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-1.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-018">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-016<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-016" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

View file

@ -0,0 +1,47 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<title>No encoding declaration</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="A page with no encoding information in HTTP, BOM, XML declaration or meta element will be treated as UTF-8.">
<style type='text/css'>
.test div { width: 50px; }</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
</head>
<body>
<p class='title'>No encoding declaration</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">A page with no encoding information in HTTP, BOM, XML declaration or meta element will be treated as UTF-8.</p>
<div class="notes"><p><p>The test on this page contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-034">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-015<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-015" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

View file

@ -0,0 +1,9 @@
These test cases come from
http://www.w3.org/International/tests/repository/html5/the-input-byte-stream/results-basics
Distributed under both the W3C Test Suite License
(http://www.w3.org/Consortium/Legal/2008/04-testsuite-license)
and the W3C 3-clause BSD License
(http://www.w3.org/Consortium/Legal/2008/03-bsd-license).
To contribute to a W3C Test Suite, see the policies and contribution
forms (http://www.w3.org/2004/10/27-testcases).

View file

@ -0,0 +1,49 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<meta charset="iso-8859-15"> <title>UTF-8 BOM vs meta charset</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta charset attribute declares a different encoding.">
<style type='text/css'>
.test div { width: 50px; }.test div { width: 90px; }
</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
</head>
<body>
<p class='title'>UTF-8 BOM vs meta charset</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta charset attribute declares a different encoding.</p>
<div class="notes"><p><p>The page contains an encoding declaration in a meta charset attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-024">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-038<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-038" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

View file

@ -0,0 +1,48 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<meta http-equiv="content-type" content="text/html; charset=iso-8859-15"> <title>UTF-8 BOM vs meta content</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta content attribute declares a different encoding.">
<style type='text/css'>
.test div { width: 50px; }</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-utf8.css">
</head>
<body>
<p class='title'>UTF-8 BOM vs meta content</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">A page with a UTF-8 BOM will be recognized as UTF-8 even if the meta content attribute declares a different encoding.</p>
<div class="notes"><p><p>The page contains an encoding declaration in a meta content attribute that attempts to set the character encoding to ISO 8859-15, but the file starts with a UTF-8 signature.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00FD;&#x00E4;&#x00E8;</code>. This matches the sequence of bytes above when they are interpreted as UTF-8. If the class name matches the selector then the test will pass.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-038">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-037<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#precedence" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-037" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

View file

@ -0,0 +1,48 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<meta charset="iso-8859-15"> <title>meta charset attribute</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="The character encoding of the page can be set by a meta element with charset attribute.">
<style type='text/css'>
.test div { width: 50px; }</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
</head>
<body>
<p class='title'>meta charset attribute</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">The character encoding of the page can be set by a meta element with charset attribute.</p>
<div class="notes"><p><p>The only character encoding declaration for this HTML file is in the charset attribute of the meta element, which declares the encoding to be ISO 8859-15.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-015">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-009<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-009" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

View file

@ -0,0 +1,48 @@
<!DOCTYPE html>
<html lang="en" >
<head>
<meta http-equiv="content-type" content="text/html; charset=iso-8859-15"> <title>meta content attribute</title>
<link rel='author' title='Richard Ishida' href='mailto:ishida@w3.org'>
<link rel='help' href='http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream'>
<link rel="stylesheet" type="text/css" href="./generatedtests.css">
<script src="http://w3c-test.org/resources/testharness.js"></script>
<script src="http://w3c-test.org/resources/testharnessreport.js"></script>
<meta name='flags' content='http'>
<meta name="assert" content="The character encoding of the page can be set by a meta element with http-equiv and content attributes.">
<style type='text/css'>
.test div { width: 50px; }</style>
<link rel="stylesheet" type="text/css" href="the-input-byte-stream/support/encodingtests-15.css">
</head>
<body>
<p class='title'>meta content attribute</p>
<div id='log'></div>
<div class='test'><div id='box' class='ýäè'>&#xA0;</div></div>
<div class='description'>
<p class="assertion" title="Assertion">The character encoding of the page can be set by a meta element with http-equiv and content attributes.</p>
<div class="notes"><p><p>The only character encoding declaration for this HTML file is in the content attribute of the meta element, which declares the encoding to be ISO 8859-15.</p><p>The test contains a div with a class name that contains the following sequence of bytes: 0xC3 0xBD 0xC3 0xA4 0xC3 0xA8. These represent different sequences of characters in ISO 8859-15, ISO 8859-1 and UTF-8. The external, UTF-8-encoded stylesheet contains a selector <code>.test div.&#x00C3;&#x0153;&#x00C3;&#x20AC;&#x00C3;&#x0161;</code>. This matches the sequence of bytes above when they are interpreted as ISO 8859-15. If the class name matches the selector then the test will pass.</p></p>
</div>
</div>
<div class="nexttest"><div><a href="generate?test=the-input-byte-stream-009">Next test</a></div><div class="doctype">HTML5</div>
<p class="jump">the-input-byte-stream-007<br /><a href="/International/tests/html5/the-input-byte-stream/results-basics#basics" target="_blank">Result summary &amp; related tests</a><br /><a href="http://w3c-test.org/framework/details/i18n-html5/the-input-byte-stream-007" target="_blank">Detailed results for this test</a><br/> <a href="http://www.w3.org/TR/html5/syntax.html#the-input-byte-stream" target="_blank">Link to spec</a></p>
<div class='prereq'>Assumptions: <ul><li>The default encoding for the browser you are testing is not set to ISO 8859-15.</li>
<li>The test is read from a server that supports HTTP.</li></ul></div>
</div>
<script>
test(function() {
assert_equals(document.getElementById('box').offsetWidth, 100);
}, " ");
</script>
</body>
</html>

View file

@ -0,0 +1,102 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
// Section 12.2.3.2 of the HTML5 specification says "The following elements
// have varying levels of special parsing rules".
// https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
var isSpecialElementMap = map[string]bool{
"address": true,
"applet": true,
"area": true,
"article": true,
"aside": true,
"base": true,
"basefont": true,
"bgsound": true,
"blockquote": true,
"body": true,
"br": true,
"button": true,
"caption": true,
"center": true,
"col": true,
"colgroup": true,
"dd": true,
"details": true,
"dir": true,
"div": true,
"dl": true,
"dt": true,
"embed": true,
"fieldset": true,
"figcaption": true,
"figure": true,
"footer": true,
"form": true,
"frame": true,
"frameset": true,
"h1": true,
"h2": true,
"h3": true,
"h4": true,
"h5": true,
"h6": true,
"head": true,
"header": true,
"hgroup": true,
"hr": true,
"html": true,
"iframe": true,
"img": true,
"input": true,
"isindex": true,
"li": true,
"link": true,
"listing": true,
"marquee": true,
"menu": true,
"meta": true,
"nav": true,
"noembed": true,
"noframes": true,
"noscript": true,
"object": true,
"ol": true,
"p": true,
"param": true,
"plaintext": true,
"pre": true,
"script": true,
"section": true,
"select": true,
"source": true,
"style": true,
"summary": true,
"table": true,
"tbody": true,
"td": true,
"template": true,
"textarea": true,
"tfoot": true,
"th": true,
"thead": true,
"title": true,
"tr": true,
"track": true,
"ul": true,
"wbr": true,
"xmp": true,
}
func isSpecialElement(element *Node) bool {
switch element.Namespace {
case "", "html":
return isSpecialElementMap[element.Data]
case "svg":
return element.Data == "foreignObject"
}
return false
}

View file

@ -0,0 +1,106 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
Package html implements an HTML5-compliant tokenizer and parser.
Tokenization is done by creating a Tokenizer for an io.Reader r. It is the
caller's responsibility to ensure that r provides UTF-8 encoded HTML.
z := html.NewTokenizer(r)
Given a Tokenizer z, the HTML is tokenized by repeatedly calling z.Next(),
which parses the next token and returns its type, or an error:
for {
tt := z.Next()
if tt == html.ErrorToken {
// ...
return ...
}
// Process the current token.
}
There are two APIs for retrieving the current token. The high-level API is to
call Token; the low-level API is to call Text or TagName / TagAttr. Both APIs
allow optionally calling Raw after Next but before Token, Text, TagName, or
TagAttr. In EBNF notation, the valid call sequence per token is:
Next {Raw} [ Token | Text | TagName {TagAttr} ]
Token returns an independent data structure that completely describes a token.
Entities (such as "&lt;") are unescaped, tag names and attribute keys are
lower-cased, and attributes are collected into a []Attribute. For example:
for {
if z.Next() == html.ErrorToken {
// Returning io.EOF indicates success.
return z.Err()
}
emitToken(z.Token())
}
The low-level API performs fewer allocations and copies, but the contents of
the []byte values returned by Text, TagName and TagAttr may change on the next
call to Next. For example, to extract an HTML page's anchor text:
depth := 0
for {
tt := z.Next()
switch tt {
case ErrorToken:
return z.Err()
case TextToken:
if depth > 0 {
// emitBytes should copy the []byte it receives,
// if it doesn't process it immediately.
emitBytes(z.Text())
}
case StartTagToken, EndTagToken:
tn, _ := z.TagName()
if len(tn) == 1 && tn[0] == 'a' {
if tt == StartTagToken {
depth++
} else {
depth--
}
}
}
}
Parsing is done by calling Parse with an io.Reader, which returns the root of
the parse tree (the document element) as a *Node. It is the caller's
responsibility to ensure that the Reader provides UTF-8 encoded HTML. For
example, to process each anchor node in depth-first order:
doc, err := html.Parse(r)
if err != nil {
// ...
}
var f func(*html.Node)
f = func(n *html.Node) {
if n.Type == html.ElementNode && n.Data == "a" {
// Do something with n...
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
f(c)
}
}
f(doc)
The relevant specifications include:
https://html.spec.whatwg.org/multipage/syntax.html and
https://html.spec.whatwg.org/multipage/syntax.html#tokenization
*/
package html // import "golang.org/x/net/html"
// The tokenization algorithm implemented by this package is not a line-by-line
// transliteration of the relatively verbose state-machine in the WHATWG
// specification. A more direct approach is used instead, where the program
// counter implies the state, such as whether it is tokenizing a tag or a text
// node. Specification compliance is verified by checking expected and actual
// outputs over a test suite rather than aiming for algorithmic fidelity.
// TODO(nigeltao): Does a DOM API belong in this package or a separate one?
// TODO(nigeltao): How does parsing interact with a JavaScript engine?

View file

@ -0,0 +1,156 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"strings"
)
// parseDoctype parses the data from a DoctypeToken into a name,
// public identifier, and system identifier. It returns a Node whose Type
// is DoctypeNode, whose Data is the name, and which has attributes
// named "system" and "public" for the two identifiers if they were present.
// quirks is whether the document should be parsed in "quirks mode".
func parseDoctype(s string) (n *Node, quirks bool) {
n = &Node{Type: DoctypeNode}
// Find the name.
space := strings.IndexAny(s, whitespace)
if space == -1 {
space = len(s)
}
n.Data = s[:space]
// The comparison to "html" is case-sensitive.
if n.Data != "html" {
quirks = true
}
n.Data = strings.ToLower(n.Data)
s = strings.TrimLeft(s[space:], whitespace)
if len(s) < 6 {
// It can't start with "PUBLIC" or "SYSTEM".
// Ignore the rest of the string.
return n, quirks || s != ""
}
key := strings.ToLower(s[:6])
s = s[6:]
for key == "public" || key == "system" {
s = strings.TrimLeft(s, whitespace)
if s == "" {
break
}
quote := s[0]
if quote != '"' && quote != '\'' {
break
}
s = s[1:]
q := strings.IndexRune(s, rune(quote))
var id string
if q == -1 {
id = s
s = ""
} else {
id = s[:q]
s = s[q+1:]
}
n.Attr = append(n.Attr, Attribute{Key: key, Val: id})
if key == "public" {
key = "system"
} else {
key = ""
}
}
if key != "" || s != "" {
quirks = true
} else if len(n.Attr) > 0 {
if n.Attr[0].Key == "public" {
public := strings.ToLower(n.Attr[0].Val)
switch public {
case "-//w3o//dtd w3 html strict 3.0//en//", "-/w3d/dtd html 4.0 transitional/en", "html":
quirks = true
default:
for _, q := range quirkyIDs {
if strings.HasPrefix(public, q) {
quirks = true
break
}
}
}
// The following two public IDs only cause quirks mode if there is no system ID.
if len(n.Attr) == 1 && (strings.HasPrefix(public, "-//w3c//dtd html 4.01 frameset//") ||
strings.HasPrefix(public, "-//w3c//dtd html 4.01 transitional//")) {
quirks = true
}
}
if lastAttr := n.Attr[len(n.Attr)-1]; lastAttr.Key == "system" &&
strings.ToLower(lastAttr.Val) == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" {
quirks = true
}
}
return n, quirks
}
// quirkyIDs is a list of public doctype identifiers that cause a document
// to be interpreted in quirks mode. The identifiers should be in lower case.
var quirkyIDs = []string{
"+//silmaril//dtd html pro v0r11 19970101//",
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
"-//as//dtd html 3.0 aswedit + extensions//",
"-//ietf//dtd html 2.0 level 1//",
"-//ietf//dtd html 2.0 level 2//",
"-//ietf//dtd html 2.0 strict level 1//",
"-//ietf//dtd html 2.0 strict level 2//",
"-//ietf//dtd html 2.0 strict//",
"-//ietf//dtd html 2.0//",
"-//ietf//dtd html 2.1e//",
"-//ietf//dtd html 3.0//",
"-//ietf//dtd html 3.2 final//",
"-//ietf//dtd html 3.2//",
"-//ietf//dtd html 3//",
"-//ietf//dtd html level 0//",
"-//ietf//dtd html level 1//",
"-//ietf//dtd html level 2//",
"-//ietf//dtd html level 3//",
"-//ietf//dtd html strict level 0//",
"-//ietf//dtd html strict level 1//",
"-//ietf//dtd html strict level 2//",
"-//ietf//dtd html strict level 3//",
"-//ietf//dtd html strict//",
"-//ietf//dtd html//",
"-//metrius//dtd metrius presentational//",
"-//microsoft//dtd internet explorer 2.0 html strict//",
"-//microsoft//dtd internet explorer 2.0 html//",
"-//microsoft//dtd internet explorer 2.0 tables//",
"-//microsoft//dtd internet explorer 3.0 html strict//",
"-//microsoft//dtd internet explorer 3.0 html//",
"-//microsoft//dtd internet explorer 3.0 tables//",
"-//netscape comm. corp.//dtd html//",
"-//netscape comm. corp.//dtd strict html//",
"-//o'reilly and associates//dtd html 2.0//",
"-//o'reilly and associates//dtd html extended 1.0//",
"-//o'reilly and associates//dtd html extended relaxed 1.0//",
"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
"-//spyglass//dtd html 2.0 extended//",
"-//sq//dtd html 2.0 hotmetal + extensions//",
"-//sun microsystems corp.//dtd hotjava html//",
"-//sun microsystems corp.//dtd hotjava strict html//",
"-//w3c//dtd html 3 1995-03-24//",
"-//w3c//dtd html 3.2 draft//",
"-//w3c//dtd html 3.2 final//",
"-//w3c//dtd html 3.2//",
"-//w3c//dtd html 3.2s draft//",
"-//w3c//dtd html 4.0 frameset//",
"-//w3c//dtd html 4.0 transitional//",
"-//w3c//dtd html experimental 19960712//",
"-//w3c//dtd html experimental 970421//",
"-//w3c//dtd w3 html//",
"-//w3o//dtd w3 html 3.0//",
"-//webtechs//dtd mozilla html 2.0//",
"-//webtechs//dtd mozilla html//",
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,29 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"testing"
"unicode/utf8"
)
func TestEntityLength(t *testing.T) {
// We verify that the length of UTF-8 encoding of each value is <= 1 + len(key).
// The +1 comes from the leading "&". This property implies that the length of
// unescaped text is <= the length of escaped text.
for k, v := range entity {
if 1+len(k) < utf8.RuneLen(v) {
t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v))
}
if len(k) > longestEntityWithoutSemicolon && k[len(k)-1] != ';' {
t.Errorf("entity name %s is %d characters, but longestEntityWithoutSemicolon=%d", k, len(k), longestEntityWithoutSemicolon)
}
}
for k, v := range entity2 {
if 1+len(k) < utf8.RuneLen(v[0])+utf8.RuneLen(v[1]) {
t.Error("escaped entity &" + k + " is shorter than its UTF-8 encoding " + string(v[0]) + string(v[1]))
}
}
}

View file

@ -0,0 +1,258 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bytes"
"strings"
"unicode/utf8"
)
// These replacements permit compatibility with old numeric entities that
// assumed Windows-1252 encoding.
// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
var replacementTable = [...]rune{
'\u20AC', // First entry is what 0x80 should be replaced with.
'\u0081',
'\u201A',
'\u0192',
'\u201E',
'\u2026',
'\u2020',
'\u2021',
'\u02C6',
'\u2030',
'\u0160',
'\u2039',
'\u0152',
'\u008D',
'\u017D',
'\u008F',
'\u0090',
'\u2018',
'\u2019',
'\u201C',
'\u201D',
'\u2022',
'\u2013',
'\u2014',
'\u02DC',
'\u2122',
'\u0161',
'\u203A',
'\u0153',
'\u009D',
'\u017E',
'\u0178', // Last entry is 0x9F.
// 0x00->'\uFFFD' is handled programmatically.
// 0x0D->'\u000D' is a no-op.
}
// unescapeEntity reads an entity like "&lt;" from b[src:] and writes the
// corresponding "<" to b[dst:], returning the incremented dst and src cursors.
// Precondition: b[src] == '&' && dst <= src.
// attribute should be true if parsing an attribute value.
func unescapeEntity(b []byte, dst, src int, attribute bool) (dst1, src1 int) {
// https://html.spec.whatwg.org/multipage/syntax.html#consume-a-character-reference
// i starts at 1 because we already know that s[0] == '&'.
i, s := 1, b[src:]
if len(s) <= 1 {
b[dst] = b[src]
return dst + 1, src + 1
}
if s[i] == '#' {
if len(s) <= 3 { // We need to have at least "&#.".
b[dst] = b[src]
return dst + 1, src + 1
}
i++
c := s[i]
hex := false
if c == 'x' || c == 'X' {
hex = true
i++
}
x := '\x00'
for i < len(s) {
c = s[i]
i++
if hex {
if '0' <= c && c <= '9' {
x = 16*x + rune(c) - '0'
continue
} else if 'a' <= c && c <= 'f' {
x = 16*x + rune(c) - 'a' + 10
continue
} else if 'A' <= c && c <= 'F' {
x = 16*x + rune(c) - 'A' + 10
continue
}
} else if '0' <= c && c <= '9' {
x = 10*x + rune(c) - '0'
continue
}
if c != ';' {
i--
}
break
}
if i <= 3 { // No characters matched.
b[dst] = b[src]
return dst + 1, src + 1
}
if 0x80 <= x && x <= 0x9F {
// Replace characters from Windows-1252 with UTF-8 equivalents.
x = replacementTable[x-0x80]
} else if x == 0 || (0xD800 <= x && x <= 0xDFFF) || x > 0x10FFFF {
// Replace invalid characters with the replacement character.
x = '\uFFFD'
}
return dst + utf8.EncodeRune(b[dst:], x), src + i
}
// Consume the maximum number of characters possible, with the
// consumed characters matching one of the named references.
for i < len(s) {
c := s[i]
i++
// Lower-cased characters are more common in entities, so we check for them first.
if 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' || '0' <= c && c <= '9' {
continue
}
if c != ';' {
i--
}
break
}
entityName := string(s[1:i])
if entityName == "" {
// No-op.
} else if attribute && entityName[len(entityName)-1] != ';' && len(s) > i && s[i] == '=' {
// No-op.
} else if x := entity[entityName]; x != 0 {
return dst + utf8.EncodeRune(b[dst:], x), src + i
} else if x := entity2[entityName]; x[0] != 0 {
dst1 := dst + utf8.EncodeRune(b[dst:], x[0])
return dst1 + utf8.EncodeRune(b[dst1:], x[1]), src + i
} else if !attribute {
maxLen := len(entityName) - 1
if maxLen > longestEntityWithoutSemicolon {
maxLen = longestEntityWithoutSemicolon
}
for j := maxLen; j > 1; j-- {
if x := entity[entityName[:j]]; x != 0 {
return dst + utf8.EncodeRune(b[dst:], x), src + j + 1
}
}
}
dst1, src1 = dst+i, src+i
copy(b[dst:dst1], b[src:src1])
return dst1, src1
}
// unescape unescapes b's entities in-place, so that "a&lt;b" becomes "a<b".
// attribute should be true if parsing an attribute value.
func unescape(b []byte, attribute bool) []byte {
for i, c := range b {
if c == '&' {
dst, src := unescapeEntity(b, i, i, attribute)
for src < len(b) {
c := b[src]
if c == '&' {
dst, src = unescapeEntity(b, dst, src, attribute)
} else {
b[dst] = c
dst, src = dst+1, src+1
}
}
return b[0:dst]
}
}
return b
}
// lower lower-cases the A-Z bytes in b in-place, so that "aBc" becomes "abc".
func lower(b []byte) []byte {
for i, c := range b {
if 'A' <= c && c <= 'Z' {
b[i] = c + 'a' - 'A'
}
}
return b
}
const escapedChars = "&'<>\"\r"
func escape(w writer, s string) error {
i := strings.IndexAny(s, escapedChars)
for i != -1 {
if _, err := w.WriteString(s[:i]); err != nil {
return err
}
var esc string
switch s[i] {
case '&':
esc = "&amp;"
case '\'':
// "&#39;" is shorter than "&apos;" and apos was not in HTML until HTML5.
esc = "&#39;"
case '<':
esc = "&lt;"
case '>':
esc = "&gt;"
case '"':
// "&#34;" is shorter than "&quot;".
esc = "&#34;"
case '\r':
esc = "&#13;"
default:
panic("unrecognized escape character")
}
s = s[i+1:]
if _, err := w.WriteString(esc); err != nil {
return err
}
i = strings.IndexAny(s, escapedChars)
}
_, err := w.WriteString(s)
return err
}
// EscapeString escapes special characters like "<" to become "&lt;". It
// escapes only five such characters: <, >, &, ' and ".
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
// always true.
func EscapeString(s string) string {
if strings.IndexAny(s, escapedChars) == -1 {
return s
}
var buf bytes.Buffer
escape(&buf, s)
return buf.String()
}
// UnescapeString unescapes entities like "&lt;" to become "<". It unescapes a
// larger range of entities than EscapeString escapes. For example, "&aacute;"
// unescapes to "á", as does "&#225;" and "&xE1;".
// UnescapeString(EscapeString(s)) == s always holds, but the converse isn't
// always true.
func UnescapeString(s string) string {
for _, c := range s {
if c == '&' {
return string(unescape([]byte(s), false))
}
}
return s
}

View file

@ -0,0 +1,97 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import "testing"
type unescapeTest struct {
// A short description of the test case.
desc string
// The HTML text.
html string
// The unescaped text.
unescaped string
}
var unescapeTests = []unescapeTest{
// Handle no entities.
{
"copy",
"A\ttext\nstring",
"A\ttext\nstring",
},
// Handle simple named entities.
{
"simple",
"&amp; &gt; &lt;",
"& > <",
},
// Handle hitting the end of the string.
{
"stringEnd",
"&amp &amp",
"& &",
},
// Handle entities with two codepoints.
{
"multiCodepoint",
"text &gesl; blah",
"text \u22db\ufe00 blah",
},
// Handle decimal numeric entities.
{
"decimalEntity",
"Delta = &#916; ",
"Delta = Δ ",
},
// Handle hexadecimal numeric entities.
{
"hexadecimalEntity",
"Lambda = &#x3bb; = &#X3Bb ",
"Lambda = λ = λ ",
},
// Handle numeric early termination.
{
"numericEnds",
"&# &#x &#128;43 &copy = &#169f = &#xa9",
"&# &#x €43 © = ©f = ©",
},
// Handle numeric ISO-8859-1 entity replacements.
{
"numericReplacements",
"Footnote&#x87;",
"Footnote‡",
},
}
func TestUnescape(t *testing.T) {
for _, tt := range unescapeTests {
unescaped := UnescapeString(tt.html)
if unescaped != tt.unescaped {
t.Errorf("TestUnescape %s: want %q, got %q", tt.desc, tt.unescaped, unescaped)
}
}
}
func TestUnescapeEscape(t *testing.T) {
ss := []string{
``,
`abc def`,
`a & b`,
`a&amp;b`,
`a &amp b`,
`&quot;`,
`"`,
`"<&>"`,
`&quot;&lt;&amp;&gt;&quot;`,
`3&5==1 && 0<1, "0&lt;1", a+acute=&aacute;`,
`The special characters are: <, >, &, ' and "`,
}
for _, s := range ss {
if got := UnescapeString(EscapeString(s)); got != s {
t.Errorf("got %q want %q", got, s)
}
}
}

View file

@ -0,0 +1,40 @@
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This example demonstrates parsing HTML data and walking the resulting tree.
package html_test
import (
"fmt"
"log"
"strings"
"golang.org/x/net/html"
)
func ExampleParse() {
s := `<p>Links:</p><ul><li><a href="foo">Foo</a><li><a href="/bar/baz">BarBaz</a></ul>`
doc, err := html.Parse(strings.NewReader(s))
if err != nil {
log.Fatal(err)
}
var f func(*html.Node)
f = func(n *html.Node) {
if n.Type == html.ElementNode && n.Data == "a" {
for _, a := range n.Attr {
if a.Key == "href" {
fmt.Println(a.Val)
break
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
f(c)
}
}
f(doc)
// Output:
// foo
// /bar/baz
}

View file

@ -0,0 +1,226 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"strings"
)
func adjustAttributeNames(aa []Attribute, nameMap map[string]string) {
for i := range aa {
if newName, ok := nameMap[aa[i].Key]; ok {
aa[i].Key = newName
}
}
}
func adjustForeignAttributes(aa []Attribute) {
for i, a := range aa {
if a.Key == "" || a.Key[0] != 'x' {
continue
}
switch a.Key {
case "xlink:actuate", "xlink:arcrole", "xlink:href", "xlink:role", "xlink:show",
"xlink:title", "xlink:type", "xml:base", "xml:lang", "xml:space", "xmlns:xlink":
j := strings.Index(a.Key, ":")
aa[i].Namespace = a.Key[:j]
aa[i].Key = a.Key[j+1:]
}
}
}
func htmlIntegrationPoint(n *Node) bool {
if n.Type != ElementNode {
return false
}
switch n.Namespace {
case "math":
if n.Data == "annotation-xml" {
for _, a := range n.Attr {
if a.Key == "encoding" {
val := strings.ToLower(a.Val)
if val == "text/html" || val == "application/xhtml+xml" {
return true
}
}
}
}
case "svg":
switch n.Data {
case "desc", "foreignObject", "title":
return true
}
}
return false
}
func mathMLTextIntegrationPoint(n *Node) bool {
if n.Namespace != "math" {
return false
}
switch n.Data {
case "mi", "mo", "mn", "ms", "mtext":
return true
}
return false
}
// Section 12.2.5.5.
var breakout = map[string]bool{
"b": true,
"big": true,
"blockquote": true,
"body": true,
"br": true,
"center": true,
"code": true,
"dd": true,
"div": true,
"dl": true,
"dt": true,
"em": true,
"embed": true,
"h1": true,
"h2": true,
"h3": true,
"h4": true,
"h5": true,
"h6": true,
"head": true,
"hr": true,
"i": true,
"img": true,
"li": true,
"listing": true,
"menu": true,
"meta": true,
"nobr": true,
"ol": true,
"p": true,
"pre": true,
"ruby": true,
"s": true,
"small": true,
"span": true,
"strong": true,
"strike": true,
"sub": true,
"sup": true,
"table": true,
"tt": true,
"u": true,
"ul": true,
"var": true,
}
// Section 12.2.5.5.
var svgTagNameAdjustments = map[string]string{
"altglyph": "altGlyph",
"altglyphdef": "altGlyphDef",
"altglyphitem": "altGlyphItem",
"animatecolor": "animateColor",
"animatemotion": "animateMotion",
"animatetransform": "animateTransform",
"clippath": "clipPath",
"feblend": "feBlend",
"fecolormatrix": "feColorMatrix",
"fecomponenttransfer": "feComponentTransfer",
"fecomposite": "feComposite",
"feconvolvematrix": "feConvolveMatrix",
"fediffuselighting": "feDiffuseLighting",
"fedisplacementmap": "feDisplacementMap",
"fedistantlight": "feDistantLight",
"feflood": "feFlood",
"fefunca": "feFuncA",
"fefuncb": "feFuncB",
"fefuncg": "feFuncG",
"fefuncr": "feFuncR",
"fegaussianblur": "feGaussianBlur",
"feimage": "feImage",
"femerge": "feMerge",
"femergenode": "feMergeNode",
"femorphology": "feMorphology",
"feoffset": "feOffset",
"fepointlight": "fePointLight",
"fespecularlighting": "feSpecularLighting",
"fespotlight": "feSpotLight",
"fetile": "feTile",
"feturbulence": "feTurbulence",
"foreignobject": "foreignObject",
"glyphref": "glyphRef",
"lineargradient": "linearGradient",
"radialgradient": "radialGradient",
"textpath": "textPath",
}
// Section 12.2.5.1
var mathMLAttributeAdjustments = map[string]string{
"definitionurl": "definitionURL",
}
var svgAttributeAdjustments = map[string]string{
"attributename": "attributeName",
"attributetype": "attributeType",
"basefrequency": "baseFrequency",
"baseprofile": "baseProfile",
"calcmode": "calcMode",
"clippathunits": "clipPathUnits",
"contentscripttype": "contentScriptType",
"contentstyletype": "contentStyleType",
"diffuseconstant": "diffuseConstant",
"edgemode": "edgeMode",
"externalresourcesrequired": "externalResourcesRequired",
"filterres": "filterRes",
"filterunits": "filterUnits",
"glyphref": "glyphRef",
"gradienttransform": "gradientTransform",
"gradientunits": "gradientUnits",
"kernelmatrix": "kernelMatrix",
"kernelunitlength": "kernelUnitLength",
"keypoints": "keyPoints",
"keysplines": "keySplines",
"keytimes": "keyTimes",
"lengthadjust": "lengthAdjust",
"limitingconeangle": "limitingConeAngle",
"markerheight": "markerHeight",
"markerunits": "markerUnits",
"markerwidth": "markerWidth",
"maskcontentunits": "maskContentUnits",
"maskunits": "maskUnits",
"numoctaves": "numOctaves",
"pathlength": "pathLength",
"patterncontentunits": "patternContentUnits",
"patterntransform": "patternTransform",
"patternunits": "patternUnits",
"pointsatx": "pointsAtX",
"pointsaty": "pointsAtY",
"pointsatz": "pointsAtZ",
"preservealpha": "preserveAlpha",
"preserveaspectratio": "preserveAspectRatio",
"primitiveunits": "primitiveUnits",
"refx": "refX",
"refy": "refY",
"repeatcount": "repeatCount",
"repeatdur": "repeatDur",
"requiredextensions": "requiredExtensions",
"requiredfeatures": "requiredFeatures",
"specularconstant": "specularConstant",
"specularexponent": "specularExponent",
"spreadmethod": "spreadMethod",
"startoffset": "startOffset",
"stddeviation": "stdDeviation",
"stitchtiles": "stitchTiles",
"surfacescale": "surfaceScale",
"systemlanguage": "systemLanguage",
"tablevalues": "tableValues",
"targetx": "targetX",
"targety": "targetY",
"textlength": "textLength",
"viewbox": "viewBox",
"viewtarget": "viewTarget",
"xchannelselector": "xChannelSelector",
"ychannelselector": "yChannelSelector",
"zoomandpan": "zoomAndPan",
}

View file

@ -0,0 +1,193 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"golang.org/x/net/html/atom"
)
// A NodeType is the type of a Node.
type NodeType uint32
const (
ErrorNode NodeType = iota
TextNode
DocumentNode
ElementNode
CommentNode
DoctypeNode
scopeMarkerNode
)
// Section 12.2.3.3 says "scope markers are inserted when entering applet
// elements, buttons, object elements, marquees, table cells, and table
// captions, and are used to prevent formatting from 'leaking'".
var scopeMarker = Node{Type: scopeMarkerNode}
// A Node consists of a NodeType and some Data (tag name for element nodes,
// content for text) and are part of a tree of Nodes. Element nodes may also
// have a Namespace and contain a slice of Attributes. Data is unescaped, so
// that it looks like "a<b" rather than "a&lt;b". For element nodes, DataAtom
// is the atom for Data, or zero if Data is not a known tag name.
//
// An empty Namespace implies a "http://www.w3.org/1999/xhtml" namespace.
// Similarly, "math" is short for "http://www.w3.org/1998/Math/MathML", and
// "svg" is short for "http://www.w3.org/2000/svg".
type Node struct {
Parent, FirstChild, LastChild, PrevSibling, NextSibling *Node
Type NodeType
DataAtom atom.Atom
Data string
Namespace string
Attr []Attribute
}
// InsertBefore inserts newChild as a child of n, immediately before oldChild
// in the sequence of n's children. oldChild may be nil, in which case newChild
// is appended to the end of n's children.
//
// It will panic if newChild already has a parent or siblings.
func (n *Node) InsertBefore(newChild, oldChild *Node) {
if newChild.Parent != nil || newChild.PrevSibling != nil || newChild.NextSibling != nil {
panic("html: InsertBefore called for an attached child Node")
}
var prev, next *Node
if oldChild != nil {
prev, next = oldChild.PrevSibling, oldChild
} else {
prev = n.LastChild
}
if prev != nil {
prev.NextSibling = newChild
} else {
n.FirstChild = newChild
}
if next != nil {
next.PrevSibling = newChild
} else {
n.LastChild = newChild
}
newChild.Parent = n
newChild.PrevSibling = prev
newChild.NextSibling = next
}
// AppendChild adds a node c as a child of n.
//
// It will panic if c already has a parent or siblings.
func (n *Node) AppendChild(c *Node) {
if c.Parent != nil || c.PrevSibling != nil || c.NextSibling != nil {
panic("html: AppendChild called for an attached child Node")
}
last := n.LastChild
if last != nil {
last.NextSibling = c
} else {
n.FirstChild = c
}
n.LastChild = c
c.Parent = n
c.PrevSibling = last
}
// RemoveChild removes a node c that is a child of n. Afterwards, c will have
// no parent and no siblings.
//
// It will panic if c's parent is not n.
func (n *Node) RemoveChild(c *Node) {
if c.Parent != n {
panic("html: RemoveChild called for a non-child Node")
}
if n.FirstChild == c {
n.FirstChild = c.NextSibling
}
if c.NextSibling != nil {
c.NextSibling.PrevSibling = c.PrevSibling
}
if n.LastChild == c {
n.LastChild = c.PrevSibling
}
if c.PrevSibling != nil {
c.PrevSibling.NextSibling = c.NextSibling
}
c.Parent = nil
c.PrevSibling = nil
c.NextSibling = nil
}
// reparentChildren reparents all of src's child nodes to dst.
func reparentChildren(dst, src *Node) {
for {
child := src.FirstChild
if child == nil {
break
}
src.RemoveChild(child)
dst.AppendChild(child)
}
}
// clone returns a new node with the same type, data and attributes.
// The clone has no parent, no siblings and no children.
func (n *Node) clone() *Node {
m := &Node{
Type: n.Type,
DataAtom: n.DataAtom,
Data: n.Data,
Attr: make([]Attribute, len(n.Attr)),
}
copy(m.Attr, n.Attr)
return m
}
// nodeStack is a stack of nodes.
type nodeStack []*Node
// pop pops the stack. It will panic if s is empty.
func (s *nodeStack) pop() *Node {
i := len(*s)
n := (*s)[i-1]
*s = (*s)[:i-1]
return n
}
// top returns the most recently pushed node, or nil if s is empty.
func (s *nodeStack) top() *Node {
if i := len(*s); i > 0 {
return (*s)[i-1]
}
return nil
}
// index returns the index of the top-most occurrence of n in the stack, or -1
// if n is not present.
func (s *nodeStack) index(n *Node) int {
for i := len(*s) - 1; i >= 0; i-- {
if (*s)[i] == n {
return i
}
}
return -1
}
// insert inserts a node at the given index.
func (s *nodeStack) insert(i int, n *Node) {
(*s) = append(*s, nil)
copy((*s)[i+1:], (*s)[i:])
(*s)[i] = n
}
// remove removes a node from the stack. It is a no-op if n is not present.
func (s *nodeStack) remove(n *Node) {
i := s.index(n)
if i == -1 {
return
}
copy((*s)[i:], (*s)[i+1:])
j := len(*s) - 1
(*s)[j] = nil
*s = (*s)[:j]
}

View file

@ -0,0 +1,146 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"fmt"
)
// checkTreeConsistency checks that a node and its descendants are all
// consistent in their parent/child/sibling relationships.
func checkTreeConsistency(n *Node) error {
return checkTreeConsistency1(n, 0)
}
func checkTreeConsistency1(n *Node, depth int) error {
if depth == 1e4 {
return fmt.Errorf("html: tree looks like it contains a cycle")
}
if err := checkNodeConsistency(n); err != nil {
return err
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := checkTreeConsistency1(c, depth+1); err != nil {
return err
}
}
return nil
}
// checkNodeConsistency checks that a node's parent/child/sibling relationships
// are consistent.
func checkNodeConsistency(n *Node) error {
if n == nil {
return nil
}
nParent := 0
for p := n.Parent; p != nil; p = p.Parent {
nParent++
if nParent == 1e4 {
return fmt.Errorf("html: parent list looks like an infinite loop")
}
}
nForward := 0
for c := n.FirstChild; c != nil; c = c.NextSibling {
nForward++
if nForward == 1e6 {
return fmt.Errorf("html: forward list of children looks like an infinite loop")
}
if c.Parent != n {
return fmt.Errorf("html: inconsistent child/parent relationship")
}
}
nBackward := 0
for c := n.LastChild; c != nil; c = c.PrevSibling {
nBackward++
if nBackward == 1e6 {
return fmt.Errorf("html: backward list of children looks like an infinite loop")
}
if c.Parent != n {
return fmt.Errorf("html: inconsistent child/parent relationship")
}
}
if n.Parent != nil {
if n.Parent == n {
return fmt.Errorf("html: inconsistent parent relationship")
}
if n.Parent == n.FirstChild {
return fmt.Errorf("html: inconsistent parent/first relationship")
}
if n.Parent == n.LastChild {
return fmt.Errorf("html: inconsistent parent/last relationship")
}
if n.Parent == n.PrevSibling {
return fmt.Errorf("html: inconsistent parent/prev relationship")
}
if n.Parent == n.NextSibling {
return fmt.Errorf("html: inconsistent parent/next relationship")
}
parentHasNAsAChild := false
for c := n.Parent.FirstChild; c != nil; c = c.NextSibling {
if c == n {
parentHasNAsAChild = true
break
}
}
if !parentHasNAsAChild {
return fmt.Errorf("html: inconsistent parent/child relationship")
}
}
if n.PrevSibling != nil && n.PrevSibling.NextSibling != n {
return fmt.Errorf("html: inconsistent prev/next relationship")
}
if n.NextSibling != nil && n.NextSibling.PrevSibling != n {
return fmt.Errorf("html: inconsistent next/prev relationship")
}
if (n.FirstChild == nil) != (n.LastChild == nil) {
return fmt.Errorf("html: inconsistent first/last relationship")
}
if n.FirstChild != nil && n.FirstChild == n.LastChild {
// We have a sole child.
if n.FirstChild.PrevSibling != nil || n.FirstChild.NextSibling != nil {
return fmt.Errorf("html: inconsistent sole child's sibling relationship")
}
}
seen := map[*Node]bool{}
var last *Node
for c := n.FirstChild; c != nil; c = c.NextSibling {
if seen[c] {
return fmt.Errorf("html: inconsistent repeated child")
}
seen[c] = true
last = c
}
if last != n.LastChild {
return fmt.Errorf("html: inconsistent last relationship")
}
var first *Node
for c := n.LastChild; c != nil; c = c.PrevSibling {
if !seen[c] {
return fmt.Errorf("html: inconsistent missing child")
}
delete(seen, c)
first = c
}
if first != n.FirstChild {
return fmt.Errorf("html: inconsistent first relationship")
}
if len(seen) != 0 {
return fmt.Errorf("html: inconsistent forwards/backwards child list")
}
return nil
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,388 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"runtime"
"sort"
"strings"
"testing"
"golang.org/x/net/html/atom"
)
// readParseTest reads a single test case from r.
func readParseTest(r *bufio.Reader) (text, want, context string, err error) {
line, err := r.ReadSlice('\n')
if err != nil {
return "", "", "", err
}
var b []byte
// Read the HTML.
if string(line) != "#data\n" {
return "", "", "", fmt.Errorf(`got %q want "#data\n"`, line)
}
for {
line, err = r.ReadSlice('\n')
if err != nil {
return "", "", "", err
}
if line[0] == '#' {
break
}
b = append(b, line...)
}
text = strings.TrimSuffix(string(b), "\n")
b = b[:0]
// Skip the error list.
if string(line) != "#errors\n" {
return "", "", "", fmt.Errorf(`got %q want "#errors\n"`, line)
}
for {
line, err = r.ReadSlice('\n')
if err != nil {
return "", "", "", err
}
if line[0] == '#' {
break
}
}
if string(line) == "#document-fragment\n" {
line, err = r.ReadSlice('\n')
if err != nil {
return "", "", "", err
}
context = strings.TrimSpace(string(line))
line, err = r.ReadSlice('\n')
if err != nil {
return "", "", "", err
}
}
// Read the dump of what the parse tree should be.
if string(line) != "#document\n" {
return "", "", "", fmt.Errorf(`got %q want "#document\n"`, line)
}
inQuote := false
for {
line, err = r.ReadSlice('\n')
if err != nil && err != io.EOF {
return "", "", "", err
}
trimmed := bytes.Trim(line, "| \n")
if len(trimmed) > 0 {
if line[0] == '|' && trimmed[0] == '"' {
inQuote = true
}
if trimmed[len(trimmed)-1] == '"' && !(line[0] == '|' && len(trimmed) == 1) {
inQuote = false
}
}
if len(line) == 0 || len(line) == 1 && line[0] == '\n' && !inQuote {
break
}
b = append(b, line...)
}
return text, string(b), context, nil
}
func dumpIndent(w io.Writer, level int) {
io.WriteString(w, "| ")
for i := 0; i < level; i++ {
io.WriteString(w, " ")
}
}
type sortedAttributes []Attribute
func (a sortedAttributes) Len() int {
return len(a)
}
func (a sortedAttributes) Less(i, j int) bool {
if a[i].Namespace != a[j].Namespace {
return a[i].Namespace < a[j].Namespace
}
return a[i].Key < a[j].Key
}
func (a sortedAttributes) Swap(i, j int) {
a[i], a[j] = a[j], a[i]
}
func dumpLevel(w io.Writer, n *Node, level int) error {
dumpIndent(w, level)
switch n.Type {
case ErrorNode:
return errors.New("unexpected ErrorNode")
case DocumentNode:
return errors.New("unexpected DocumentNode")
case ElementNode:
if n.Namespace != "" {
fmt.Fprintf(w, "<%s %s>", n.Namespace, n.Data)
} else {
fmt.Fprintf(w, "<%s>", n.Data)
}
attr := sortedAttributes(n.Attr)
sort.Sort(attr)
for _, a := range attr {
io.WriteString(w, "\n")
dumpIndent(w, level+1)
if a.Namespace != "" {
fmt.Fprintf(w, `%s %s="%s"`, a.Namespace, a.Key, a.Val)
} else {
fmt.Fprintf(w, `%s="%s"`, a.Key, a.Val)
}
}
case TextNode:
fmt.Fprintf(w, `"%s"`, n.Data)
case CommentNode:
fmt.Fprintf(w, "<!-- %s -->", n.Data)
case DoctypeNode:
fmt.Fprintf(w, "<!DOCTYPE %s", n.Data)
if n.Attr != nil {
var p, s string
for _, a := range n.Attr {
switch a.Key {
case "public":
p = a.Val
case "system":
s = a.Val
}
}
if p != "" || s != "" {
fmt.Fprintf(w, ` "%s"`, p)
fmt.Fprintf(w, ` "%s"`, s)
}
}
io.WriteString(w, ">")
case scopeMarkerNode:
return errors.New("unexpected scopeMarkerNode")
default:
return errors.New("unknown node type")
}
io.WriteString(w, "\n")
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := dumpLevel(w, c, level+1); err != nil {
return err
}
}
return nil
}
func dump(n *Node) (string, error) {
if n == nil || n.FirstChild == nil {
return "", nil
}
var b bytes.Buffer
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := dumpLevel(&b, c, 0); err != nil {
return "", err
}
}
return b.String(), nil
}
const testDataDir = "testdata/webkit/"
func TestParser(t *testing.T) {
testFiles, err := filepath.Glob(testDataDir + "*.dat")
if err != nil {
t.Fatal(err)
}
for _, tf := range testFiles {
f, err := os.Open(tf)
if err != nil {
t.Fatal(err)
}
defer f.Close()
r := bufio.NewReader(f)
for i := 0; ; i++ {
text, want, context, err := readParseTest(r)
if err == io.EOF {
break
}
if err != nil {
t.Fatal(err)
}
err = testParseCase(text, want, context)
if err != nil {
t.Errorf("%s test #%d %q, %s", tf, i, text, err)
}
}
}
}
// testParseCase tests one test case from the test files. If the test does not
// pass, it returns an error that explains the failure.
// text is the HTML to be parsed, want is a dump of the correct parse tree,
// and context is the name of the context node, if any.
func testParseCase(text, want, context string) (err error) {
defer func() {
if x := recover(); x != nil {
switch e := x.(type) {
case error:
err = e
default:
err = fmt.Errorf("%v", e)
}
}
}()
var doc *Node
if context == "" {
doc, err = Parse(strings.NewReader(text))
if err != nil {
return err
}
} else {
contextNode := &Node{
Type: ElementNode,
DataAtom: atom.Lookup([]byte(context)),
Data: context,
}
nodes, err := ParseFragment(strings.NewReader(text), contextNode)
if err != nil {
return err
}
doc = &Node{
Type: DocumentNode,
}
for _, n := range nodes {
doc.AppendChild(n)
}
}
if err := checkTreeConsistency(doc); err != nil {
return err
}
got, err := dump(doc)
if err != nil {
return err
}
// Compare the parsed tree to the #document section.
if got != want {
return fmt.Errorf("got vs want:\n----\n%s----\n%s----", got, want)
}
if renderTestBlacklist[text] || context != "" {
return nil
}
// Check that rendering and re-parsing results in an identical tree.
pr, pw := io.Pipe()
go func() {
pw.CloseWithError(Render(pw, doc))
}()
doc1, err := Parse(pr)
if err != nil {
return err
}
got1, err := dump(doc1)
if err != nil {
return err
}
if got != got1 {
return fmt.Errorf("got vs got1:\n----\n%s----\n%s----", got, got1)
}
return nil
}
// Some test input result in parse trees are not 'well-formed' despite
// following the HTML5 recovery algorithms. Rendering and re-parsing such a
// tree will not result in an exact clone of that tree. We blacklist such
// inputs from the render test.
var renderTestBlacklist = map[string]bool{
// The second <a> will be reparented to the first <table>'s parent. This
// results in an <a> whose parent is an <a>, which is not 'well-formed'.
`<a><table><td><a><table></table><a></tr><a></table><b>X</b>C<a>Y`: true,
// The same thing with a <p>:
`<p><table></p>`: true,
// More cases of <a> being reparented:
`<a href="blah">aba<table><a href="foo">br<tr><td></td></tr>x</table>aoe`: true,
`<a><table><a></table><p><a><div><a>`: true,
`<a><table><td><a><table></table><a></tr><a></table><a>`: true,
// A similar reparenting situation involving <nobr>:
`<!DOCTYPE html><body><b><nobr>1<table><nobr></b><i><nobr>2<nobr></i>3`: true,
// A <plaintext> element is reparented, putting it before a table.
// A <plaintext> element can't have anything after it in HTML.
`<table><plaintext><td>`: true,
`<!doctype html><table><plaintext></plaintext>`: true,
`<!doctype html><table><tbody><plaintext></plaintext>`: true,
`<!doctype html><table><tbody><tr><plaintext></plaintext>`: true,
// A form inside a table inside a form doesn't work either.
`<!doctype html><form><table></form><form></table></form>`: true,
// A script that ends at EOF may escape its own closing tag when rendered.
`<!doctype html><script><!--<script `: true,
`<!doctype html><script><!--<script <`: true,
`<!doctype html><script><!--<script <a`: true,
`<!doctype html><script><!--<script </`: true,
`<!doctype html><script><!--<script </s`: true,
`<!doctype html><script><!--<script </script`: true,
`<!doctype html><script><!--<script </scripta`: true,
`<!doctype html><script><!--<script -`: true,
`<!doctype html><script><!--<script -a`: true,
`<!doctype html><script><!--<script -<`: true,
`<!doctype html><script><!--<script --`: true,
`<!doctype html><script><!--<script --a`: true,
`<!doctype html><script><!--<script --<`: true,
`<script><!--<script `: true,
`<script><!--<script <a`: true,
`<script><!--<script </script`: true,
`<script><!--<script </scripta`: true,
`<script><!--<script -`: true,
`<script><!--<script -a`: true,
`<script><!--<script --`: true,
`<script><!--<script --a`: true,
`<script><!--<script <`: true,
`<script><!--<script </`: true,
`<script><!--<script </s`: true,
// Reconstructing the active formatting elements results in a <plaintext>
// element that contains an <a> element.
`<!doctype html><p><a><plaintext>b`: true,
}
func TestNodeConsistency(t *testing.T) {
// inconsistentNode is a Node whose DataAtom and Data do not agree.
inconsistentNode := &Node{
Type: ElementNode,
DataAtom: atom.Frameset,
Data: "table",
}
_, err := ParseFragment(strings.NewReader("<p>hello</p>"), inconsistentNode)
if err == nil {
t.Errorf("got nil error, want non-nil")
}
}
func BenchmarkParser(b *testing.B) {
buf, err := ioutil.ReadFile("testdata/go1.html")
if err != nil {
b.Fatalf("could not read testdata/go1.html: %v", err)
}
b.SetBytes(int64(len(buf)))
runtime.GC()
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
Parse(bytes.NewBuffer(buf))
}
}

View file

@ -0,0 +1,271 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bufio"
"errors"
"fmt"
"io"
"strings"
)
type writer interface {
io.Writer
io.ByteWriter
WriteString(string) (int, error)
}
// Render renders the parse tree n to the given writer.
//
// Rendering is done on a 'best effort' basis: calling Parse on the output of
// Render will always result in something similar to the original tree, but it
// is not necessarily an exact clone unless the original tree was 'well-formed'.
// 'Well-formed' is not easily specified; the HTML5 specification is
// complicated.
//
// Calling Parse on arbitrary input typically results in a 'well-formed' parse
// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
// For example, in a 'well-formed' parse tree, no <a> element is a child of
// another <a> element: parsing "<a><a>" results in two sibling elements.
// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
// children; the <a> is reparented to the <table>'s parent. However, calling
// Parse on "<a><table><a>" does not return an error, but the result has an <a>
// element with an <a> child, and is therefore not 'well-formed'.
//
// Programmatically constructed trees are typically also 'well-formed', but it
// is possible to construct a tree that looks innocuous but, when rendered and
// re-parsed, results in a different tree. A simple example is that a solitary
// text node would become a tree containing <html>, <head> and <body> elements.
// Another example is that the programmatic equivalent of "a<head>b</head>c"
// becomes "<html><head><head/><body>abc</body></html>".
func Render(w io.Writer, n *Node) error {
if x, ok := w.(writer); ok {
return render(x, n)
}
buf := bufio.NewWriter(w)
if err := render(buf, n); err != nil {
return err
}
return buf.Flush()
}
// plaintextAbort is returned from render1 when a <plaintext> element
// has been rendered. No more end tags should be rendered after that.
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
func render(w writer, n *Node) error {
err := render1(w, n)
if err == plaintextAbort {
err = nil
}
return err
}
func render1(w writer, n *Node) error {
// Render non-element nodes; these are the easy cases.
switch n.Type {
case ErrorNode:
return errors.New("html: cannot render an ErrorNode node")
case TextNode:
return escape(w, n.Data)
case DocumentNode:
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := render1(w, c); err != nil {
return err
}
}
return nil
case ElementNode:
// No-op.
case CommentNode:
if _, err := w.WriteString("<!--"); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
if _, err := w.WriteString("-->"); err != nil {
return err
}
return nil
case DoctypeNode:
if _, err := w.WriteString("<!DOCTYPE "); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
if n.Attr != nil {
var p, s string
for _, a := range n.Attr {
switch a.Key {
case "public":
p = a.Val
case "system":
s = a.Val
}
}
if p != "" {
if _, err := w.WriteString(" PUBLIC "); err != nil {
return err
}
if err := writeQuoted(w, p); err != nil {
return err
}
if s != "" {
if err := w.WriteByte(' '); err != nil {
return err
}
if err := writeQuoted(w, s); err != nil {
return err
}
}
} else if s != "" {
if _, err := w.WriteString(" SYSTEM "); err != nil {
return err
}
if err := writeQuoted(w, s); err != nil {
return err
}
}
}
return w.WriteByte('>')
default:
return errors.New("html: unknown node type")
}
// Render the <xxx> opening tag.
if err := w.WriteByte('<'); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
for _, a := range n.Attr {
if err := w.WriteByte(' '); err != nil {
return err
}
if a.Namespace != "" {
if _, err := w.WriteString(a.Namespace); err != nil {
return err
}
if err := w.WriteByte(':'); err != nil {
return err
}
}
if _, err := w.WriteString(a.Key); err != nil {
return err
}
if _, err := w.WriteString(`="`); err != nil {
return err
}
if err := escape(w, a.Val); err != nil {
return err
}
if err := w.WriteByte('"'); err != nil {
return err
}
}
if voidElements[n.Data] {
if n.FirstChild != nil {
return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
}
_, err := w.WriteString("/>")
return err
}
if err := w.WriteByte('>'); err != nil {
return err
}
// Add initial newline where there is danger of a newline beging ignored.
if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
switch n.Data {
case "pre", "listing", "textarea":
if err := w.WriteByte('\n'); err != nil {
return err
}
}
}
// Render any child nodes.
switch n.Data {
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
for c := n.FirstChild; c != nil; c = c.NextSibling {
if c.Type == TextNode {
if _, err := w.WriteString(c.Data); err != nil {
return err
}
} else {
if err := render1(w, c); err != nil {
return err
}
}
}
if n.Data == "plaintext" {
// Don't render anything else. <plaintext> must be the
// last element in the file, with no closing tag.
return plaintextAbort
}
default:
for c := n.FirstChild; c != nil; c = c.NextSibling {
if err := render1(w, c); err != nil {
return err
}
}
}
// Render the </xxx> closing tag.
if _, err := w.WriteString("</"); err != nil {
return err
}
if _, err := w.WriteString(n.Data); err != nil {
return err
}
return w.WriteByte('>')
}
// writeQuoted writes s to w surrounded by quotes. Normally it will use double
// quotes, but if s contains a double quote, it will use single quotes.
// It is used for writing the identifiers in a doctype declaration.
// In valid HTML, they can't contain both types of quotes.
func writeQuoted(w writer, s string) error {
var q byte = '"'
if strings.Contains(s, `"`) {
q = '\''
}
if err := w.WriteByte(q); err != nil {
return err
}
if _, err := w.WriteString(s); err != nil {
return err
}
if err := w.WriteByte(q); err != nil {
return err
}
return nil
}
// Section 12.1.2, "Elements", gives this list of void elements. Void elements
// are those that can't have any contents.
var voidElements = map[string]bool{
"area": true,
"base": true,
"br": true,
"col": true,
"command": true,
"embed": true,
"hr": true,
"img": true,
"input": true,
"keygen": true,
"link": true,
"meta": true,
"param": true,
"source": true,
"track": true,
"wbr": true,
}

View file

@ -0,0 +1,156 @@
// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import (
"bytes"
"testing"
)
func TestRenderer(t *testing.T) {
nodes := [...]*Node{
0: {
Type: ElementNode,
Data: "html",
},
1: {
Type: ElementNode,
Data: "head",
},
2: {
Type: ElementNode,
Data: "body",
},
3: {
Type: TextNode,
Data: "0<1",
},
4: {
Type: ElementNode,
Data: "p",
Attr: []Attribute{
{
Key: "id",
Val: "A",
},
{
Key: "foo",
Val: `abc"def`,
},
},
},
5: {
Type: TextNode,
Data: "2",
},
6: {
Type: ElementNode,
Data: "b",
Attr: []Attribute{
{
Key: "empty",
Val: "",
},
},
},
7: {
Type: TextNode,
Data: "3",
},
8: {
Type: ElementNode,
Data: "i",
Attr: []Attribute{
{
Key: "backslash",
Val: `\`,
},
},
},
9: {
Type: TextNode,
Data: "&4",
},
10: {
Type: TextNode,
Data: "5",
},
11: {
Type: ElementNode,
Data: "blockquote",
},
12: {
Type: ElementNode,
Data: "br",
},
13: {
Type: TextNode,
Data: "6",
},
}
// Build a tree out of those nodes, based on a textual representation.
// Only the ".\t"s are significant. The trailing HTML-like text is
// just commentary. The "0:" prefixes are for easy cross-reference with
// the nodes array.
treeAsText := [...]string{
0: `<html>`,
1: `. <head>`,
2: `. <body>`,
3: `. . "0&lt;1"`,
4: `. . <p id="A" foo="abc&#34;def">`,
5: `. . . "2"`,
6: `. . . <b empty="">`,
7: `. . . . "3"`,
8: `. . . <i backslash="\">`,
9: `. . . . "&amp;4"`,
10: `. . "5"`,
11: `. . <blockquote>`,
12: `. . <br>`,
13: `. . "6"`,
}
if len(nodes) != len(treeAsText) {
t.Fatal("len(nodes) != len(treeAsText)")
}
var stack [8]*Node
for i, line := range treeAsText {
level := 0
for line[0] == '.' {
// Strip a leading ".\t".
line = line[2:]
level++
}
n := nodes[i]
if level == 0 {
if stack[0] != nil {
t.Fatal("multiple root nodes")
}
stack[0] = n
} else {
stack[level-1].AppendChild(n)
stack[level] = n
for i := level + 1; i < len(stack); i++ {
stack[i] = nil
}
}
// At each stage of tree construction, we check all nodes for consistency.
for j, m := range nodes {
if err := checkNodeConsistency(m); err != nil {
t.Fatalf("i=%d, j=%d: %v", i, j, err)
}
}
}
want := `<html><head></head><body>0&lt;1<p id="A" foo="abc&#34;def">` +
`2<b empty="">3</b><i backslash="\">&amp;4</i></p>` +
`5<blockquote></blockquote><br/>6</body></html>`
b := new(bytes.Buffer)
if err := Render(b, nodes[0]); err != nil {
t.Fatal(err)
}
if got := b.String(); got != want {
t.Errorf("got vs want:\n%s\n%s\n", got, want)
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,28 @@
The *.dat files in this directory are copied from The WebKit Open Source
Project, specifically $WEBKITROOT/LayoutTests/html5lib/resources.
WebKit is licensed under a BSD style license.
http://webkit.org/coding/bsd-license.html says:
Copyright (C) 2009 Apple Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS "AS IS" AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -0,0 +1,194 @@
#data
<a><p></a></p>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <p>
| <a>
#data
<a>1<p>2</a>3</p>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <p>
| <a>
| "2"
| "3"
#data
<a>1<button>2</a>3</button>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <button>
| <a>
| "2"
| "3"
#data
<a>1<b>2</a>3</b>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <b>
| "2"
| <b>
| "3"
#data
<a>1<div>2<div>3</a>4</div>5</div>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <div>
| <a>
| "2"
| <div>
| <a>
| "3"
| "4"
| "5"
#data
<table><a>1<p>2</a>3</p>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <p>
| <a>
| "2"
| "3"
| <table>
#data
<b><b><a><p></a>
#errors
#document
| <html>
| <head>
| <body>
| <b>
| <b>
| <a>
| <p>
| <a>
#data
<b><a><b><p></a>
#errors
#document
| <html>
| <head>
| <body>
| <b>
| <a>
| <b>
| <b>
| <p>
| <a>
#data
<a><b><b><p></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <b>
| <b>
| <b>
| <b>
| <p>
| <a>
#data
<p>1<s id="A">2<b id="B">3</p>4</s>5</b>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| "1"
| <s>
| id="A"
| "2"
| <b>
| id="B"
| "3"
| <s>
| id="A"
| <b>
| id="B"
| "4"
| <b>
| id="B"
| "5"
#data
<table><a>1<td>2</td>3</table>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| "1"
| <a>
| "3"
| <table>
| <tbody>
| <tr>
| <td>
| "2"
#data
<table>A<td>B</td>C</table>
#errors
#document
| <html>
| <head>
| <body>
| "AC"
| <table>
| <tbody>
| <tr>
| <td>
| "B"
#data
<a><svg><tr><input></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <svg svg>
| <svg tr>
| <svg input>

View file

@ -0,0 +1,31 @@
#data
<b>1<i>2<p>3</b>4
#errors
#document
| <html>
| <head>
| <body>
| <b>
| "1"
| <i>
| "2"
| <i>
| <p>
| <b>
| "3"
| "4"
#data
<a><div><style></style><address><a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| <div>
| <a>
| <style>
| <address>
| <a>
| <a>

View file

@ -0,0 +1,135 @@
#data
FOO<!-- BAR -->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -->
| "BAZ"
#data
FOO<!-- BAR --!>BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -->
| "BAZ"
#data
FOO<!-- BAR -- >BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- >BAZ -->
#data
FOO<!-- BAR -- <QUX> -- MUX -->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -->
| "BAZ"
#data
FOO<!-- BAR -- <QUX> -- MUX --!>BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -->
| "BAZ"
#data
FOO<!-- BAR -- <QUX> -- MUX -- >BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- BAR -- <QUX> -- MUX -- >BAZ -->
#data
FOO<!---->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
FOO<!--->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
FOO<!-->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- -->
| "BAZ"
#data
<?xml version="1.0">Hi
#errors
#document
| <!-- ?xml version="1.0" -->
| <html>
| <head>
| <body>
| "Hi"
#data
<?xml version="1.0">
#errors
#document
| <!-- ?xml version="1.0" -->
| <html>
| <head>
| <body>
#data
<?xml version
#errors
#document
| <!-- ?xml version -->
| <html>
| <head>
| <body>
#data
FOO<!----->BAZ
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <!-- - -->
| "BAZ"

View file

@ -0,0 +1,370 @@
#data
<!DOCTYPE html>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!dOctYpE HtMl>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPEhtml>Hello
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE>Hello
#errors
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE >Hello
#errors
#document
| <!DOCTYPE >
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato taco>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato taco "ddd>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato sYstEM ggg>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM taco >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM 'taco"'>Hello
#errors
#document
| <!DOCTYPE potato "" "taco"">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM "taco">Hello
#errors
#document
| <!DOCTYPE potato "" "taco">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEM "tai'co">Hello
#errors
#document
| <!DOCTYPE potato "" "tai'co">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato SYSTEMtaco "ddd">Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato grass SYSTEM taco>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIc>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIc >Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato pUbLIcgoof>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC goof>Hello
#errors
#document
| <!DOCTYPE potato>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC "go'of">Hello
#errors
#document
| <!DOCTYPE potato "go'of" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC 'go'of'>Hello
#errors
#document
| <!DOCTYPE potato "go" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC 'go:hh of' >Hello
#errors
#document
| <!DOCTYPE potato "go:hh of" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE potato PUBLIC "W3C-//dfdf" SYSTEM ggg>Hello
#errors
#document
| <!DOCTYPE potato "W3C-//dfdf" "">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">Hello
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE ...>Hello
#errors
#document
| <!DOCTYPE ...>
| <html>
| <head>
| <body>
| "Hello"
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
#errors
#document
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Frameset//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
#errors
#document
| <!DOCTYPE html "-//W3C//DTD XHTML 1.0 Frameset//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE root-element [SYSTEM OR PUBLIC FPI] "uri" [
<!-- internal declarations -->
]>
#errors
#document
| <!DOCTYPE root-element>
| <html>
| <head>
| <body>
| "]>"
#data
<!DOCTYPE html PUBLIC
"-//WAPFORUM//DTD XHTML Mobile 1.0//EN"
"http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
#errors
#document
| <!DOCTYPE html "-//WAPFORUM//DTD XHTML Mobile 1.0//EN" "http://www.wapforum.org/DTD/xhtml-mobile10.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML SYSTEM "http://www.w3.org/DTD/HTML4-strict.dtd"><body><b>Mine!</b></body>
#errors
#document
| <!DOCTYPE html "" "http://www.w3.org/DTD/HTML4-strict.dtd">
| <html>
| <head>
| <body>
| <b>
| "Mine!"
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN""http://www.w3.org/TR/html4/strict.dtd">
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC"-//W3C//DTD HTML 4.01//EN"'http://www.w3.org/TR/html4/strict.dtd'>
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>
#data
<!DOCTYPE HTML PUBLIC'-//W3C//DTD HTML 4.01//EN''http://www.w3.org/TR/html4/strict.dtd'>
#errors
#document
| <!DOCTYPE html "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
| <html>
| <head>
| <body>

View file

@ -0,0 +1,603 @@
#data
FOO&gt;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO>BAR"
#data
FOO&gtBAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO>BAR"
#data
FOO&gt BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO> BAR"
#data
FOO&gt;;;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO>;;BAR"
#data
I'm &notit; I tell you
#errors
#document
| <html>
| <head>
| <body>
| "I'm ¬it; I tell you"
#data
I'm &notin; I tell you
#errors
#document
| <html>
| <head>
| <body>
| "I'm ∉ I tell you"
#data
FOO& BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO& BAR"
#data
FOO&<BAR>
#errors
#document
| <html>
| <head>
| <body>
| "FOO&"
| <bar>
#data
FOO&&&&gt;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO&&&>BAR"
#data
FOO&#41;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO)BAR"
#data
FOO&#x41;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOOABAR"
#data
FOO&#X41;BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOOABAR"
#data
FOO&#BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#BAR"
#data
FOO&#ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#ZOO"
#data
FOO&#xBAR
#errors
#document
| <html>
| <head>
| <body>
| "FOOºR"
#data
FOO&#xZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#xZOO"
#data
FOO&#XZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO&#XZOO"
#data
FOO&#41BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO)BAR"
#data
FOO&#x41BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO䆺R"
#data
FOO&#x41ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOAZOO"
#data
FOO&#x0000;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#x0078;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOxZOO"
#data
FOO&#x0079;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOyZOO"
#data
FOO&#x0080;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO€ZOO"
#data
FOO&#x0081;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0082;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0083;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOƒZOO"
#data
FOO&#x0084;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO„ZOO"
#data
FOO&#x0085;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO…ZOO"
#data
FOO&#x0086;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO†ZOO"
#data
FOO&#x0087;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO‡ZOO"
#data
FOO&#x0088;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOˆZOO"
#data
FOO&#x0089;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO‰ZOO"
#data
FOO&#x008A;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŠZOO"
#data
FOO&#x008B;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x008C;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŒZOO"
#data
FOO&#x008D;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x008E;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŽZOO"
#data
FOO&#x008F;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0090;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0091;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0092;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0093;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO“ZOO"
#data
FOO&#x0094;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO”ZOO"
#data
FOO&#x0095;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO•ZOO"
#data
FOO&#x0096;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x0097;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO—ZOO"
#data
FOO&#x0098;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO˜ZOO"
#data
FOO&#x0099;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO™ZOO"
#data
FOO&#x009A;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOšZOO"
#data
FOO&#x009B;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x009C;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOœZOO"
#data
FOO&#x009D;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x009E;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOžZOO"
#data
FOO&#x009F;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOŸZOO"
#data
FOO&#x00A0;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO ZOO"
#data
FOO&#xD7FF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO퟿ZOO"
#data
FOO&#xD800;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xD801;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xDFFE;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xDFFF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xE000;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOOZOO"
#data
FOO&#x10FFFE;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO􏿾ZOO"
#data
FOO&#x1087D4;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO􈟔ZOO"
#data
FOO&#x10FFFF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO􏿿ZOO"
#data
FOO&#x110000;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"
#data
FOO&#xFFFFFF;ZOO
#errors
#document
| <html>
| <head>
| <body>
| "FOO<4F>ZOO"

View file

@ -0,0 +1,249 @@
#data
<div bar="ZZ&gt;YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>YY"
#data
<div bar="ZZ&"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar='ZZ&'></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar=ZZ&></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&"
#data
<div bar="ZZ&gt=YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt=YY"
#data
<div bar="ZZ&gt0YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt0YY"
#data
<div bar="ZZ&gt9YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gt9YY"
#data
<div bar="ZZ&gtaYY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gtaYY"
#data
<div bar="ZZ&gtZYY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&gtZYY"
#data
<div bar="ZZ&gt YY"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ> YY"
#data
<div bar="ZZ&gt"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar='ZZ&gt'></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar=ZZ&gt></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ>"
#data
<div bar="ZZ&pound_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ£_id=23"
#data
<div bar="ZZ&prod_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&prod_id=23"
#data
<div bar="ZZ&pound;_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ£_id=23"
#data
<div bar="ZZ&prod;_id=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ∏_id=23"
#data
<div bar="ZZ&pound=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&pound=23"
#data
<div bar="ZZ&prod=23"></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| bar="ZZ&prod=23"
#data
<div>ZZ&pound_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£_id=23"
#data
<div>ZZ&prod_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ&prod_id=23"
#data
<div>ZZ&pound;_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£_id=23"
#data
<div>ZZ&prod;_id=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ∏_id=23"
#data
<div>ZZ&pound=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ£=23"
#data
<div>ZZ&prod=23</div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| "ZZ&prod=23"

View file

@ -0,0 +1,246 @@
#data
<div<div>
#errors
#document
| <html>
| <head>
| <body>
| <div<div>
#data
<div foo<bar=''>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo<bar=""
#data
<div foo=`bar`>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| foo="`bar`"
#data
<div \"foo=''>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| \"foo=""
#data
<a href='\nbar'></a>
#errors
#document
| <html>
| <head>
| <body>
| <a>
| href="\nbar"
#data
<!DOCTYPE html>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
#data
&lang;&rang;
#errors
#document
| <html>
| <head>
| <body>
| "⟨⟩"
#data
&apos;
#errors
#document
| <html>
| <head>
| <body>
| "'"
#data
&ImaginaryI;
#errors
#document
| <html>
| <head>
| <body>
| ""
#data
&Kopf;
#errors
#document
| <html>
| <head>
| <body>
| "𝕂"
#data
&notinva;
#errors
#document
| <html>
| <head>
| <body>
| "∉"
#data
<?import namespace="foo" implementation="#bar">
#errors
#document
| <!-- ?import namespace="foo" implementation="#bar" -->
| <html>
| <head>
| <body>
#data
<!--foo--bar-->
#errors
#document
| <!-- foo--bar -->
| <html>
| <head>
| <body>
#data
<![CDATA[x]]>
#errors
#document
| <!-- [CDATA[x]] -->
| <html>
| <head>
| <body>
#data
<textarea><!--</textarea>--></textarea>
#errors
#document
| <html>
| <head>
| <body>
| <textarea>
| "<!--"
| "-->"
#data
<textarea><!--</textarea>-->
#errors
#document
| <html>
| <head>
| <body>
| <textarea>
| "<!--"
| "-->"
#data
<style><!--</style>--></style>
#errors
#document
| <html>
| <head>
| <style>
| "<!--"
| <body>
| "-->"
#data
<style><!--</style>-->
#errors
#document
| <html>
| <head>
| <style>
| "<!--"
| <body>
| "-->"
#data
<ul><li>A </li> <li>B</li></ul>
#errors
#document
| <html>
| <head>
| <body>
| <ul>
| <li>
| "A "
| " "
| <li>
| "B"
#data
<table><form><input type=hidden><input></form><div></div></table>
#errors
#document
| <html>
| <head>
| <body>
| <input>
| <div>
| <table>
| <form>
| <input>
| type="hidden"
#data
<i>A<b>B<p></i>C</b>D
#errors
#document
| <html>
| <head>
| <body>
| <i>
| "A"
| <b>
| "B"
| <b>
| <p>
| <b>
| <i>
| "C"
| "D"
#data
<div></div>
#errors
#document
| <html>
| <head>
| <body>
| <div>
#data
<svg></svg>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<math></math>
#errors
#document
| <html>
| <head>
| <body>
| <math math>

View file

@ -0,0 +1,43 @@
#data
<button>1</foo>
#errors
#document
| <html>
| <head>
| <body>
| <button>
| "1"
#data
<foo>1<p>2</foo>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| "1"
| <p>
| "2"
#data
<dd>1</foo>
#errors
#document
| <html>
| <head>
| <body>
| <dd>
| "1"
#data
<foo>1<dd>2</foo>
#errors
#document
| <html>
| <head>
| <body>
| <foo>
| "1"
| <dd>
| "2"

View file

@ -0,0 +1,40 @@
#data
<isindex>
#errors
#document
| <html>
| <head>
| <body>
| <form>
| <hr>
| <label>
| "This is a searchable index. Enter search keywords: "
| <input>
| name="isindex"
| <hr>
#data
<isindex name="A" action="B" prompt="C" foo="D">
#errors
#document
| <html>
| <head>
| <body>
| <form>
| action="B"
| <hr>
| <label>
| "C"
| <input>
| foo="D"
| name="isindex"
| <hr>
#data
<form><isindex>
#errors
#document
| <html>
| <head>
| <body>
| <form>

View file

@ -0,0 +1,52 @@
#data
<input type="hidden"><frameset>
#errors
21: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
31: “frameset” start tag seen.
31: End of file seen and there were open elements.
#document
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><table><caption><svg>foo</table>bar
#errors
47: End tag “table” did not match the name of the current open element (“svg”).
47: “table” closed but “caption” was still open.
47: End tag “table” seen, but there were open elements.
36: Unclosed element “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| "foo"
| "bar"
#data
<table><tr><td><svg><desc><td></desc><circle>
#errors
7: Start tag seen without seeing a doctype first. Expected “<!DOCTYPE html>”.
30: A table cell was implicitly closed, but there were open elements.
26: Unclosed element “desc”.
20: Unclosed element “svg”.
37: Stray end tag “desc”.
45: End of file seen and there were open elements.
45: Unclosed element “circle”.
7: Unclosed element “table”.
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg desc>
| <td>
| <circle>

View file

@ -0,0 +1,308 @@
#data
FOO<script>'Hello'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'Hello'"
| "BAR"
#data
FOO<script></script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script >BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script/>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script></script/ >BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script type="text/plain"></scriptx>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "</scriptx>BAR"
#data
FOO<script></script foo=">" dd>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "BAR"
#data
FOO<script>'<'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<'"
| "BAR"
#data
FOO<script>'<!'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!'"
| "BAR"
#data
FOO<script>'<!-'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-'"
| "BAR"
#data
FOO<script>'<!--'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!--'"
| "BAR"
#data
FOO<script>'<!---'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!---'"
| "BAR"
#data
FOO<script>'<!-->'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-->'"
| "BAR"
#data
FOO<script>'<!-->'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-->'"
| "BAR"
#data
FOO<script>'<!-- potato'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- potato'"
| "BAR"
#data
FOO<script>'<!-- <sCrIpt'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- <sCrIpt'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt>'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt>'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> -'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> -'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> --'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> --'</script>BAR"
#data
FOO<script>'<!-- <sCrIpt> -->'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| "'<!-- <sCrIpt> -->'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> --!>'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> --!>'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt> -- >'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt> -- >'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt '</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt '</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt/'</script>BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt\'</script>BAR
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt\'"
| "BAR"
#data
FOO<script type="text/plain">'<!-- <sCrIpt/'</script>BAR</script>QUX
#errors
#document
| <html>
| <head>
| <body>
| "FOO"
| <script>
| type="text/plain"
| "'<!-- <sCrIpt/'</script>BAR"
| "QUX"

View file

@ -0,0 +1,15 @@
#data
<p><b id="A"><script>document.getElementById("A").id = "B"</script></p>TEXT</b>
#errors
#document
| <html>
| <head>
| <body>
| <p>
| <b>
| id="B"
| <script>
| "document.getElementById("A").id = "B""
| <b>
| id="A"
| "TEXT"

View file

@ -0,0 +1,28 @@
#data
1<script>document.write("2")</script>3
#errors
#document
| <html>
| <head>
| <body>
| "1"
| <script>
| "document.write("2")"
| "23"
#data
1<script>document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")</script>4
#errors
#document
| <html>
| <head>
| <body>
| "1"
| <script>
| "document.write("<script>document.write('2')</scr"+ "ipt><script>document.write('3')</scr" + "ipt>")"
| <script>
| "document.write('2')"
| "2"
| <script>
| "document.write('3')"
| "34"

View file

@ -0,0 +1,212 @@
#data
<table><th>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <th>
#data
<table><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><col foo='bar'>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <colgroup>
| <col>
| foo="bar"
#data
<table><colgroup></html>foo
#errors
#document
| <html>
| <head>
| <body>
| "foo"
| <table>
| <colgroup>
#data
<table></table><p>foo
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <p>
| "foo"
#data
<table></body></caption></col></colgroup></html></tbody></td></tfoot></th></thead></tr><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
#data
<table><select><option>3</select></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "3"
| <table>
#data
<table><select><table></table></select></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <table>
| <table>
#data
<table><select></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <table>
#data
<table><select><option>A<tr><td>B</td></tr></table>
#errors
#document
| <html>
| <head>
| <body>
| <select>
| <option>
| "A"
| <table>
| <tbody>
| <tr>
| <td>
| "B"
#data
<table><td></body></caption></col></colgroup></html>foo
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "foo"
#data
<table><td>A</table>B
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "A"
| "B"
#data
<table><tr><caption>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <caption>
#data
<table><tr></body></caption></col></colgroup></html></td></th><td>foo
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| "foo"
#data
<table><td><tr>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <tr>
#data
<table><td><button><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <button>
| <td>
#data
<table><tr><td><svg><desc><td>
#errors
#document
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg desc>
| <td>

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,799 @@
#data
<!DOCTYPE html><svg></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><svg></svg><![CDATA[a]]>
#errors
29: Bogus comment
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <!-- [CDATA[a]] -->
#data
<!DOCTYPE html><body><svg></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
#data
<!DOCTYPE html><body><select><svg></svg></select>
#errors
35: Stray “svg” start tag.
42: Stray end tag “svg”
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
#data
<!DOCTYPE html><body><select><option><svg></svg></option></select>
#errors
43: Stray “svg” start tag.
50: Stray end tag “svg”
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| <option>
#data
<!DOCTYPE html><body><table><svg></svg></table>
#errors
34: Start tag “svg” seen in “table”.
41: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <table>
#data
<!DOCTYPE html><body><table><svg><g>foo</g></svg></table>
#errors
34: Start tag “svg” seen in “table”.
46: Stray end tag “g”.
53: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <table>
#data
<!DOCTYPE html><body><table><svg><g>foo</g><g>bar</g></svg></table>
#errors
34: Start tag “svg” seen in “table”.
46: Stray end tag “g”.
58: Stray end tag “g”.
65: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
#data
<!DOCTYPE html><body><table><tbody><svg><g>foo</g><g>bar</g></svg></tbody></table>
#errors
41: Start tag “svg” seen in “table”.
53: Stray end tag “g”.
65: Stray end tag “g”.
72: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
| <tbody>
#data
<!DOCTYPE html><body><table><tbody><tr><svg><g>foo</g><g>bar</g></svg></tr></tbody></table>
#errors
45: Start tag “svg” seen in “table”.
57: Stray end tag “g”.
69: Stray end tag “g”.
76: Stray end tag “svg”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <table>
| <tbody>
| <tr>
#data
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg></td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
#data
<!DOCTYPE html><body><table><tbody><tr><td><svg><g>foo</g><g>bar</g></svg><p>baz</td></tr></tbody></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g></svg><p>baz</caption></table>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
70: HTML start tag “p” in a foreign namespace context.
81: “table” closed but “caption” was still open.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><caption><svg><g>foo</g><g>bar</g>baz</table><p>quux
#errors
78: “table” closed but “caption” was still open.
78: Unclosed elements on stack.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <caption>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| "baz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><colgroup><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
44: Start tag “svg” seen in “table”.
56: Stray end tag “g”.
68: Stray end tag “g”.
71: HTML start tag “p” in a foreign namespace context.
71: Start tag “p” seen in “table”.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
| <table>
| <colgroup>
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><tr><td><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
50: Stray “svg” start tag.
54: Stray “g” start tag.
62: Stray end tag “g”
66: Stray “g” start tag.
74: Stray end tag “g”
77: Stray “p” start tag.
88: “table” end tag with “select” open.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <table>
| <tbody>
| <tr>
| <td>
| <select>
| "foobarbaz"
| <p>
| "quux"
#data
<!DOCTYPE html><body><table><select><svg><g>foo</g><g>bar</g><p>baz</table><p>quux
#errors
36: Start tag “select” seen in “table”.
42: Stray “svg” start tag.
46: Stray “g” start tag.
54: Stray end tag “g”
58: Stray “g” start tag.
66: Stray end tag “g”
69: Stray “p” start tag.
80: “table” end tag with “select” open.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <select>
| "foobarbaz"
| <table>
| <p>
| "quux"
#data
<!DOCTYPE html><body></body></html><svg><g>foo</g><g>bar</g><p>baz
#errors
41: Stray “svg” start tag.
68: HTML start tag “p” in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><body></body><svg><g>foo</g><g>bar</g><p>baz
#errors
34: Stray “svg” start tag.
61: HTML start tag “p” in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg g>
| "foo"
| <svg g>
| "bar"
| <p>
| "baz"
#data
<!DOCTYPE html><frameset><svg><g></g><g></g><p><span>
#errors
31: Stray “svg” start tag.
35: Stray “g” start tag.
40: Stray end tag “g”
44: Stray “g” start tag.
49: Stray end tag “g”
52: Stray “p” start tag.
58: Stray “span” start tag.
58: End of file seen and there were open elements.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><frameset></frameset><svg><g></g><g></g><p><span>
#errors
42: Stray “svg” start tag.
46: Stray “g” start tag.
51: Stray end tag “g”
55: Stray “g” start tag.
60: Stray end tag “g”
63: Stray “p” start tag.
69: Stray “span” start tag.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <frameset>
#data
<!DOCTYPE html><body xlink:href=foo><svg xlink:href=foo></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| <svg svg>
| xlink href="foo"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo></g></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo /></svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
#data
<!DOCTYPE html><body xlink:href=foo xml:lang=en><svg><g xml:lang=en xlink:href=foo />bar</svg>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| xlink:href="foo"
| xml:lang="en"
| <svg svg>
| <svg g>
| xlink href="foo"
| xml lang="en"
| "bar"
#data
<svg></path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
#data
<div><svg></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| "a"
#data
<div><svg><path></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| "a"
#data
<div><svg><path></svg><path>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <path>
#data
<div><svg><path><foreignObject><math></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <math math>
| "a"
#data
<div><svg><path><foreignObject><p></div>a
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <p>
| "a"
#data
<!DOCTYPE html><svg><desc><div><svg><ul>a
#errors
40: HTML start tag “ul” in a foreign namespace context.
41: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg desc>
| <div>
| <svg svg>
| <ul>
| "a"
#data
<!DOCTYPE html><svg><desc><svg><ul>a
#errors
35: HTML start tag “ul” in a foreign namespace context.
36: End of file in a foreign namespace context.
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <svg svg>
| <svg desc>
| <svg svg>
| <ul>
| "a"
#data
<!DOCTYPE html><p><svg><desc><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <svg svg>
| <svg desc>
| <p>
#data
<!DOCTYPE html><p><svg><title><p>
#errors
#document
| <!DOCTYPE html>
| <html>
| <head>
| <body>
| <p>
| <svg svg>
| <svg title>
| <p>
#data
<div><svg><path><foreignObject><p></foreignObject><p>
#errors
#document
| <html>
| <head>
| <body>
| <div>
| <svg svg>
| <svg path>
| <svg foreignObject>
| <p>
| <p>
#data
<math><mi><div><object><div><span></span></div></object></div></mi><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <div>
| <object>
| <div>
| <span>
| <math mi>
#data
<math><mi><svg><foreignObject><div><div></div></div></foreignObject></svg></mi><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <svg svg>
| <svg foreignObject>
| <div>
| <div>
| <math mi>
#data
<svg><script></script><path>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <svg script>
| <svg path>
#data
<table><svg></svg><tr>
#errors
#document
| <html>
| <head>
| <body>
| <svg svg>
| <table>
| <tbody>
| <tr>
#data
<math><mi><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <math mglyph>
#data
<math><mi><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mi>
| <math malignmark>
#data
<math><mo><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mo>
| <math mglyph>
#data
<math><mo><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mo>
| <math malignmark>
#data
<math><mn><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mn>
| <math mglyph>
#data
<math><mn><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mn>
| <math malignmark>
#data
<math><ms><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math ms>
| <math mglyph>
#data
<math><ms><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math ms>
| <math malignmark>
#data
<math><mtext><mglyph>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <math mglyph>
#data
<math><mtext><malignmark>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math mtext>
| <math malignmark>
#data
<math><annotation-xml><svg></svg></annotation-xml><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <math mi>
#data
<math><annotation-xml><svg><foreignObject><div><math><mi></mi></math><span></span></div></foreignObject><path></path></svg></annotation-xml><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <svg foreignObject>
| <div>
| <math math>
| <math mi>
| <span>
| <svg path>
| <math mi>
#data
<math><annotation-xml><svg><foreignObject><math><mi><svg></svg></mi><mo></mo></math><span></span></foreignObject><path></path></svg></annotation-xml><mi>
#errors
#document
| <html>
| <head>
| <body>
| <math math>
| <math annotation-xml>
| <svg svg>
| <svg foreignObject>
| <math math>
| <math mi>
| <svg svg>
| <math mo>
| <span>
| <svg path>
| <math mi>

Some files were not shown because too many files have changed in this diff Show more