Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(494)

Unified Diff: src/pkg/html/parse.go

Issue 3460043: code review 3460043: html: handle unexpected EOF during parsing. (Closed)
Patch Set: code review 3460043: html: handle unexpected EOF during parsing. Created 14 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | src/pkg/html/parse_test.go » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/pkg/html/parse.go
===================================================================
--- a/src/pkg/html/parse.go
+++ b/src/pkg/html/parse.go
@@ -32,11 +32,6 @@
Attr []Attribute
}
-// An insertion mode (section 10.2.3.1) is the state transition function from
-// a particular state in the HTML5 parser's state machine. In addition to
-// returning the next state, it also returns whether the token was consumed.
-type insertionMode func(*parser) (insertionMode, bool)
-
// A parser implements the HTML5 parsing algorithm:
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#tree-construction
type parser struct {
@@ -121,11 +116,12 @@
p.tok.Attr = nil
return nil
}
- if tokenType := p.tokenizer.Next(); tokenType == ErrorToken {
+ p.tokenizer.Next()
+ p.tok = p.tokenizer.Token()
+ switch p.tok.Type {
+ case ErrorToken:
return p.tokenizer.Error()
- }
- p.tok = p.tokenizer.Token()
- if p.tok.Type == SelfClosingTagToken {
+ case SelfClosingTagToken:
p.hasSelfClosingToken = true
p.tok.Type = StartTagToken
}
@@ -137,6 +133,13 @@
p.hasSelfClosingToken = false
}
+// An insertion mode (section 10.2.3.1) is the state transition function from
+// a particular state in the HTML5 parser's state machine. It updates the
+// parser's fields depending on parser.tok (where ErrorToken means EOF). In
+// addition to returning the next insertionMode state, it also returns whether
+// the token was consumed.
+type insertionMode func(*parser) (insertionMode, bool)
+
// Section 10.2.5.4.
func initialInsertionMode(p *parser) (insertionMode, bool) {
// TODO(nigeltao): check p.tok for DOCTYPE.
@@ -151,6 +154,8 @@
implied bool
)
switch p.tok.Type {
+ case ErrorToken:
+ implied = true
case TextToken:
// TODO(nigeltao): distinguish whitespace text from others.
implied = true
@@ -162,7 +167,12 @@
implied = true
}
case EndTagToken:
- // TODO.
+ switch p.tok.Data {
+ case "head", "body", "html", "br":
+ implied = true
+ default:
+ // Ignore the token.
+ }
}
if add || implied {
p.addChild(&Node{
@@ -182,6 +192,8 @@
implied bool
)
switch p.tok.Type {
+ case ErrorToken:
+ implied = true
case TextToken:
// TODO(nigeltao): distinguish whitespace text from others.
implied = true
@@ -191,12 +203,17 @@
add = true
attr = p.tok.Attr
case "html":
- // TODO.
+ return inBodyInsertionMode, false
default:
implied = true
}
case EndTagToken:
- // TODO.
+ switch p.tok.Data {
+ case "head", "body", "html", "br":
+ implied = true
+ default:
+ // Ignore the token.
+ }
}
if add || implied {
p.addChild(&Node{
@@ -215,7 +232,7 @@
implied bool
)
switch p.tok.Type {
- case TextToken:
+ case ErrorToken, TextToken:
implied = true
case StartTagToken:
switch p.tok.Data {
@@ -251,7 +268,7 @@
implied bool
)
switch p.tok.Type {
- case TextToken:
+ case ErrorToken, TextToken:
implied = true
framesetOK = true
case StartTagToken:
@@ -290,6 +307,8 @@
func inBodyInsertionMode(p *parser) (insertionMode, bool) {
var endP bool
switch p.tok.Type {
+ case ErrorToken:
+ // No-op.
case TextToken:
p.addText(p.tok.Data)
p.framesetOK = false
@@ -363,6 +382,8 @@
// Section 10.2.5.22.
func afterBodyInsertionMode(p *parser) (insertionMode, bool) {
switch p.tok.Type {
+ case ErrorToken:
+ // TODO.
case TextToken:
// TODO.
case StartTagToken:
@@ -395,6 +416,7 @@
scripting: true,
framesetOK: true,
}
+ // Iterate until EOF. Any other error will cause an early return.
im, consumed := initialInsertionMode, true
for {
if consumed {
@@ -407,8 +429,11 @@
}
im, consumed = im(p)
}
- // TODO(nigeltao): clean up, depending on the value of im.
- // The specification's algorithm does clean up on reading an EOF 'token',
- // but in go we represent EOF by an os.Error instead.
+ // Loop until the final token (the ErrorToken signifying EOF) is consumed.
+ for {
+ if im, consumed = im(p); consumed {
+ break
+ }
+ }
return p.doc, nil
}
« no previous file with comments | « no previous file | src/pkg/html/parse_test.go » ('j') | no next file with comments »

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b