src/pkg/html/parse.go - Issue 3460043: code review 3460043: html: handle unexpected EOF during parsing.

Keyboard Shortcuts

	File
u :	up to issue
m :	publish + mail comments
M :	edit review message
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line
<Enter> :	respond to / edit current comment
d :	mark current comment as done

	Issue
u :	up to list of issues
m :	publish + mail comments
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue
# :	close issue

	Comment/message editing
<Ctrl> + s or <Ctrl> + Enter :	save comment
<Esc> :	cancel edit

Unified Diff: src/pkg/html/parse.go

Issue 3460043: code review 3460043: html: handle unexpected EOF during parsing. (Closed)

Patch Set: code review 3460043: html: handle unexpected EOF during parsing. Created 14 years, 3 months ago

Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/pkg/html/parse.go

===================================================================

--- a/src/pkg/html/parse.go

+++ b/src/pkg/html/parse.go

@@ -32,11 +32,6 @@

Attr []Attribute

}

-// An insertion mode (section 10.2.3.1) is the state transition function from

-// a particular state in the HTML5 parser's state machine. In addition to

-// returning the next state, it also returns whether the token was consumed.

-type insertionMode func(*parser) (insertionMode, bool)

// A parser implements the HTML5 parsing algorithm:

// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#tree-construction

type parser struct {

@@ -121,11 +116,12 @@

p.tok.Attr = nil

return nil

}

- if tokenType := p.tokenizer.Next(); tokenType == ErrorToken {

+ p.tokenizer.Next()

+ p.tok = p.tokenizer.Token()

+ switch p.tok.Type {

+ case ErrorToken:

return p.tokenizer.Error()

- }

- p.tok = p.tokenizer.Token()

- if p.tok.Type == SelfClosingTagToken {

+ case SelfClosingTagToken:

p.hasSelfClosingToken = true

p.tok.Type = StartTagToken

}

@@ -137,6 +133,13 @@

p.hasSelfClosingToken = false

}

+// An insertion mode (section 10.2.3.1) is the state transition function from

+// a particular state in the HTML5 parser's state machine. It updates the

+// parser's fields depending on parser.token (where ErrorToken means EOF). In

+// addition to returning the next insertionMode state, it also returns whether

+// the token was consumed.

+type insertionMode func(*parser) (insertionMode, bool)

// Section 10.2.5.4.

func initialInsertionMode(p *parser) (insertionMode, bool) {

// TODO(nigeltao): check p.tok for DOCTYPE.

@@ -151,6 +154,8 @@

implied bool

)

switch p.tok.Type {

+ case ErrorToken:

+ implied = true

case TextToken:

// TODO(nigeltao): distinguish whitespace text from others.

implied = true

@@ -162,7 +167,12 @@

implied = true

}

case EndTagToken:

- // TODO.

+ switch p.tok.Data {

+ case "head", "body", "html", "br":

+ implied = true

+ default:

+ // Ignore the token.

+ }

}

if add || implied {

p.addChild(&Node{

@@ -182,6 +192,8 @@

implied bool

)

switch p.tok.Type {

+ case ErrorToken:

+ implied = true

case TextToken:

// TODO(nigeltao): distinguish whitespace text from others.

implied = true

@@ -191,12 +203,17 @@

add = true

attr = p.tok.Attr

case "html":

- // TODO.

+ return inBodyInsertionMode, false

default:

implied = true

}

case EndTagToken:

- // TODO.

+ switch p.tok.Data {

+ case "head", "body", "html", "br":

+ implied = true

+ default:

+ // Ignore the token.

+ }

}

if add || implied {

p.addChild(&Node{

@@ -215,7 +232,7 @@

implied bool

)

switch p.tok.Type {

- case TextToken:

+ case ErrorToken, TextToken:

implied = true

case StartTagToken:

switch p.tok.Data {

@@ -251,7 +268,7 @@

implied bool

)

switch p.tok.Type {

- case TextToken:

+ case ErrorToken, TextToken:

implied = true

framesetOK = true

case StartTagToken:

@@ -290,6 +307,8 @@

func inBodyInsertionMode(p *parser) (insertionMode, bool) {

var endP bool

switch p.tok.Type {

+ case ErrorToken:

+ // No-op.

case TextToken:

p.addText(p.tok.Data)

p.framesetOK = false

@@ -363,6 +382,8 @@

// Section 10.2.5.22.

func afterBodyInsertionMode(p *parser) (insertionMode, bool) {

switch p.tok.Type {

+ case ErrorToken:

+ // TODO.

case TextToken:

// TODO.

case StartTagToken:

@@ -395,6 +416,7 @@

scripting: true,

framesetOK: true,

}

+ // Iterate until EOF. Any other error will cause an early return.

im, consumed := initialInsertionMode, true

for {

if consumed {

@@ -407,8 +429,11 @@

}

im, consumed = im(p)

}

- // TODO(nigeltao): clean up, depending on the value of im.

- // The specification's algorithm does clean up on reading an EOF 'token',

- // but in go we represent EOF by an os.Error instead.

+ // Loop until the final token (the ErrorToken signifying EOF) is consumed.

+ for {

+ if im, consumed = im(p); consumed {

+ break

+ }

+ }

return p.doc, nil

}

« no previous file with comments | « no previous file | src/pkg/html/parse_test.go » ('j') | no next file with comments »