Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(322)

Delta Between Two Patch Sets: src/pkg/net/textproto/reader.go

Issue 6721055: code review 6721055: net/textproto: faster header canonicalization with fewe... (Closed)
Left Patch Set: diff -r 25dcee3d220c https://code.google.com/p/go Created 11 years, 4 months ago
Right Patch Set: diff -r 42c8d3aadc40 https://code.google.com/p/go Created 11 years, 4 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | src/pkg/net/textproto/reader_test.go » ('j') | src/pkg/net/textproto/reader_test.go » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT | RIGHT
1 // Copyright 2010 The Go Authors. All rights reserved. 1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 package textproto 5 package textproto
6 6
7 import ( 7 import (
8 "bufio" 8 "bufio"
9 "bytes" 9 "bytes"
10 "io" 10 "io"
(...skipping 468 matching lines...) Expand 10 before | Expand all | Expand 10 after
479 } 479 }
480 } 480 }
481 panic("unreachable") 481 panic("unreachable")
482 } 482 }
483 483
484 // CanonicalMIMEHeaderKey returns the canonical format of the 484 // CanonicalMIMEHeaderKey returns the canonical format of the
485 // MIME header key s. The canonicalization converts the first 485 // MIME header key s. The canonicalization converts the first
486 // letter and any letter following a hyphen to upper case; 486 // letter and any letter following a hyphen to upper case;
487 // the rest are converted to lowercase. For example, the 487 // the rest are converted to lowercase. For example, the
488 // canonical key for "accept-encoding" is "Accept-Encoding". 488 // canonical key for "accept-encoding" is "Accept-Encoding".
489 // MIME headers should be ASCII only; Unicode runes are passed through
bradfitz 2012/11/09 09:15:54 were these docs requested earlier? If not, I'd ju
rsc 2012/11/12 20:40:15 // MIME header keys are assumed to be ASCII only.
490 // unchanged, if they are present.
489 func CanonicalMIMEHeaderKey(s string) string { 491 func CanonicalMIMEHeaderKey(s string) string {
490 // Quick check for canonical encoding. 492 // Quick check for canonical encoding.
491 upper := true 493 upper := true
492 for i := 0; i < len(s); i++ { 494 for i := 0; i < len(s); i++ {
493 c := s[i] 495 c := s[i]
494 if upper && 'a' <= c && c <= 'z' { 496 if upper && 'a' <= c && c <= 'z' {
495 return canonicalMIMEHeaderKey([]byte(s)) 497 return canonicalMIMEHeaderKey([]byte(s))
496 } 498 }
497 if !upper && 'A' <= c && c <= 'Z' { 499 if !upper && 'A' <= c && c <= 'Z' {
498 return canonicalMIMEHeaderKey([]byte(s)) 500 return canonicalMIMEHeaderKey([]byte(s))
499 } 501 }
500 upper = c == '-' 502 upper = c == '-'
501 } 503 }
502 return s 504 return s
503 } 505 }
504 506
505 const toUpper = 'A' - 'a'
506 const toLower = 'a' - 'A' 507 const toLower = 'a' - 'A'
507 508
508 // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is 509 // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
509 // allowed to mutate the provided byte slice before returning the 510 // allowed to mutate the provided byte slice before returning the
510 // string. 511 // string.
511 func canonicalMIMEHeaderKey(a []byte) string { 512 func canonicalMIMEHeaderKey(a []byte) string {
512 // Look for it in commonHeaders , so that we can avoid an 513 // Look for it in commonHeaders , so that we can avoid an
513 // allocation by sharing the strings among all users 514 // allocation by sharing the strings among all users
514 // of textproto. If we don't find it, a has been canonicalized 515 // of textproto. If we don't find it, a has been canonicalized
515 // so just return string(a). 516 // so just return string(a).
516 upper := true 517 upper := true
517 lo := 0 518 lo := 0
518 hi := len(commonHeaders) 519 hi := len(commonHeaders)
519 for i := 0; i < len(a); i++ { 520 for i := 0; i < len(a); i++ {
520 c := int(a[i])
521 // Canonicalize: first letter upper case 521 // Canonicalize: first letter upper case
522 // and upper case after each dash. 522 // and upper case after each dash.
523 // (Host, User-Agent, If-Modified-Since). 523 // (Host, User-Agent, If-Modified-Since).
524 // MIME headers are ASCII only, so no Unicode issues. 524 // MIME headers are ASCII only, so no Unicode issues.
525 if a[i] == ' ' { 525 if a[i] == ' ' {
526 a[i] = '-' 526 a[i] = '-'
527 upper = true 527 upper = true
528 continue 528 continue
529 } 529 }
530 c := a[i]
530 if upper && 'a' <= c && c <= 'z' { 531 if upper && 'a' <= c && c <= 'z' {
531 » » » c += toUpper 532 » » » c -= toLower
532 } else if !upper && 'A' <= c && c <= 'Z' { 533 } else if !upper && 'A' <= c && c <= 'Z' {
533 c += toLower 534 c += toLower
534 } 535 }
535 » » a[i] = byte(c) 536 » » a[i] = c
536 upper = c == '-' // for next time 537 upper = c == '-' // for next time
537 538
538 if lo < hi { 539 if lo < hi {
539 » » » for lo < hi && (len(commonHeaders[lo]) <= i || int(commonHeaders[lo][i]) < c) { 540 » » » for lo < hi && (len(commonHeaders[lo]) <= i || commonHeaders[lo][i] < c) {
540 lo++ 541 lo++
541 } 542 }
542 » » » for hi > lo && int(commonHeaders[hi-1][i]) > c { 543 » » » for hi > lo && commonHeaders[hi-1][i] > c {
543 hi-- 544 hi--
544 } 545 }
545 } 546 }
546 } 547 }
547 if lo < hi && len(commonHeaders[lo]) == len(a) { 548 if lo < hi && len(commonHeaders[lo]) == len(a) {
548 return commonHeaders[lo] 549 return commonHeaders[lo]
549 } 550 }
550 return string(a) 551 return string(a)
551 } 552 }
552 553
553 var commonHeaders = []string{ 554 var commonHeaders = []string{
554 "Accept", 555 "Accept",
555 "Accept-Charset", 556 "Accept-Charset",
556 "Accept-Encoding", 557 "Accept-Encoding",
557 "Accept-Language", 558 "Accept-Language",
558 "Accept-Ranges", 559 "Accept-Ranges",
559 "Cache-Control", 560 "Cache-Control",
561 "Cc",
560 "Connection", 562 "Connection",
563 "Content-Id",
564 "Content-Language",
561 "Content-Length", 565 "Content-Length",
566 "Content-Transfer-Encoding",
562 "Content-Type", 567 "Content-Type",
563 "Date", 568 "Date",
569 "Dkim-Signature",
564 "Etag", 570 "Etag",
565 "Expires", 571 "Expires",
572 "From",
566 "Host", 573 "Host",
574 "If-Modified-Since",
575 "If-None-Match",
576 "In-Reply-To",
567 "Last-Modified", 577 "Last-Modified",
568 "Location", 578 "Location",
579 "Message-Id",
580 "Mime-Version",
569 "Pragma", 581 "Pragma",
582 "Received",
583 "Return-Path",
570 "Server", 584 "Server",
571 "Set-Cookie", 585 "Set-Cookie",
586 "Subject",
587 "To",
572 "User-Agent", 588 "User-Agent",
573 "Via", 589 "Via",
574 "X-Forwarded-For", 590 "X-Forwarded-For",
575 "X-Imforwards", 591 "X-Imforwards",
576 "X-Powered-By", 592 "X-Powered-By",
577 } 593 }
LEFT | RIGHT

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b