Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code | Sign in
(416)

Delta Between Two Patch Sets: src/pkg/net/textproto/reader.go

Issue 6721055: code review 6721055: net/textproto: faster header canonicalization with fewe... (Closed)
Left Patch Set: diff -r 3d637cc9dff0 https://code.google.com/p/go Created 11 years, 5 months ago
Right Patch Set: diff -r 42c8d3aadc40 https://code.google.com/p/go Created 11 years, 4 months ago
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « no previous file | src/pkg/net/textproto/reader_test.go » ('j') | src/pkg/net/textproto/reader_test.go » ('J')
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFT RIGHT
1 // Copyright 2010 The Go Authors. All rights reserved. 1 // Copyright 2010 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style 2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file. 3 // license that can be found in the LICENSE file.
4 4
5 package textproto 5 package textproto
6 6
7 import ( 7 import (
8 "bufio" 8 "bufio"
9 "bytes" 9 "bytes"
10 "io" 10 "io"
(...skipping 468 matching lines...) Expand 10 before | Expand all | Expand 10 after
479 } 479 }
480 } 480 }
481 panic("unreachable") 481 panic("unreachable")
482 } 482 }
483 483
484 // CanonicalMIMEHeaderKey returns the canonical format of the 484 // CanonicalMIMEHeaderKey returns the canonical format of the
485 // MIME header key s. The canonicalization converts the first 485 // MIME header key s. The canonicalization converts the first
486 // letter and any letter following a hyphen to upper case; 486 // letter and any letter following a hyphen to upper case;
487 // the rest are converted to lowercase. For example, the 487 // the rest are converted to lowercase. For example, the
488 // canonical key for "accept-encoding" is "Accept-Encoding". 488 // canonical key for "accept-encoding" is "Accept-Encoding".
489 // MIME headers should be ASCII only; Unicode runes are passed through
bradfitz 2012/11/09 09:15:54 were these docs requested earlier? If not, I'd ju
rsc 2012/11/12 20:40:15 // MIME header keys are assumed to be ASCII only.
490 // unchanged, if they are present.
489 func CanonicalMIMEHeaderKey(s string) string { 491 func CanonicalMIMEHeaderKey(s string) string {
490 // Quick check for canonical encoding. 492 // Quick check for canonical encoding.
491 upper := true 493 upper := true
492 for i := 0; i < len(s); i++ { 494 for i := 0; i < len(s); i++ {
493 c := s[i] 495 c := s[i]
494 if upper && 'a' <= c && c <= 'z' { 496 if upper && 'a' <= c && c <= 'z' {
495 return canonicalMIMEHeaderKey([]byte(s)) 497 return canonicalMIMEHeaderKey([]byte(s))
496 } 498 }
497 if !upper && 'A' <= c && c <= 'Z' { 499 if !upper && 'A' <= c && c <= 'Z' {
498 return canonicalMIMEHeaderKey([]byte(s)) 500 return canonicalMIMEHeaderKey([]byte(s))
499 } 501 }
500 upper = c == '-' 502 upper = c == '-'
501 } 503 }
502 return s 504 return s
503 } 505 }
504 506
507 const toLower = 'a' - 'A'
508
505 // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is 509 // canonicalMIMEHeaderKey is like CanonicalMIMEHeaderKey but is
506 // allowed to mutate the provided byte slice before returning the 510 // allowed to mutate the provided byte slice before returning the
507 // string. 511 // string.
508 func canonicalMIMEHeaderKey(a []byte) string { 512 func canonicalMIMEHeaderKey(a []byte) string {
509 » // We observe that most headers are transmitted on the net 513 » // Look for it in commonHeaders, so that we can avoid an
510 » // in canonical form. Also, we'd like to use string interning 514 » // allocation by sharing the strings among all users
511 » // on the keys, instead of copying the []byte into a string. 515 » // of textproto. If we don't find it, a has been canonicalized
512 » // So try this first, before doing the proper canonicalization. 516 » // so just return string(a).
513 » b := headerKey{}
514 » copy(b[:], a)
515 » if s, ok := headerPool[b]; ok {
516 » » return s
517 » }
518
519 » // Canonicalize: first letter upper case
520 » // and upper case after each dash.
521 » // (Host, User-Agent, If-Modified-Since).
522 » // MIME headers are ASCII only, so no Unicode issues.
523 upper := true 517 upper := true
524 » for i, v := range a { 518 » lo := 0
525 » » if v == ' ' { 519 » hi := len(commonHeaders)
520 » for i := 0; i < len(a); i++ {
521 » » // Canonicalize: first letter upper case
522 » » // and upper case after each dash.
523 » » // (Host, User-Agent, If-Modified-Since).
524 » » // MIME headers are ASCII only, so no Unicode issues.
525 » » if a[i] == ' ' {
526 a[i] = '-' 526 a[i] = '-'
527 upper = true 527 upper = true
528 continue 528 continue
529 } 529 }
530 » » if upper && 'a' <= v && v <= 'z' { 530 » » c := a[i]
531 » » » a[i] = v + 'A' - 'a' 531 » » if upper && 'a' <= c && c <= 'z' {
532 » » } 532 » » » c -= toLower
533 » » if !upper && 'A' <= v && v <= 'Z' { 533 » » } else if !upper && 'A' <= c && c <= 'Z' {
534 » » » a[i] = v + 'a' - 'A' 534 » » » c += toLower
535 » » } 535 » » }
536 » » upper = v == '-' 536 » » a[i] = c
537 » » upper = c == '-' // for next time
538
539 » » if lo < hi {
540 » » » for lo < hi && (len(commonHeaders[lo]) <= i || commonHeaders[lo][i] < c) {
541 » » » » lo++
542 » » » }
543 » » » for hi > lo && commonHeaders[hi-1][i] > c {
544 » » » » hi--
545 » » » }
546 » » }
547 » }
548 » if lo < hi && len(commonHeaders[lo]) == len(a) {
549 » » return commonHeaders[lo]
537 } 550 }
538 return string(a) 551 return string(a)
539 } 552 }
540 553
541 // 16 bytes is two 64-bit words, making the copy as efficient as possible 554 var commonHeaders = []string{
542 // (at the cost that some longer headers can't be interned)
543 const headerKeyLen = 16
544
545 // A headerKey is a fixed-length byte array because keys of
546 // maps cannot be slices. The requested header is copied into
547 // a headerKey before being looked up in the headerPool.
548 type headerKey [headerKeyLen]byte
549
550 // A pool of common header strings to avoid allocations for
551 // the common headers.
552 var headerPool map[headerKey]string
553
554 var commonHeaders = [...]string{
555 » // from https://en.wikipedia.org/wiki/List_of_HTTP_headers
556 » // with ones that are too long removed
557 "Accept", 555 "Accept",
558 "Accept-Charset", 556 "Accept-Charset",
559 "Accept-Encoding", 557 "Accept-Encoding",
560 "Accept-Language", 558 "Accept-Language",
561 » "Accept-Datetime", 559 » "Accept-Ranges",
562 » "Authorization",
563 "Cache-Control", 560 "Cache-Control",
561 "Cc",
564 "Connection", 562 "Connection",
565 » "Cookie", 563 » "Content-Id",
564 » "Content-Language",
566 "Content-Length", 565 "Content-Length",
567 » "Content-MD5", 566 » "Content-Transfer-Encoding",
568 "Content-Type", 567 "Content-Type",
569 "Date", 568 "Date",
570 » "Expect", 569 » "Dkim-Signature",
570 » "Etag",
571 » "Expires",
571 "From", 572 "From",
572 "Host", 573 "Host",
573 » "If-Match", 574 » "If-Modified-Since",
574 "If-None-Match", 575 "If-None-Match",
575 » "If-Range", 576 » "In-Reply-To",
576 » "Max-Forwards", 577 » "Last-Modified",
578 » "Location",
579 » "Message-Id",
580 » "Mime-Version",
577 "Pragma", 581 "Pragma",
578 » "Range", 582 » "Received",
579 » "Referer", 583 » "Return-Path",
580 » "TE", 584 » "Server",
581 » "Upgrade", 585 » "Set-Cookie",
586 » "Subject",
587 » "To",
582 "User-Agent", 588 "User-Agent",
583 "Via", 589 "Via",
584 "Warning",
585 "X-Requested-With",
586 "DNT",
587 "X-Forwarded-For", 590 "X-Forwarded-For",
588 » "Front-End-Https", 591 » "X-Imforwards",
589 » "X-ATT-DeviceId",
590 » "X-Wap-Profile",
591 » "Proxy-Connection",
592 » "Accept-Ranges",
593 » "Age",
594 » "Allow",
595 » "Content-Encoding",
596 » "Content-Language",
597 » "Content-Location",
598 » "Content-Range",
599 » "ETag",
600 » "Expires",
601 » "Last-Modified",
602 » "Link",
603 » "Location",
604 » "P3P",
605 » "Refresh",
606 » "Retry-After",
607 » "Server",
608 » "Set-Cookie",
609 » "Trailer",
610 » "Vary",
611 » "WWW-Authenticate",
612 » "X-Frame-Options",
613 » "X-XSS-Protection",
614 "X-Powered-By", 592 "X-Powered-By",
615 » "X-UA-Compatible", 593 }
616 » // from https://en.wikipedia.org/wiki/Internet_Message_Format
617 » "Message-Id", "In-Reply-To", "To", "Subject", "Bcc", "Cc",
618 » "Precedence", "References", "Reply-To", "Sender", "Archived-At",
619 » "Received", "Return-Path", "Received-SPF", "Auto-Submitted",
620 » "VBR-Info",
621 » // from observation on real mail
622 » "Delivered-To", "X-Forwarded-To", "DKIM-Signature",
623 » "MIME-Version",
624 }
625
626 func init() {
627 » headerPool = make(map[headerKey]string, len(commonHeaders))
628 » for _, h := range commonHeaders {
629 » » if len(h) > headerKeyLen {
630 » » » panic("Header too long.")
631 » » }
632 » » h = CanonicalMIMEHeaderKey(h)
633 » » hk := headerKey{}
634 » » copy(hk[:], h)
635 » » headerPool[hk] = h
636 » }
637 }
LEFT RIGHT

Powered by Google App Engine
RSS Feeds Recent Issues | This issue
This is Rietveld f62528b