src/pkg/encoding/binary/varint.go - Issue 5146048: code review 5146048: encoding/binary: support for varint encoding

Delta Between Two Patch Sets: src/pkg/encoding/binary/varint.go

Issue 5146048: code review 5146048: encoding/binary: support for varint encoding (Closed)

Left Patch Set: diff -r c8a7809fabcd https://go.googlecode.com/hg/ Created 13 years, 5 months ago

Right Patch Set: diff -r 7ae32f195afd https://go.googlecode.com/hg/ Created 13 years, 5 months ago

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments. Please Sign in to add in-line comments.

Jump to:

Left: Side by side diff | Download
Right: Side by side diff | Download

LEFT	RIGHT
1 // Copyright 2011 The Go Authors. All rights reserved.	1 // Copyright 2011 The Go Authors. All rights reserved.

2 // Use of this source code is governed by a BSD-style	2 // Use of this source code is governed by a BSD-style

3 // license that can be found in the LICENSE file.	3 // license that can be found in the LICENSE file.

4	4

5 // This file implements "varint" encoding of 64bit integers.	5 package binary

6 // The encoding is as follows:

7 // - unsigned integers are serialized 7bits at a time, starting with the lsb

8 // - the msb in each byte indicates if there is a continuation byte (msb = 1)

9 // - signed integers are mapped to unsigned integers using the "zig-zag"

10 // encoding (see PutInt)

11	6

	7 // This file implements "varint" encoding of 64-bit integers.

	8 // The encoding is:

	9 // - unsigned integers are serialized 7 bits at a time, starting with the

	10 // least significant bits

	11 // - the most significant bit (msb) in each output byte indicates if there

	12 // is a continuation byte (msb = 1)

	13 // - signed integers are mapped to unsigned integers using "zig-zag"

	14 // encoding: Positive values x are written as 2*x + 0, negative values

	15 // are written as 2*(^x) + 1; that is, negative numbers are complemented

	16 // and whether to complement is encoded in bit 0.

	17 //

12 // Design note:	18 // Design note:

13 // At most 10 bytes are needed for 64bit values. The encoding could be more	19 // At most 10 bytes are needed for 64-bit values. The encoding could

14 // dense: A full 64bit value needs an extra byte just to hold bit 63. Instead,	20 // be more dense: a full 64-bit value needs an extra byte just to hold bit 63.

15 // the msb of the previous byte could be used to hold bit 63 since we know	21 // Instead, the msb of the previous byte could be used to hold bit 63 since we

16 // there can't be more then 64 bits. This is a trivial improvement and would	22 // know there can't be more than 64 bits. This is a trivial improvement and

17 // reduce the maximum encoding length to 9 bytes. However, it breaks the	23 // would reduce the maximum encoding length to 9 bytes. However, it breaks the

18 // invariant that the msb is always the "continuation bit" and thus makes the	24 // invariant that the msb is always the "continuation bit" and thus makes the

19 // format incompatible with a varint encoding for larger numbers (say 128bit).	25 // format incompatible with a varint encoding for larger numbers (say 128-bit).

20

21 package binary

22	26

23 import (	27 import (

24 "io"	28 "io"

25 "os"	29 "os"

26 )	30 )

27	31

28 // PutUint encodes a uint64 into buf and returns the number of bytes written.	32 // MaxVarintLenN is the maximum length of a varint-encoded N-bit integer.

29 // If the buffer is to small, the result is 0.	33 const (

30 func PutUint(buf []byte, x uint64) int {	34 » MaxVarintLen16 = 3

31 » for i := range buf {	35 » MaxVarintLen32 = 5

32 » » if x < 128 {	36 » MaxVarintLen64 = 10

	37 )

	38

	39 // PutUvarint encodes a uint64 into buf and returns the number of bytes written.

	40 // If the buffer is too small, the result is the negated number of bytes require d

	41 // (that is, -PutUvarint(nil, x) is the number of bytes required to encode x).

	42 func PutUvarint(buf []byte, x uint64) int {

	43 » var i int

	44 » for i = range buf {

	45 » » if x < 0x80 {

33 buf[i] = byte(x)	46 buf[i] = byte(x)

34 return i + 1	47 return i + 1

35 }	48 }

36 » » buf[i] = byte(x) \| 128	49 » » buf[i] = byte(x) \| 0x80

37 x >>= 7	50 x >>= 7

38 }	51 }

39 » return 0 // error: buffer too small	52 » // buffer too small; compute number of bytes required

	53 » for x >= 0x4000 {

	54 » » x >>= 2 * 7

	55 » » i += 2

	56 » }

	57 » if x >= 0x80 {

	58 » » i++

	59 » }

	60 » return -(i + 1)

40 }	61 }

41	62

42 // Uint decodes a uint64 from buf and returns that value and the	63 // Uvarint decodes a uint64 from buf and returns that value and the

43 // number of bytes read. If the buffer is too small, the value is	64 // number of bytes read (> 0). If an error occurred, the value is 0

44 // undefined and the number of bytes is 0. At most 11 bytes are read.	65 // and the number of bytes n is <= 0 meaning:

45 func Uint(buf []byte) (uint64, int) {	66 //

	67 //» n == 0: buf too small

	68 //» n < 0: value larger than 64 bits (overflow)

	69 // and -n is the number of bytes read

	70 //

	71 func Uvarint(buf []byte) (uint64, int) {

46 var x uint64	72 var x uint64

47 var s uint	73 var s uint

48 for i, b := range buf {	74 for i, b := range buf {

49 » » if b < 128 \|\| i >= 10 {	75 » » if b < 0x80 {

	76 » » » if i > 9 \|\| i == 9 && b > 1 {

	77 » » » » return 0, -(i + 1) // overflow

	78 » » » }

50 return x \| uint64(b)<<s, i + 1	79 return x \| uint64(b)<<s, i + 1

51 }	80 }

52 » » x \|= uint64(b&127) << s	81 » » x \|= uint64(b&0x7f) << s

53 s += 7	82 s += 7

54 }	83 }

55 » return x, 0	84 » return 0, 0

56 }	85 }

57	86

58 // PutInt encodes an int64 into buf and returns the number of bytes written.	87 // PutVarint encodes an int64 into buf and returns the number of bytes written.

59 // If the buffer is to small, the result is 0.	88 // If the buffer is too small, the result is the negated number of bytes require d

60 func PutInt(buf []byte, x int64) int {	89 // (that is, -PutVarint(nil, x) is the number of bytes required to encode x).

	90 func PutVarint(buf []byte, x int64) int {

61 ux := uint64(x) << 1	91 ux := uint64(x) << 1

62 if x < 0 {	92 if x < 0 {

63 ux = ^ux	93 ux = ^ux

64 }	94 }

65 » return PutUint(buf, ux)	95 » return PutUvarint(buf, ux)

66 }	96 }

67	97

68 // Int decodes an int64 from buf and returns that value and the	98 // Varint decodes an int64 from buf and returns that value and the

69 // number of bytes read. If the buffer is too small, the value is	99 // number of bytes read (> 0). If an error occurred, the value is 0

70 // undefined and the number of bytes is 0. At most 11 bytes are read.	100 // and the number of bytes n is <= 0 with the following meaning:

71 func Int(buf []byte) (int64, int) {	101 //

72 » ux, n := Uint(buf) // ok to continue in presence of error	102 //» n == 0: buf too small

	103 //» n < 0: value larger than 64 bits (overflow)

	104 // and -n is the number of bytes read

	105 //

	106 func Varint(buf []byte) (int64, int) {

	107 » ux, n := Uvarint(buf) // ok to continue in presence of error

73 x := int64(ux >> 1)	108 x := int64(ux >> 1)

74 if ux&1 != 0 {	109 if ux&1 != 0 {

75 x = ^x	110 x = ^x

76 }	111 }

77 return x, n	112 return x, n

78 }	113 }

79	114

80 // WriteUint writes a uint64 to w.	115 // WriteUvarint encodes x and writes the result to w.

81 func WriteUint(w io.Writer, x uint64) os.Error {	116 func WriteUvarint(w io.Writer, x uint64) os.Error {

82 » var buf [10]byte	117 » var buf [MaxVarintLen64]byte

83 » n := PutUint(buf[:], x) // won't fail	118 » n := PutUvarint(buf[:], x) // won't fail

84 _, err := w.Write(buf[0:n])	119 _, err := w.Write(buf[0:n])

85 return err	120 return err

86 }	121 }

87	122

88 var overflow = os.NewError("overflow: not a 64bit integer")	123 var overflow = os.NewError("binary: varint overflows a 64-bit integer")

89	124

90 // ReadUint reads a uint64 from r.	125 // ReadUvarint reads an encoded unsigned integer from r and returns it as a uint 64.

91 func ReadUint(r io.ByteReader) (uint64, os.Error) {	126 func ReadUvarint(r io.ByteReader) (uint64, os.Error) {

92 var x uint64	127 var x uint64

93 var s uint	128 var s uint

94 » for i := 0; i < 10; i++ {	129 » for i := 0; ; i++ {

95 b, err := r.ReadByte()	130 b, err := r.ReadByte()

96 » » if b < 128 \|\| err != nil {	131 » » if err != nil {

97 » » » return x \| uint64(b)<<s, err	132 » » » return x, err

98 }	133 }

99 » » x \|= uint64(b&127) << s	134 » » if b < 0x80 {

	135 » » » if i > 9 \|\| i == 9 && b > 1 {

	136 » » » » return x, overflow

	137 » » » }

	138 » » » return x \| uint64(b)<<s, nil

	139 » » }

	140 » » x \|= uint64(b&0x7f) << s

100 s += 7	141 s += 7

101 }	142 }

102 » return x, overflow	143 » panic("unreachable")

103 }	144 }

104	145

105 // WriteInt writes a uint64 to w.	146 // WriteVarint encodes x and writes the result to w.

106 func WriteInt(w io.Writer, x int64) os.Error {	147 func WriteVarint(w io.Writer, x int64) os.Error {

107 ux := uint64(x) << 1	148 ux := uint64(x) << 1

108 if x < 0 {	149 if x < 0 {

109 ux = ^ux	150 ux = ^ux

110 }	151 }

111 » return WriteUint(w, ux)	152 » return WriteUvarint(w, ux)

112 }	153 }

113	154

114 // ReadInt reads a uint64 from r.	155 // ReadVarint reads an encoded unsigned integer from r and returns it as a uint6 4.

115 func ReadInt(r io.ByteReader) (int64, os.Error) {	156 func ReadVarint(r io.ByteReader) (int64, os.Error) {

116 » ux, err := ReadUint(r) // ok to continue in presence of error	157 » ux, err := ReadUvarint(r) // ok to continue in presence of error

117 x := int64(ux >> 1)	158 x := int64(ux >> 1)

118 if ux&1 != 0 {	159 if ux&1 != 0 {

119 x = ^x	160 x = ^x

120 }	161 }

121 return x, err	162 return x, err

122 }	163 }

LEFT	RIGHT