LEFT | RIGHT |
1 /* | 1 /* |
2 * Copyright (c) 2010 Google Inc. | 2 * Copyright (c) 2010 Google Inc. |
3 * | 3 * |
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not u
se this file except | 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not u
se this file except |
5 * in compliance with the License. You may obtain a copy of the License at | 5 * in compliance with the License. You may obtain a copy of the License at |
6 * | 6 * |
7 * http://www.apache.org/licenses/LICENSE-2.0 | 7 * http://www.apache.org/licenses/LICENSE-2.0 |
8 * | 8 * |
9 * Unless required by applicable law or agreed to in writing, software distribut
ed under the License | 9 * Unless required by applicable law or agreed to in writing, software distribut
ed under the License |
10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY K
IND, either express | 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY K
IND, either express |
11 * or implied. See the License for the specific language governing permissions a
nd limitations under | 11 * or implied. See the License for the specific language governing permissions a
nd limitations under |
12 * the License. | 12 * the License. |
13 */ | 13 */ |
14 | 14 |
15 package com.google.api.client.util.escape; | 15 package com.google.api.client.util.escape; |
16 | 16 |
17 /** | 17 /** |
18 * A {@code UnicodeEscaper} that escapes some set of Java characters using the U
RI percent encoding | 18 * A {@code UnicodeEscaper} that escapes some set of Java characters using the U
RI percent encoding |
19 * scheme. The set of safe characters (those which remain unescaped) can be spec
ified on | 19 * scheme. The set of safe characters (those which remain unescaped) can be spec
ified on |
20 * construction. | 20 * construction. |
21 * | 21 * |
22 * <p> | 22 * <p> |
23 * For details on escaping URIs for use in web pages, see section 2.4 of <a | 23 * For details on escaping URIs for use in web pages, see <a |
24 * href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>. | 24 * href="http://tools.ietf.org/html/rfc3986#section-2.4">RFC 3986 - section 2.4<
/a> and <a |
| 25 * href="http://tools.ietf.org/html/rfc3986#appendix-A">RFC 3986 - appendix A</a
>. |
25 * | 26 * |
26 * <p> | 27 * <p> |
27 * When encoding a String, the following rules apply: | 28 * When encoding a String, the following rules apply: |
28 * <ul> | 29 * <ul> |
29 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" thro
ugh "9" remain the | 30 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" thro
ugh "9" remain the |
30 * same. | 31 * same. |
31 * <li>Any additionally specified safe characters remain the same. | 32 * <li>Any additionally specified safe characters remain the same. |
32 * <li>If {@code plusForSpace} was specified, the space character " " is convert
ed into a plus sign | 33 * <li>If {@code plusForSpace} was specified, the space character " " is convert
ed into a plus sign |
33 * "+". | 34 * "+". |
34 * <li>All other characters are converted into one or more bytes using UTF-8 enc
oding and each byte | 35 * <li>All other characters are converted into one or more bytes using UTF-8 enc
oding and each byte |
35 * is then represented by the 3-character string "%XY", where "XY" is the two-di
git, uppercase, | 36 * is then represented by the 3-character string "%XY", where "XY" is the two-di
git, uppercase, |
36 * hexadecimal representation of the byte value. | 37 * hexadecimal representation of the byte value. |
37 * </ul> | 38 * </ul> |
38 * | 39 * |
39 * <p> | 40 * <p> |
40 * RFC 2396 specifies the set of unreserved characters as "-", "_", ".", "!", "~
", "*", "'", "(" and | 41 * RFC 2396 specifies the set of unreserved characters as "-", "_", ".", "!", "~
", "*", "'", "(" and |
41 * ")". It goes on to state: | 42 * ")". It goes on to state: |
42 * | 43 * |
43 * <p> | 44 * <p> |
44 * <i>Unreserved characters can be escaped without changing the semantics of the
URI, but this | 45 * <i>Unreserved characters can be escaped without changing the semantics of the
URI, but this |
45 * should not be done unless the URI is being used in a context that does not al
low the unescaped | 46 * should not be done unless the URI is being used in a context that does not al
low the unescaped |
46 * character to appear.</i> | 47 * character to appear.</i> |
47 * | 48 * |
48 * <p> | 49 * <p> |
49 * For performance reasons the only currently supported character encoding of th
is class is UTF-8. | 50 * For performance reasons the only currently supported character encoding of th
is class is UTF-8. |
50 * | 51 * |
51 * <p> | 52 * <p> |
52 * <b>Note</b>: This escaper produces uppercase hexadecimal sequences. From <a | 53 * <b>Note</b>: This escaper produces uppercase hexadecimal sequences. From <a |
53 * href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>:<br> <i>"URI producer
s and normalizers | 54 * href="http://tools.ietf.org/html/rfc3986">RFC 3986</a>:<br> |
54 * should use uppercase hexadecimal digits for all percent-encodings."</i> | 55 * <i>"URI producers and normalizers should use uppercase hexadecimal digits for
all |
| 56 * percent-encodings."</i> |
55 * | 57 * |
56 * @since 1.0 | 58 * @since 1.0 |
57 */ | 59 */ |
58 public class PercentEscaper extends UnicodeEscaper { | 60 public class PercentEscaper extends UnicodeEscaper { |
59 /** | 61 /** |
60 * A string of safe characters that mimics the behavior of {@link java.net.URL
Encoder}. | 62 * A string of safe characters that mimics the behavior of {@link java.net.URL
Encoder}. |
61 */ | 63 */ |
62 public static final String SAFECHARS_URLENCODER = "-_.*"; | 64 public static final String SAFECHARS_URLENCODER = "-_.*"; |
63 | 65 |
64 /** | 66 /** |
(...skipping 195 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
260 dest[4] = UPPER_HEX_DIGITS[0x8 | cp & 0x3]; | 262 dest[4] = UPPER_HEX_DIGITS[0x8 | cp & 0x3]; |
261 cp >>>= 2; | 263 cp >>>= 2; |
262 dest[2] = UPPER_HEX_DIGITS[cp & 0x7]; | 264 dest[2] = UPPER_HEX_DIGITS[cp & 0x7]; |
263 return dest; | 265 return dest; |
264 } else { | 266 } else { |
265 // If this ever happens it is due to a bug in UnicodeEscaper, not bad input. | 267 // If this ever happens it is due to a bug in UnicodeEscaper, not bad input. |
266 throw new IllegalArgumentException("Invalid unicode character value " + cp
); | 268 throw new IllegalArgumentException("Invalid unicode character value " + cp
); |
267 } | 269 } |
268 } | 270 } |
269 } | 271 } |
LEFT | RIGHT |