LEFT | RIGHT |
(no file at all) | |
1 /* | 1 /* |
2 * Copyright (c) 2010 Google Inc. | 2 * Copyright (c) 2010 Google Inc. |
3 * | 3 * |
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not u
se this file except | 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not u
se this file except |
5 * in compliance with the License. You may obtain a copy of the License at | 5 * in compliance with the License. You may obtain a copy of the License at |
6 * | 6 * |
7 * http://www.apache.org/licenses/LICENSE-2.0 | 7 * http://www.apache.org/licenses/LICENSE-2.0 |
8 * | 8 * |
9 * Unless required by applicable law or agreed to in writing, software distribut
ed under the License | 9 * Unless required by applicable law or agreed to in writing, software distribut
ed under the License |
10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY K
IND, either express | 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY K
IND, either express |
11 * or implied. See the License for the specific language governing permissions a
nd limitations under | 11 * or implied. See the License for the specific language governing permissions a
nd limitations under |
12 * the License. | 12 * the License. |
13 */ | 13 */ |
14 | 14 |
15 package com.google.api.client.util.escape; | 15 package com.google.api.client.util.escape; |
16 | 16 |
17 import java.io.UnsupportedEncodingException; | 17 import java.io.UnsupportedEncodingException; |
18 import java.net.URLDecoder; | 18 import java.net.URLDecoder; |
19 | 19 |
20 /** | 20 /** |
21 * Utility functions for dealing with {@code CharEscaper}s, and some commonly us
ed {@code | 21 * Utility functions for dealing with {@code CharEscaper}s, and some commonly us
ed |
22 * CharEscaper} instances. | 22 * {@code CharEscaper} instances. |
23 * | 23 * |
24 * @since 1.0 | 24 * @since 1.0 |
25 */ | 25 */ |
26 public final class CharEscapers { | 26 public final class CharEscapers { |
27 | 27 |
28 private static final Escaper URI_ESCAPER = | 28 private static final Escaper URI_ESCAPER = |
29 new PercentEscaper(PercentEscaper.SAFECHARS_URLENCODER, true); | 29 new PercentEscaper(PercentEscaper.SAFECHARS_URLENCODER, true); |
30 | 30 |
31 private static final Escaper URI_PATH_ESCAPER = | 31 private static final Escaper URI_PATH_ESCAPER = |
32 new PercentEscaper(PercentEscaper.SAFEPATHCHARS_URLENCODER, false); | 32 new PercentEscaper(PercentEscaper.SAFEPATHCHARS_URLENCODER, false); |
33 | 33 |
| 34 private static final Escaper URI_USERINFO_ESCAPER = |
| 35 new PercentEscaper(PercentEscaper.SAFEUSERINFOCHARS_URLENCODER, false); |
| 36 |
34 private static final Escaper URI_QUERY_STRING_ESCAPER = | 37 private static final Escaper URI_QUERY_STRING_ESCAPER = |
35 new PercentEscaper(PercentEscaper.SAFEQUERYSTRINGCHARS_URLENCODER, false); | 38 new PercentEscaper(PercentEscaper.SAFEQUERYSTRINGCHARS_URLENCODER, false); |
36 | 39 |
37 /** | 40 /** |
38 * Escapes the string value so it can be safely included in URIs. For details
on escaping URIs, | 41 * Escapes the string value so it can be safely included in URIs. For details
on escaping URIs, |
39 * see section 2.4 of <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</
a>. | 42 * see <a href="http://tools.ietf.org/html/rfc3986#section-2.4">RFC 3986 - sec
tion 2.4</a>. |
40 * | 43 * |
41 * <p> | 44 * <p> |
42 * When encoding a String, the following rules apply: | 45 * When encoding a String, the following rules apply: |
43 * <ul> | 46 * <ul> |
44 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" th
rough "9" remain the | 47 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" th
rough "9" remain the |
45 * same. | 48 * same. |
46 * <li>The special characters ".", "-", "*", and "_" remain the same. | 49 * <li>The special characters ".", "-", "*", and "_" remain the same. |
47 * <li>The space character " " is converted into a plus sign "+". | 50 * <li>The space character " " is converted into a plus sign "+". |
48 * <li>All other characters are converted into one or more bytes using UTF-8 e
ncoding and each | 51 * <li>All other characters are converted into one or more bytes using UTF-8 e
ncoding and each |
49 * byte is then represented by the 3-character string "%XY", where "XY" is the
two-digit, | 52 * byte is then represented by the 3-character string "%XY", where "XY" is the
two-digit, |
50 * uppercase, hexadecimal representation of the byte value. | 53 * uppercase, hexadecimal representation of the byte value. |
51 * </ul> | 54 * </ul> |
52 * | 55 * </p> |
53 * <p> | 56 * |
54 * <b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadeci
mal sequences. From | 57 * <p> |
55 * <a href="http://www.ietf.org/rfc/rfc3986.txt"> RFC 3986</a>:<br> <i>"URI pr
oducers and | 58 * <b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadeci
mal sequences. From |
56 * normalizers should use uppercase hexadecimal digits for all percent-encodin
gs."</i> | 59 * <a href="http://tools.ietf.org/html/rfc3986"> RFC 3986</a>:<br> |
| 60 * <i>"URI producers and normalizers should use uppercase hexadecimal digits f
or all |
| 61 * percent-encodings."</i> |
| 62 * </p> |
57 * | 63 * |
58 * <p> | 64 * <p> |
59 * This escaper has identical behavior to (but is potentially much faster than
): | 65 * This escaper has identical behavior to (but is potentially much faster than
): |
60 * <ul> | 66 * <ul> |
61 * <li>{@link java.net.URLEncoder#encode(String, String)} with the encoding na
me "UTF-8" | 67 * <li>{@link java.net.URLEncoder#encode(String, String)} with the encoding na
me "UTF-8" |
62 * </ul> | 68 * </ul> |
| 69 * </p> |
63 */ | 70 */ |
64 public static String escapeUri(String value) { | 71 public static String escapeUri(String value) { |
65 return URI_ESCAPER.escape(value); | 72 return URI_ESCAPER.escape(value); |
66 } | 73 } |
67 | 74 |
68 /** | 75 /** |
69 * Percent-decodes a US-ASCII string into a Unicode string. UTF-8 encoding is
used to determine | 76 * Percent-decodes a US-ASCII string into a Unicode string. UTF-8 encoding is
used to determine |
70 * what characters are represented by any consecutive sequences of the form "%
<i>XX</i>". | 77 * what characters are represented by any consecutive sequences of the form "%
<i>XX</i>". |
71 * | 78 * |
72 * <p> | 79 * <p> |
73 * This replaces each occurrence of '+' with a space, ' '. So this method shou
ld not be used for | 80 * This replaces each occurrence of '+' with a space, ' '. So this method shou
ld not be used for |
74 * non application/x-www-form-urlencoded strings such as host and path. | 81 * non application/x-www-form-urlencoded strings such as host and path. |
| 82 * </p> |
75 * | 83 * |
76 * @param uri a percent-encoded US-ASCII string | 84 * @param uri a percent-encoded US-ASCII string |
77 * @return a Unicode string | 85 * @return a Unicode string |
78 */ | 86 */ |
79 public static String decodeUri(String uri) { | 87 public static String decodeUri(String uri) { |
80 try { | 88 try { |
81 return URLDecoder.decode(uri, "UTF-8"); | 89 return URLDecoder.decode(uri, "UTF-8"); |
82 } catch (UnsupportedEncodingException e) { | 90 } catch (UnsupportedEncodingException e) { |
83 // UTF-8 encoding guaranteed to be supported by JVM | 91 // UTF-8 encoding guaranteed to be supported by JVM |
84 throw new RuntimeException(e); | 92 throw new RuntimeException(e); |
85 } | 93 } |
86 } | 94 } |
87 | 95 |
88 /** | 96 /** |
89 * Escapes the string value so it can be safely included in URI path segments.
For details on | 97 * Escapes the string value so it can be safely included in URI path segments.
For details on |
90 * escaping URIs, see section 2.4 of <a href="http://www.ietf.org/rfc/rfc3986.
txt">RFC 3986</a>. | 98 * escaping URIs, see <a href="http://tools.ietf.org/html/rfc3986#section-2.4"
>RFC 3986 - section |
| 99 * 2.4</a>. |
91 * | 100 * |
92 * <p> | 101 * <p> |
93 * When encoding a String, the following rules apply: | 102 * When encoding a String, the following rules apply: |
94 * <ul> | 103 * <ul> |
95 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" th
rough "9" remain the | 104 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" th
rough "9" remain the |
96 * same. | 105 * same. |
97 * <li>The unreserved characters ".", "-", "~", and "_" remain the same. | 106 * <li>The unreserved characters ".", "-", "~", and "_" remain the same. |
98 * <li>The general delimiters "@" and ":" remain the same. | 107 * <li>The general delimiters "@" and ":" remain the same. |
99 * <li>The subdelimiters "!", "$", "&amp;", "'", "(", ")", "*", ",", ";", and
"=" remain the same. | 108 * <li>The subdelimiters "!", "$", "&amp;", "'", "(", ")", "*", ",", ";", and
"=" remain the same. |
100 * <li>The space character " " is converted into %20. | 109 * <li>The space character " " is converted into %20. |
101 * <li>All other characters are converted into one or more bytes using UTF-8 e
ncoding and each | 110 * <li>All other characters are converted into one or more bytes using UTF-8 e
ncoding and each |
102 * byte is then represented by the 3-character string "%XY", where "XY" is the
two-digit, | 111 * byte is then represented by the 3-character string "%XY", where "XY" is the
two-digit, |
103 * uppercase, hexadecimal representation of the byte value. | 112 * uppercase, hexadecimal representation of the byte value. |
104 * </ul> | 113 * </ul> |
105 * | 114 * </p> |
106 * <p> | 115 * |
107 * <b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadeci
mal sequences. From | 116 * <p> |
108 * <a href="http://www.ietf.org/rfc/rfc3986.txt"> RFC 3986</a>:<br> <i>"URI pr
oducers and | 117 * <b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadeci
mal sequences. From |
109 * normalizers should use uppercase hexadecimal digits for all percent-encodin
gs."</i> | 118 * <a href="http://tools.ietf.org/html/rfc3986"> RFC 3986</a>:<br> |
| 119 * <i>"URI producers and normalizers should use uppercase hexadecimal digits f
or all |
| 120 * percent-encodings."</i> |
| 121 * </p> |
110 */ | 122 */ |
111 public static String escapeUriPath(String value) { | 123 public static String escapeUriPath(String value) { |
112 return URI_PATH_ESCAPER.escape(value); | 124 return URI_PATH_ESCAPER.escape(value); |
| 125 } |
| 126 |
| 127 /** |
| 128 * Escapes the string value so it can be safely included in URI user info part
. For details on |
| 129 * escaping URIs, see <a href="http://tools.ietf.org/html/rfc3986#section-2.4"
>RFC 3986 - section |
| 130 * 2.4</a>. |
| 131 * |
| 132 * <p> |
| 133 * When encoding a String, the following rules apply: |
| 134 * <ul> |
| 135 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" th
rough "9" remain the |
| 136 * same. |
| 137 * <li>The unreserved characters ".", "-", "~", and "_" remain the same. |
| 138 * <li>The general delimiter ":" remains the same. |
| 139 * <li>The subdelimiters "!", "$", "&amp;", "'", "(", ")", "*", ",", ";", and
"=" remain the same. |
| 140 * <li>The space character " " is converted into %20. |
| 141 * <li>All other characters are converted into one or more bytes using UTF-8 e
ncoding and each |
| 142 * byte is then represented by the 3-character string "%XY", where "XY" is the
two-digit, |
| 143 * uppercase, hexadecimal representation of the byte value. |
| 144 * </ul> |
| 145 * </p> |
| 146 * |
| 147 * <p> |
| 148 * <b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadeci
mal sequences. From |
| 149 * <a href="http://tools.ietf.org/html/rfc3986"> RFC 3986</a>:<br> |
| 150 * <i>"URI producers and normalizers should use uppercase hexadecimal digits f
or all |
| 151 * percent-encodings."</i> |
| 152 * </p> |
| 153 * |
| 154 * @since 1.15 |
| 155 */ |
| 156 public static String escapeUriUserInfo(String value) { |
| 157 return URI_USERINFO_ESCAPER.escape(value); |
113 } | 158 } |
114 | 159 |
115 /** | 160 /** |
116 * Escapes the string value so it can be safely included in URI query string s
egments. When the | 161 * Escapes the string value so it can be safely included in URI query string s
egments. When the |
117 * query string consists of a sequence of name=value pairs separated by &amp;,
the names and | 162 * query string consists of a sequence of name=value pairs separated by &amp;,
the names and |
118 * values should be individually encoded. If you escape an entire query string
in one pass with | 163 * values should be individually encoded. If you escape an entire query string
in one pass with |
119 * this escaper, then the "=" and "&amp;" characters used as separators will a
lso be escaped. | 164 * this escaper, then the "=" and "&amp;" characters used as separators will a
lso be escaped. |
120 * | 165 * |
121 * <p> | 166 * <p> |
122 * This escaper is also suitable for escaping fragment identifiers. | 167 * This escaper is also suitable for escaping fragment identifiers. |
123 * | 168 * </p> |
124 * <p> | 169 * |
125 * For details on escaping URIs, see section 2.4 of <a | 170 * <p> |
126 * href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>. | 171 * For details on escaping URIs, see <a href="http://tools.ietf.org/html/rfc39
86#section-2.4">RFC |
| 172 * 3986 - section 2.4</a>. |
| 173 * </p> |
127 * | 174 * |
128 * <p> | 175 * <p> |
129 * When encoding a String, the following rules apply: | 176 * When encoding a String, the following rules apply: |
130 * <ul> | 177 * <ul> |
131 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" th
rough "9" remain the | 178 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" th
rough "9" remain the |
132 * same. | 179 * same. |
133 * <li>The unreserved characters ".", "-", "~", and "_" remain the same. | 180 * <li>The unreserved characters ".", "-", "~", and "_" remain the same. |
134 * <li>The general delimiters "@" and ":" remain the same. | 181 * <li>The general delimiters "@" and ":" remain the same. |
135 * <li>The path delimiters "/" and "?" remain the same. | 182 * <li>The path delimiters "/" and "?" remain the same. |
136 * <li>The subdelimiters "!", "$", "'", "(", ")", "*", ",", and ";", remain th
e same. | 183 * <li>The subdelimiters "!", "$", "'", "(", ")", "*", ",", and ";", remain th
e same. |
137 * <li>The space character " " is converted into %20. | 184 * <li>The space character " " is converted into %20. |
138 * <li>The equals sign "=" is converted into %3D. | 185 * <li>The equals sign "=" is converted into %3D. |
139 * <li>The ampersand "&amp;" is converted into %26. | 186 * <li>The ampersand "&amp;" is converted into %26. |
140 * <li>All other characters are converted into one or more bytes using UTF-8 e
ncoding and each | 187 * <li>All other characters are converted into one or more bytes using UTF-8 e
ncoding and each |
141 * byte is then represented by the 3-character string "%XY", where "XY" is the
two-digit, | 188 * byte is then represented by the 3-character string "%XY", where "XY" is the
two-digit, |
142 * uppercase, hexadecimal representation of the byte value. | 189 * uppercase, hexadecimal representation of the byte value. |
143 * </ul> | 190 * </ul> |
144 * | 191 * </p> |
145 * <p> | 192 * |
146 * <b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadeci
mal sequences. From | 193 * <p> |
147 * <a href="http://www.ietf.org/rfc/rfc3986.txt"> RFC 3986</a>:<br> <i>"URI pr
oducers and | 194 * <b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadeci
mal sequences. From |
148 * normalizers should use uppercase hexadecimal digits for all percent-encodin
gs."</i> | 195 * <a href="http://tools.ietf.org/html/rfc3986"> RFC 3986</a>:<br> |
| 196 * <i>"URI producers and normalizers should use uppercase hexadecimal digits f
or all |
| 197 * percent-encodings."</i> |
| 198 * </p> |
149 */ | 199 */ |
150 public static String escapeUriQuery(String value) { | 200 public static String escapeUriQuery(String value) { |
151 return URI_QUERY_STRING_ESCAPER.escape(value); | 201 return URI_QUERY_STRING_ESCAPER.escape(value); |
152 } | 202 } |
153 | 203 |
154 private CharEscapers() { | 204 private CharEscapers() { |
155 } | 205 } |
156 } | 206 } |
LEFT | RIGHT |