LEFT | RIGHT |
1 /* | 1 /* |
2 * Copyright (c) 2010 Google Inc. | 2 * Copyright (c) 2010 Google Inc. |
3 * | 3 * |
4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not u
se this file except | 4 * Licensed under the Apache License, Version 2.0 (the "License"); you may not u
se this file except |
5 * in compliance with the License. You may obtain a copy of the License at | 5 * in compliance with the License. You may obtain a copy of the License at |
6 * | 6 * |
7 * http://www.apache.org/licenses/LICENSE-2.0 | 7 * http://www.apache.org/licenses/LICENSE-2.0 |
8 * | 8 * |
9 * Unless required by applicable law or agreed to in writing, software distribut
ed under the License | 9 * Unless required by applicable law or agreed to in writing, software distribut
ed under the License |
10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY K
IND, either express | 10 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY K
IND, either express |
11 * or implied. See the License for the specific language governing permissions a
nd limitations under | 11 * or implied. See the License for the specific language governing permissions a
nd limitations under |
12 * the License. | 12 * the License. |
13 */ | 13 */ |
14 | 14 |
15 package com.google.api.client.util.escape; | 15 package com.google.api.client.util.escape; |
16 | 16 |
17 import java.io.UnsupportedEncodingException; | 17 import java.io.UnsupportedEncodingException; |
18 import java.net.URLDecoder; | 18 import java.net.URLDecoder; |
19 | 19 |
20 /** | 20 /** |
21 * Utility functions for dealing with {@code CharEscaper}s, and some commonly us
ed {@code | 21 * Utility functions for dealing with {@code CharEscaper}s, and some commonly us
ed |
22 * CharEscaper} instances. | 22 * {@code CharEscaper} instances. |
23 * | 23 * |
24 * @since 1.0 | 24 * @since 1.0 |
25 */ | 25 */ |
26 public final class CharEscapers { | 26 public final class CharEscapers { |
27 | 27 |
28 private static final Escaper URI_ESCAPER = | 28 private static final Escaper URI_ESCAPER = |
29 new PercentEscaper(PercentEscaper.SAFECHARS_URLENCODER, true); | 29 new PercentEscaper(PercentEscaper.SAFECHARS_URLENCODER, true); |
30 | 30 |
31 private static final Escaper URI_PATH_ESCAPER = | 31 private static final Escaper URI_PATH_ESCAPER = |
32 new PercentEscaper(PercentEscaper.SAFEPATHCHARS_URLENCODER, false); | 32 new PercentEscaper(PercentEscaper.SAFEPATHCHARS_URLENCODER, false); |
33 | 33 |
34 private static final Escaper URI_USERINFO_ESCAPER = | 34 private static final Escaper URI_USERINFO_ESCAPER = |
35 new PercentEscaper(PercentEscaper.SAFEUSERINFOCHARS_URLENCODER, false); | 35 new PercentEscaper(PercentEscaper.SAFEUSERINFOCHARS_URLENCODER, false); |
36 | 36 |
37 private static final Escaper URI_QUERY_STRING_ESCAPER = | 37 private static final Escaper URI_QUERY_STRING_ESCAPER = |
38 new PercentEscaper(PercentEscaper.SAFEQUERYSTRINGCHARS_URLENCODER, false); | 38 new PercentEscaper(PercentEscaper.SAFEQUERYSTRINGCHARS_URLENCODER, false); |
39 | 39 |
40 /** | 40 /** |
41 * Escapes the string value so it can be safely included in URIs. For details
on escaping URIs, | 41 * Escapes the string value so it can be safely included in URIs. For details
on escaping URIs, |
42 * see section 2.4 of <a href="http://www.ietf.org/rfc/rfc2396.txt">RFC 2396</
a>. | 42 * see <a href="http://tools.ietf.org/html/rfc3986#section-2.4">RFC 3986 - sec
tion 2.4</a>. |
43 * | 43 * |
44 * <p> | 44 * <p> |
45 * When encoding a String, the following rules apply: | 45 * When encoding a String, the following rules apply: |
46 * <ul> | 46 * <ul> |
47 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" th
rough "9" remain the | 47 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" th
rough "9" remain the |
48 * same. | 48 * same. |
49 * <li>The special characters ".", "-", "*", and "_" remain the same. | 49 * <li>The special characters ".", "-", "*", and "_" remain the same. |
50 * <li>The space character " " is converted into a plus sign "+". | 50 * <li>The space character " " is converted into a plus sign "+". |
51 * <li>All other characters are converted into one or more bytes using UTF-8 e
ncoding and each | 51 * <li>All other characters are converted into one or more bytes using UTF-8 e
ncoding and each |
52 * byte is then represented by the 3-character string "%XY", where "XY" is the
two-digit, | 52 * byte is then represented by the 3-character string "%XY", where "XY" is the
two-digit, |
53 * uppercase, hexadecimal representation of the byte value. | 53 * uppercase, hexadecimal representation of the byte value. |
54 * </ul> | 54 * </ul> |
55 * | 55 * </p> |
56 * <p> | 56 * |
57 * <b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadeci
mal sequences. From | 57 * <p> |
58 * <a href="http://www.ietf.org/rfc/rfc3986.txt"> RFC 3986</a>:<br> <i>"URI pr
oducers and | 58 * <b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadeci
mal sequences. From |
59 * normalizers should use uppercase hexadecimal digits for all percent-encodin
gs."</i> | 59 * <a href="http://tools.ietf.org/html/rfc3986"> RFC 3986</a>:<br> |
| 60 * <i>"URI producers and normalizers should use uppercase hexadecimal digits f
or all |
| 61 * percent-encodings."</i> |
| 62 * </p> |
60 * | 63 * |
61 * <p> | 64 * <p> |
62 * This escaper has identical behavior to (but is potentially much faster than
): | 65 * This escaper has identical behavior to (but is potentially much faster than
): |
63 * <ul> | 66 * <ul> |
64 * <li>{@link java.net.URLEncoder#encode(String, String)} with the encoding na
me "UTF-8" | 67 * <li>{@link java.net.URLEncoder#encode(String, String)} with the encoding na
me "UTF-8" |
65 * </ul> | 68 * </ul> |
| 69 * </p> |
66 */ | 70 */ |
67 public static String escapeUri(String value) { | 71 public static String escapeUri(String value) { |
68 return URI_ESCAPER.escape(value); | 72 return URI_ESCAPER.escape(value); |
69 } | 73 } |
70 | 74 |
71 /** | 75 /** |
72 * Percent-decodes a US-ASCII string into a Unicode string. UTF-8 encoding is
used to determine | 76 * Percent-decodes a US-ASCII string into a Unicode string. UTF-8 encoding is
used to determine |
73 * what characters are represented by any consecutive sequences of the form "%
<i>XX</i>". | 77 * what characters are represented by any consecutive sequences of the form "%
<i>XX</i>". |
74 * | 78 * |
75 * <p> | 79 * <p> |
76 * This replaces each occurrence of '+' with a space, ' '. So this method shou
ld not be used for | 80 * This replaces each occurrence of '+' with a space, ' '. So this method shou
ld not be used for |
77 * non application/x-www-form-urlencoded strings such as host and path. | 81 * non application/x-www-form-urlencoded strings such as host and path. |
| 82 * </p> |
78 * | 83 * |
79 * @param uri a percent-encoded US-ASCII string | 84 * @param uri a percent-encoded US-ASCII string |
80 * @return a Unicode string | 85 * @return a Unicode string |
81 */ | 86 */ |
82 public static String decodeUri(String uri) { | 87 public static String decodeUri(String uri) { |
83 try { | 88 try { |
84 return URLDecoder.decode(uri, "UTF-8"); | 89 return URLDecoder.decode(uri, "UTF-8"); |
85 } catch (UnsupportedEncodingException e) { | 90 } catch (UnsupportedEncodingException e) { |
86 // UTF-8 encoding guaranteed to be supported by JVM | 91 // UTF-8 encoding guaranteed to be supported by JVM |
87 throw new RuntimeException(e); | 92 throw new RuntimeException(e); |
88 } | 93 } |
89 } | 94 } |
90 | 95 |
91 /** | 96 /** |
92 * Escapes the string value so it can be safely included in URI path segments.
For details on | 97 * Escapes the string value so it can be safely included in URI path segments.
For details on |
93 * escaping URIs, see section 2.4 of <a href="http://www.ietf.org/rfc/rfc3986.
txt">RFC 3986</a>. | 98 * escaping URIs, see <a href="http://tools.ietf.org/html/rfc3986#section-2.4"
>RFC 3986 - section |
| 99 * 2.4</a>. |
94 * | 100 * |
95 * <p> | 101 * <p> |
96 * When encoding a String, the following rules apply: | 102 * When encoding a String, the following rules apply: |
97 * <ul> | 103 * <ul> |
98 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" th
rough "9" remain the | 104 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" th
rough "9" remain the |
99 * same. | 105 * same. |
100 * <li>The unreserved characters ".", "-", "~", and "_" remain the same. | 106 * <li>The unreserved characters ".", "-", "~", and "_" remain the same. |
101 * <li>The general delimiters "@" and ":" remain the same. | 107 * <li>The general delimiters "@" and ":" remain the same. |
102 * <li>The subdelimiters "!", "$", "&amp;", "'", "(", ")", "*", ",", ";", and
"=" remain the same. | 108 * <li>The subdelimiters "!", "$", "&amp;", "'", "(", ")", "*", ",", ";", and
"=" remain the same. |
103 * <li>The space character " " is converted into %20. | 109 * <li>The space character " " is converted into %20. |
104 * <li>All other characters are converted into one or more bytes using UTF-8 e
ncoding and each | 110 * <li>All other characters are converted into one or more bytes using UTF-8 e
ncoding and each |
105 * byte is then represented by the 3-character string "%XY", where "XY" is the
two-digit, | 111 * byte is then represented by the 3-character string "%XY", where "XY" is the
two-digit, |
106 * uppercase, hexadecimal representation of the byte value. | 112 * uppercase, hexadecimal representation of the byte value. |
107 * </ul> | 113 * </ul> |
108 * | 114 * </p> |
109 * <p> | 115 * |
110 * <b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadeci
mal sequences. From | 116 * <p> |
111 * <a href="http://www.ietf.org/rfc/rfc3986.txt"> RFC 3986</a>:<br> <i>"URI pr
oducers and | 117 * <b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadeci
mal sequences. From |
112 * normalizers should use uppercase hexadecimal digits for all percent-encodin
gs."</i> | 118 * <a href="http://tools.ietf.org/html/rfc3986"> RFC 3986</a>:<br> |
| 119 * <i>"URI producers and normalizers should use uppercase hexadecimal digits f
or all |
| 120 * percent-encodings."</i> |
| 121 * </p> |
113 */ | 122 */ |
114 public static String escapeUriPath(String value) { | 123 public static String escapeUriPath(String value) { |
115 return URI_PATH_ESCAPER.escape(value); | 124 return URI_PATH_ESCAPER.escape(value); |
116 } | 125 } |
117 | 126 |
118 /** | 127 /** |
119 * Escapes the string value so it can be safely included in URI user info part
. For details on | 128 * Escapes the string value so it can be safely included in URI user info part
. For details on |
120 * escaping URIs, see section 2.4 of <a href="http://www.ietf.org/rfc/rfc3986.
txt">RFC 3986</a>. | 129 * escaping URIs, see <a href="http://tools.ietf.org/html/rfc3986#section-2.4"
>RFC 3986 - section |
| 130 * 2.4</a>. |
121 * | 131 * |
122 * <p> | 132 * <p> |
123 * When encoding a String, the following rules apply: | 133 * When encoding a String, the following rules apply: |
124 * <ul> | 134 * <ul> |
125 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" th
rough "9" remain the | 135 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" th
rough "9" remain the |
126 * same. | 136 * same. |
127 * <li>The unreserved characters ".", "-", "~", and "_" remain the same. | 137 * <li>The unreserved characters ".", "-", "~", and "_" remain the same. |
128 * <li>The general delimiter ":" remains the same. | 138 * <li>The general delimiter ":" remains the same. |
129 * <li>The subdelimiters "!", "$", "&amp;", "'", "(", ")", "*", ",", ";", and
"=" remain the same. | 139 * <li>The subdelimiters "!", "$", "&amp;", "'", "(", ")", "*", ",", ";", and
"=" remain the same. |
130 * <li>The space character " " is converted into %20. | 140 * <li>The space character " " is converted into %20. |
131 * <li>All other characters are converted into one or more bytes using UTF-8 e
ncoding and each | 141 * <li>All other characters are converted into one or more bytes using UTF-8 e
ncoding and each |
132 * byte is then represented by the 3-character string "%XY", where "XY" is the
two-digit, | 142 * byte is then represented by the 3-character string "%XY", where "XY" is the
two-digit, |
133 * uppercase, hexadecimal representation of the byte value. | 143 * uppercase, hexadecimal representation of the byte value. |
134 * </ul> | 144 * </ul> |
135 * | 145 * </p> |
136 * <p> | 146 * |
137 * <b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadeci
mal sequences. From | 147 * <p> |
138 * <a href="http://www.ietf.org/rfc/rfc3986.txt"> RFC 3986</a>:<br> <i>"URI pr
oducers and | 148 * <b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadeci
mal sequences. From |
139 * normalizers should use uppercase hexadecimal digits for all percent-encodin
gs."</i> | 149 * <a href="http://tools.ietf.org/html/rfc3986"> RFC 3986</a>:<br> |
| 150 * <i>"URI producers and normalizers should use uppercase hexadecimal digits f
or all |
| 151 * percent-encodings."</i> |
| 152 * </p> |
140 * | 153 * |
141 * @since 1.15 | 154 * @since 1.15 |
142 */ | 155 */ |
143 public static String escapeUriUserInfo(String value) { | 156 public static String escapeUriUserInfo(String value) { |
144 return URI_USERINFO_ESCAPER.escape(value); | 157 return URI_USERINFO_ESCAPER.escape(value); |
145 } | 158 } |
146 | 159 |
147 /** | 160 /** |
148 * Escapes the string value so it can be safely included in URI query string s
egments. When the | 161 * Escapes the string value so it can be safely included in URI query string s
egments. When the |
149 * query string consists of a sequence of name=value pairs separated by &amp;,
the names and | 162 * query string consists of a sequence of name=value pairs separated by &amp;,
the names and |
150 * values should be individually encoded. If you escape an entire query string
in one pass with | 163 * values should be individually encoded. If you escape an entire query string
in one pass with |
151 * this escaper, then the "=" and "&amp;" characters used as separators will a
lso be escaped. | 164 * this escaper, then the "=" and "&amp;" characters used as separators will a
lso be escaped. |
152 * | 165 * |
153 * <p> | 166 * <p> |
154 * This escaper is also suitable for escaping fragment identifiers. | 167 * This escaper is also suitable for escaping fragment identifiers. |
155 * | 168 * </p> |
156 * <p> | 169 * |
157 * For details on escaping URIs, see section 2.4 of <a | 170 * <p> |
158 * href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>. | 171 * For details on escaping URIs, see <a href="http://tools.ietf.org/html/rfc39
86#section-2.4">RFC |
| 172 * 3986 - section 2.4</a>. |
| 173 * </p> |
159 * | 174 * |
160 * <p> | 175 * <p> |
161 * When encoding a String, the following rules apply: | 176 * When encoding a String, the following rules apply: |
162 * <ul> | 177 * <ul> |
163 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" th
rough "9" remain the | 178 * <li>The alphanumeric characters "a" through "z", "A" through "Z" and "0" th
rough "9" remain the |
164 * same. | 179 * same. |
165 * <li>The unreserved characters ".", "-", "~", and "_" remain the same. | 180 * <li>The unreserved characters ".", "-", "~", and "_" remain the same. |
166 * <li>The general delimiters "@" and ":" remain the same. | 181 * <li>The general delimiters "@" and ":" remain the same. |
167 * <li>The path delimiters "/" and "?" remain the same. | 182 * <li>The path delimiters "/" and "?" remain the same. |
168 * <li>The subdelimiters "!", "$", "'", "(", ")", "*", ",", and ";", remain th
e same. | 183 * <li>The subdelimiters "!", "$", "'", "(", ")", "*", ",", and ";", remain th
e same. |
169 * <li>The space character " " is converted into %20. | 184 * <li>The space character " " is converted into %20. |
170 * <li>The equals sign "=" is converted into %3D. | 185 * <li>The equals sign "=" is converted into %3D. |
171 * <li>The ampersand "&amp;" is converted into %26. | 186 * <li>The ampersand "&amp;" is converted into %26. |
172 * <li>All other characters are converted into one or more bytes using UTF-8 e
ncoding and each | 187 * <li>All other characters are converted into one or more bytes using UTF-8 e
ncoding and each |
173 * byte is then represented by the 3-character string "%XY", where "XY" is the
two-digit, | 188 * byte is then represented by the 3-character string "%XY", where "XY" is the
two-digit, |
174 * uppercase, hexadecimal representation of the byte value. | 189 * uppercase, hexadecimal representation of the byte value. |
175 * </ul> | 190 * </ul> |
176 * | 191 * </p> |
177 * <p> | 192 * |
178 * <b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadeci
mal sequences. From | 193 * <p> |
179 * <a href="http://www.ietf.org/rfc/rfc3986.txt"> RFC 3986</a>:<br> <i>"URI pr
oducers and | 194 * <b>Note</b>: Unlike other escapers, URI escapers produce uppercase hexadeci
mal sequences. From |
180 * normalizers should use uppercase hexadecimal digits for all percent-encodin
gs."</i> | 195 * <a href="http://tools.ietf.org/html/rfc3986"> RFC 3986</a>:<br> |
| 196 * <i>"URI producers and normalizers should use uppercase hexadecimal digits f
or all |
| 197 * percent-encodings."</i> |
| 198 * </p> |
181 */ | 199 */ |
182 public static String escapeUriQuery(String value) { | 200 public static String escapeUriQuery(String value) { |
183 return URI_QUERY_STRING_ESCAPER.escape(value); | 201 return URI_QUERY_STRING_ESCAPER.escape(value); |
184 } | 202 } |
185 | 203 |
186 private CharEscapers() { | 204 private CharEscapers() { |
187 } | 205 } |
188 } | 206 } |
LEFT | RIGHT |