View Javadoc

1   /*
2    * Copyright 2004-2008 the Seasar Foundation and the Others.
3    *
4    * Licensed under the Apache License, Version 2.0 (the "License");
5    * you may not use this file except in compliance with the License.
6    * You may obtain a copy of the License at
7    *
8    *     http://www.apache.org/licenses/LICENSE-2.0
9    *
10   * Unless required by applicable law or agreed to in writing, software
11   * distributed under the License is distributed on an "AS IS" BASIS,
12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
13   * either express or implied. See the License for the specific language
14   * governing permissions and limitations under the License.
15   */
16  package org.seasar.cubby.util;
17  
18  import java.io.ByteArrayOutputStream;
19  import java.io.UnsupportedEncodingException;
20  import java.util.BitSet;
21  
/**
 * Encoder for the body part of a URL.
 * <p>
 * Every byte that belongs to the safe set is copied to the output unchanged;
 * every other byte is written as the escape character <code>%</code>
 * followed by two uppercase hexadecimal digits. The default safe set holds
 * the ASCII letters, the ASCII digits and the characters <code>-</code>,
 * <code>_</code>, <code>.</code> and <code>*</code>. The space character is
 * not part of the default safe set, so it is encoded as <code>%20</code>
 * and is never rewritten to <code>+</code>.
 * </p>
 * 
 * @author baba
 */
public class URLBodyEncoder {

	/**
	 * Byte that introduces an escape sequence.
	 */
	protected static final byte ESCAPE_CHAR = '%';

	/**
	 * BitSet of www-form-url safe characters.
	 */
	protected static final BitSet WWW_FORM_URL = new BitSet(256);

	// Static initializer for WWW_FORM_URL
	static {
		// alpha characters
		for (int i = 'a'; i <= 'z'; i++) {
			WWW_FORM_URL.set(i);
		}
		for (int i = 'A'; i <= 'Z'; i++) {
			WWW_FORM_URL.set(i);
		}
		// numeric characters
		for (int i = '0'; i <= '9'; i++) {
			WWW_FORM_URL.set(i);
		}
		// special chars
		WWW_FORM_URL.set('-');
		WWW_FORM_URL.set('_');
		WWW_FORM_URL.set('.');
		WWW_FORM_URL.set('*');
		// The space character is intentionally left out of the safe set, so
		// it is escaped as %20 like any other unsafe byte.
	}

	/**
	 * Not instantiable; this class only offers static methods.
	 */
	private URLBodyEncoder() {
	}

	/**
	 * Encodes an array of bytes into an array of URL safe 7-bit characters.
	 * Unsafe bytes are replaced by the escape character followed by their
	 * value as two uppercase hexadecimal digits.
	 * 
	 * @param urlsafe
	 *            bitset of byte values (0-255) deemed URL safe; when
	 *            <code>null</code> the default safe set is used
	 * @param bytes
	 *            array of bytes to convert to URL safe characters
	 * @return array of bytes containing URL safe characters, or
	 *         <code>null</code> if <code>bytes</code> is <code>null</code>
	 */
	public static final byte[] encodeUrl(BitSet urlsafe, byte[] bytes) {
		if (bytes == null) {
			return null;
		}
		if (urlsafe == null) {
			urlsafe = WWW_FORM_URL;
		}

		// The result is never shorter than the input, so start at that size.
		ByteArrayOutputStream buffer = new ByteArrayOutputStream(bytes.length);
		for (int i = 0; i < bytes.length; i++) {
			// Java bytes are signed; mask to obtain the value in 0-255.
			int b = bytes[i] & 0xFF;
			if (urlsafe.get(b)) {
				buffer.write(b);
			} else {
				buffer.write(ESCAPE_CHAR);
				buffer.write(hexDigit(b >> 4));
				buffer.write(hexDigit(b));
			}
		}
		return buffer.toByteArray();
	}

	/**
	 * Returns the uppercase hexadecimal digit for the low four bits of the
	 * given value.
	 * 
	 * @param value
	 *            value whose low nibble is converted
	 * @return one of the characters 0-9 or A-F
	 */
	private static char hexDigit(int value) {
		return Character.toUpperCase(Character.forDigit(value & 0xF, 16));
	}

	/**
	 * Encodes an array of bytes into an array of URL safe 7-bit characters
	 * using the default safe set. Unsafe bytes are escaped.
	 * 
	 * @param bytes
	 *            array of bytes to convert to URL safe characters
	 * @return array of bytes containing URL safe characters, or
	 *         <code>null</code> if <code>bytes</code> is <code>null</code>
	 */
	public static byte[] encode(byte[] bytes) {
		return encodeUrl(WWW_FORM_URL, bytes);
	}

	/**
	 * Encodes a string into its URL safe form. The string is first converted
	 * to bytes with the specified charset, then the unsafe bytes are escaped.
	 * 
	 * @param pString
	 *            string to convert to a URL safe form
	 * @param charset
	 *            name of the charset used to convert pString to bytes
	 * @return URL safe string, or <code>null</code> if <code>pString</code>
	 *         is <code>null</code>
	 * @throws UnsupportedEncodingException
	 *             Thrown if charset is not supported
	 */
	public static String encode(String pString, String charset)
			throws UnsupportedEncodingException {
		if (pString == null) {
			return null;
		}
		// The encoded bytes are all 7-bit, so US-ASCII decodes them losslessly.
		return new String(encode(pString.getBytes(charset)), "US-ASCII");
	}

}