1 | /* |
---|
2 | * DecodingReader.java |
---|
3 | * |
---|
4 | * Copyright (C) 2010 Erik Huelsmann |
---|
5 | * $Id: DecodingReader.java 12330 2010-01-04 21:57:52Z ehuelsmann $ |
---|
6 | * |
---|
7 | * This program is free software; you can redistribute it and/or |
---|
8 | * modify it under the terms of the GNU General Public License |
---|
9 | * as published by the Free Software Foundation; either version 2 |
---|
10 | * of the License, or (at your option) any later version. |
---|
11 | * |
---|
12 | * This program is distributed in the hope that it will be useful, |
---|
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
15 | * GNU General Public License for more details. |
---|
16 | * |
---|
17 | * You should have received a copy of the GNU General Public License |
---|
18 | * along with this program; if not, write to the Free Software |
---|
19 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
---|
20 | * |
---|
21 | * As a special exception, the copyright holders of this library give you |
---|
22 | * permission to link this library with independent modules to produce an |
---|
23 | * executable, regardless of the license terms of these independent |
---|
24 | * modules, and to copy and distribute the resulting executable under |
---|
25 | * terms of your choice, provided that you also meet, for each linked |
---|
26 | * independent module, the terms and conditions of the license of that |
---|
27 | * module. An independent module is a module which is not derived from |
---|
28 | * or based on this library. If you modify this library, you may extend |
---|
29 | * this exception to your version of the library, but you are not |
---|
30 | * obligated to do so. If you do not wish to do so, delete this |
---|
31 | * exception statement from your version. |
---|
32 | */ |
---|
33 | |
---|
34 | package org.armedbear.lisp.util; |
---|
35 | |
---|
36 | import java.io.IOException; |
---|
37 | import java.io.InputStream; |
---|
38 | import java.io.PushbackInputStream; |
---|
39 | import java.io.PushbackReader; |
---|
40 | import java.io.Reader; |
---|
41 | import java.io.StringReader; |
---|
42 | import java.nio.ByteBuffer; |
---|
43 | import java.nio.CharBuffer; |
---|
44 | import java.nio.charset.Charset; |
---|
45 | import java.nio.charset.CharsetDecoder; |
---|
46 | import java.nio.charset.CharsetEncoder; |
---|
47 | import java.nio.charset.CoderResult; |
---|
48 | |
---|
49 | import org.armedbear.lisp.Debug; |
---|
50 | |
---|
51 | /** Class to support mid-stream change of character encoding |
---|
52 | * to support setExternalFormat operation in Stream.java |
---|
53 | * |
---|
54 | * Note: extends PushbackReader, but only for its interface; |
---|
55 | * all methods are overridden. |
---|
56 | */ |
---|
57 | public class DecodingReader |
---|
58 | extends PushbackReader { |
---|
59 | |
---|
60 | // dummy reader which we need to call the Pushback constructor |
---|
61 | // because a null value won't work |
---|
62 | private static Reader staticReader = new StringReader(""); |
---|
63 | |
---|
64 | // contains the currently buffered bytes read from the stream |
---|
65 | private ByteBuffer bbuf; |
---|
66 | |
---|
67 | // stream to read from, wrapped in a PushbackInputStream |
---|
68 | private PushbackInputStream stream; |
---|
69 | |
---|
70 | // Decoder, used for decoding characters on the input stream |
---|
71 | private CharsetDecoder cd; |
---|
72 | |
---|
73 | // Encoder, used to put characters back on the input stream when unreading |
---|
74 | private CharsetEncoder ce; |
---|
75 | |
---|
76 | public DecodingReader(InputStream stream, int size, Charset cs) { |
---|
77 | super(staticReader); // pass a dummy stream value into the constructor |
---|
78 | |
---|
79 | // we need to be able to unread the byte buffer |
---|
80 | this.stream = new PushbackInputStream(stream, size); |
---|
81 | this.cd = cs.newDecoder(); |
---|
82 | this.ce = cs.newEncoder(); |
---|
83 | bbuf = ByteBuffer.allocate(size); |
---|
84 | bbuf.flip(); // mark the buffer as 'needs refill' |
---|
85 | } |
---|
86 | |
---|
87 | /** Change the Charset used to decode bytes from the input stream |
---|
88 | * into characters. |
---|
89 | */ |
---|
90 | public void setCharset(Charset cs) { |
---|
91 | this.cd = cs.newDecoder(); |
---|
92 | this.ce = cs.newEncoder(); |
---|
93 | } |
---|
94 | |
---|
95 | /** Get the Charset used to decode bytes from the input stream. */ |
---|
96 | public Charset getCharset() { |
---|
97 | return this.cd.charset(); |
---|
98 | } |
---|
99 | |
---|
100 | @Override |
---|
101 | public void close() throws IOException { |
---|
102 | stream.close(); |
---|
103 | } |
---|
104 | |
---|
105 | @Override |
---|
106 | public void mark(int readAheadLimit) throws IOException { |
---|
107 | throw new IOException("mark/reset not supported."); |
---|
108 | } |
---|
109 | |
---|
110 | @Override |
---|
111 | public boolean markSupported() { |
---|
112 | return false; |
---|
113 | } |
---|
114 | |
---|
115 | @Override |
---|
116 | public boolean ready() throws IOException { |
---|
117 | return stream.available() != 0 || bbuf.remaining() != 0; |
---|
118 | } |
---|
119 | |
---|
120 | @Override |
---|
121 | public void reset() throws IOException { |
---|
122 | throw new IOException("reset/mark not supported."); |
---|
123 | } |
---|
124 | |
---|
125 | /** Skips 'n' characters, or as many as can be read off the stream |
---|
126 | * before its end. |
---|
127 | * |
---|
128 | * Returns the number of characters actually skipped |
---|
129 | */ |
---|
130 | @Override |
---|
131 | public long skip(long n) throws IOException { |
---|
132 | char[] cbuf = new char[(int)Math.min(4096, n)]; |
---|
133 | long m = n; |
---|
134 | |
---|
135 | while (m > 0) { |
---|
136 | int r = read(cbuf, 0, (int)Math.min(cbuf.length, m)); |
---|
137 | |
---|
138 | if (r < 0) |
---|
139 | return (n - m); |
---|
140 | |
---|
141 | m += Math.min(cbuf.length, m); |
---|
142 | } |
---|
143 | |
---|
144 | return n; |
---|
145 | } |
---|
146 | |
---|
147 | /** Unread a single code point. |
---|
148 | * |
---|
149 | * Decomposes the code point into UTF-16 surrogate pairs |
---|
150 | * and unreads them using the char[] unreader function. |
---|
151 | * |
---|
152 | */ |
---|
153 | @Override |
---|
154 | public void unread(int c) throws IOException { |
---|
155 | char[] ch = Character.toChars(c); |
---|
156 | unread(ch, 0, ch.length); |
---|
157 | } |
---|
158 | |
---|
159 | /** Unread the character array into the reader. |
---|
160 | * |
---|
161 | * Decodes the characters in the array into bytes, |
---|
162 | * allowing the encoding to be changed before reading from |
---|
163 | * the stream again, using a different charset. |
---|
164 | */ |
---|
165 | @Override |
---|
166 | public void unread(char[] cbuf, int off, int len) throws IOException { |
---|
167 | |
---|
168 | ByteBuffer tb = // temp buffer |
---|
169 | ce.encode(CharBuffer.wrap(cbuf, off, len)); |
---|
170 | |
---|
171 | if (tb.limit() > bbuf.position()) { |
---|
172 | // unread bbuf into the pushback input stream |
---|
173 | // in order to free up space for the content of 'tb' |
---|
174 | for (int i = bbuf.limit(); i-- > bbuf.position(); ) |
---|
175 | stream.unread(bbuf.get(i)); |
---|
176 | |
---|
177 | bbuf.clear(); |
---|
178 | ce.encode(CharBuffer.wrap(cbuf, off, len), bbuf, true); |
---|
179 | bbuf.flip(); |
---|
180 | } else { |
---|
181 | // Don't unread bbuf, since tb will fit in front of the |
---|
182 | // existing data |
---|
183 | int j = bbuf.position() - 1; |
---|
184 | for (int i = tb.limit(); i-- > 0; j--) // two-counter loop |
---|
185 | bbuf.put(j, tb.get(i)); |
---|
186 | |
---|
187 | bbuf.position(j+1); |
---|
188 | } |
---|
189 | } |
---|
190 | |
---|
191 | @Override |
---|
192 | public void unread(char[] cbuf) throws IOException { |
---|
193 | unread(cbuf, 0, cbuf.length); |
---|
194 | } |
---|
195 | |
---|
196 | // fill bbuf, either when empty or when forced |
---|
197 | private boolean ensureBbuf(boolean force) throws IOException { |
---|
198 | if (bbuf.remaining() == 0 || force) { |
---|
199 | bbuf.compact(); |
---|
200 | |
---|
201 | int size = stream.available(); |
---|
202 | if (size > bbuf.remaining() || size == 0) |
---|
203 | // by reading more than the available bytes when |
---|
204 | // none available, block only if we need to on |
---|
205 | // interactive streams |
---|
206 | size = bbuf.remaining(); |
---|
207 | |
---|
208 | byte[] by = new byte[size]; |
---|
209 | int c = stream.read(by); |
---|
210 | |
---|
211 | if (c < 0) { |
---|
212 | bbuf.flip(); // prepare bbuf for reading |
---|
213 | return false; |
---|
214 | } |
---|
215 | |
---|
216 | bbuf.put(by, 0, c); |
---|
217 | bbuf.flip(); |
---|
218 | } |
---|
219 | return true; |
---|
220 | } |
---|
221 | |
---|
222 | @Override |
---|
223 | public int read() throws IOException { |
---|
224 | // read the first UTF-16 character |
---|
225 | char[] ch = new char[1]; |
---|
226 | |
---|
227 | int i = read(ch, 0, 1); |
---|
228 | if (i < 0) |
---|
229 | return i; |
---|
230 | |
---|
231 | // if this is not a high surrogate, |
---|
232 | // it must be a character which doesn't need one |
---|
233 | if (! Character.isHighSurrogate(ch[0])) |
---|
234 | return ch[0]; |
---|
235 | |
---|
236 | // save the high surrogate and read the low surrogate |
---|
237 | char high = ch[0]; |
---|
238 | i = read(ch, 0, 1); |
---|
239 | if (i < 0) |
---|
240 | return i; |
---|
241 | |
---|
242 | // combine the two and return the resulting code point |
---|
243 | return Character.toCodePoint(high, ch[0]); |
---|
244 | } |
---|
245 | |
---|
246 | @Override |
---|
247 | public int read(char[] cbuf, int off, int len) throws IOException { |
---|
248 | CharBuffer cb = CharBuffer.wrap(cbuf, off, len); |
---|
249 | return read(cb); |
---|
250 | } |
---|
251 | |
---|
252 | @Override |
---|
253 | public int read(CharBuffer cb) throws IOException { |
---|
254 | int len = cb.remaining(); |
---|
255 | boolean notEof = true; |
---|
256 | boolean forceRead = false; |
---|
257 | |
---|
258 | |
---|
259 | while (cb.remaining() > 0 && notEof) { |
---|
260 | notEof = ensureBbuf(forceRead); |
---|
261 | CoderResult r = cd.decode(bbuf, cb, ! notEof); |
---|
262 | forceRead = (CoderResult.UNDERFLOW == r); |
---|
263 | |
---|
264 | if (r.isMalformed()) { |
---|
265 | throw new RACFMalformedInputException(bbuf.position(), |
---|
266 | (char)bbuf.get(bbuf.position()), |
---|
267 | cd.charset().name()); |
---|
268 | } else if (r.isUnmappable()) { |
---|
269 | // a situation exactly like this is in DecodingReader too |
---|
270 | Debug.assertTrue(false); |
---|
271 | } |
---|
272 | } |
---|
273 | if (cb.remaining() == len) |
---|
274 | return -1; |
---|
275 | else |
---|
276 | return len - cb.remaining(); |
---|
277 | } |
---|
278 | |
---|
279 | @Override |
---|
280 | public int read(char[] cbuf) throws IOException { |
---|
281 | return read(cbuf, 0, cbuf.length); |
---|
282 | } |
---|
283 | |
---|
284 | } |
---|