source: branches/0.20.x/abcl/src/org/armedbear/lisp/util/DecodingReader.java

Last change on this file was 12330, checked in by ehuelsmann, 15 years ago

Fix ticket #77: incorrect encoding used for FASLs, by always using UTF-8.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 8.8 KB
Line 
1/*
2 * DecodingReader.java
3 *
4 * Copyright (C) 2010 Erik Huelsmann
5 * $Id: DecodingReader.java 12330 2010-01-04 21:57:52Z ehuelsmann $
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
20 *
21 * As a special exception, the copyright holders of this library give you
22 * permission to link this library with independent modules to produce an
23 * executable, regardless of the license terms of these independent
24 * modules, and to copy and distribute the resulting executable under
25 * terms of your choice, provided that you also meet, for each linked
26 * independent module, the terms and conditions of the license of that
27 * module.  An independent module is a module which is not derived from
28 * or based on this library.  If you modify this library, you may extend
29 * this exception to your version of the library, but you are not
30 * obligated to do so.  If you do not wish to do so, delete this
31 * exception statement from your version.
32 */
33
34package org.armedbear.lisp.util;
35
36import java.io.IOException;
37import java.io.InputStream;
38import java.io.PushbackInputStream;
39import java.io.PushbackReader;
40import java.io.Reader;
41import java.io.StringReader;
42import java.nio.ByteBuffer;
43import java.nio.CharBuffer;
44import java.nio.charset.Charset;
45import java.nio.charset.CharsetDecoder;
46import java.nio.charset.CharsetEncoder;
47import java.nio.charset.CoderResult;
48
49import org.armedbear.lisp.Debug;
50
51/** Class to support mid-stream change of character encoding
52 * to support setExternalFormat operation in Stream.java
53 *
54 * Note: extends PushbackReader, but only for its interface;
55 * all methods are overridden.
56 */
57public class DecodingReader
58    extends PushbackReader {
59
60    // dummy reader which we need to call the Pushback constructor
61    // because a null value won't work
62    private static Reader staticReader = new StringReader("");
63
64    // contains the currently buffered bytes read from the stream
65    private ByteBuffer bbuf;
66
67    // stream to read from, wrapped in a PushbackInputStream
68    private PushbackInputStream stream;
69
70    // Decoder, used for decoding characters on the input stream
71    private CharsetDecoder cd;
72
73    // Encoder, used to put characters back on the input stream when unreading
74    private CharsetEncoder ce;
75
76    public DecodingReader(InputStream stream, int size, Charset cs) {
77        super(staticReader); // pass a dummy stream value into the constructor
78
79          // we need to be able to unread the byte buffer
80        this.stream = new PushbackInputStream(stream, size);
81        this.cd = cs.newDecoder();
82        this.ce = cs.newEncoder();
83        bbuf = ByteBuffer.allocate(size);
84        bbuf.flip();  // mark the buffer as 'needs refill'
85    }
86
87    /** Change the Charset used to decode bytes from the input stream
88     * into characters.
89     */
90    public void setCharset(Charset cs) {
91        this.cd = cs.newDecoder();
92        this.ce = cs.newEncoder();
93    }
94
95    /** Get the Charset used to decode bytes from the input stream. */
96    public Charset getCharset() {
97        return this.cd.charset();
98    }
99
100    @Override
101    public void close() throws IOException {
102        stream.close();
103    }
104
105    @Override
106    public void mark(int readAheadLimit) throws IOException {
107        throw new IOException("mark/reset not supported.");
108    }
109
110    @Override
111    public boolean markSupported() {
112        return false;
113    }
114
115    @Override
116    public boolean ready() throws IOException {
117        return stream.available() != 0 || bbuf.remaining() != 0;
118    }
119
120    @Override
121    public void reset() throws IOException {
122        throw new IOException("reset/mark not supported.");
123    }
124
125    /** Skips 'n' characters, or as many as can be read off the stream
126     * before its end.
127     *
128     * Returns the number of characters actually skipped
129     */
130    @Override
131    public long skip(long n) throws IOException {
132        char[] cbuf = new char[(int)Math.min(4096, n)];
133        long m = n;
134
135        while (m > 0) {
136            int r = read(cbuf, 0, (int)Math.min(cbuf.length, m));
137
138            if (r < 0)
139                return (n - m);
140
141            m += Math.min(cbuf.length, m);
142        }
143
144        return n;
145    }
146
147    /** Unread a single code point.
148     *
149     * Decomposes the code point into UTF-16 surrogate pairs
150     * and unreads them using the char[] unreader function.
151     *
152     */
153    @Override
154    public void unread(int c) throws IOException {
155        char[] ch = Character.toChars(c);
156        unread(ch, 0, ch.length);
157    }
158
159    /** Unread the character array into the reader.
160     *
161     * Decodes the characters in the array into bytes,
162     * allowing the encoding to be changed before reading from
163     * the stream again, using a different charset.
164     */
165    @Override
166    public void unread(char[] cbuf, int off, int len) throws IOException {
167
168        ByteBuffer tb = // temp buffer
169            ce.encode(CharBuffer.wrap(cbuf, off, len));
170
171        if (tb.limit() > bbuf.position()) {
172            // unread bbuf into the pushback input stream
173            // in order to free up space for the content of 'tb'
174            for (int i = bbuf.limit(); i-- > bbuf.position(); )
175                stream.unread(bbuf.get(i));
176
177            bbuf.clear();
178            ce.encode(CharBuffer.wrap(cbuf, off, len), bbuf, true);
179            bbuf.flip();
180        } else {
181            // Don't unread bbuf, since tb will fit in front of the
182            // existing data
183            int j = bbuf.position() - 1;
184            for (int i = tb.limit(); i-- > 0; j--) // two-counter loop
185                bbuf.put(j, tb.get(i));
186
187            bbuf.position(j+1);
188        }
189    }
190
191    @Override
192    public void unread(char[] cbuf) throws IOException {
193        unread(cbuf, 0, cbuf.length);
194    }
195
196    // fill bbuf, either when empty or when forced
197    private boolean ensureBbuf(boolean force) throws IOException {
198        if (bbuf.remaining() == 0 || force) {
199            bbuf.compact();
200
201            int size = stream.available();
202            if (size > bbuf.remaining() || size == 0)
203                // by reading more than the available bytes when
204                // none available, block only if we need to on
205                // interactive streams
206                size = bbuf.remaining();
207
208            byte[] by = new byte[size];
209            int c = stream.read(by);
210
211            if (c < 0) {
212                bbuf.flip();  // prepare bbuf for reading
213                return false;
214            }
215
216            bbuf.put(by, 0, c);
217            bbuf.flip();
218        }
219        return true;
220    }
221
222    @Override
223    public int read() throws IOException {
224        // read the first UTF-16 character
225        char[] ch = new char[1];
226
227        int i = read(ch, 0, 1);
228        if (i < 0)
229            return i;
230
231        // if this is not a high surrogate,
232        // it must be a character which doesn't need one
233        if (! Character.isHighSurrogate(ch[0]))
234            return ch[0];
235
236        // save the high surrogate and read the low surrogate
237        char high = ch[0];
238        i = read(ch, 0, 1);
239        if (i < 0)
240            return i;
241
242        // combine the two and return the resulting code point
243        return Character.toCodePoint(high, ch[0]);
244    }
245
246    @Override
247    public int read(char[] cbuf, int off, int len) throws IOException {
248        CharBuffer cb = CharBuffer.wrap(cbuf, off, len);
249        return read(cb);
250    }
251
252    @Override
253    public int read(CharBuffer cb) throws IOException {
254        int len = cb.remaining();
255        boolean notEof = true;
256        boolean forceRead = false;
257
258
259        while (cb.remaining() > 0 && notEof) {
260            notEof = ensureBbuf(forceRead);
261            CoderResult r = cd.decode(bbuf, cb, ! notEof);
262            forceRead = (CoderResult.UNDERFLOW == r);
263
264            if (r.isMalformed()) {
265                throw new RACFMalformedInputException(bbuf.position(),
266                                                      (char)bbuf.get(bbuf.position()),
267                                                      cd.charset().name());
268            } else if (r.isUnmappable()) {
269                // a situation exactly like this is in DecodingReader too
270                Debug.assertTrue(false);
271            }
272        }
273        if (cb.remaining() == len)
274            return -1;
275        else
276            return len - cb.remaining();
277    }
278
279    @Override
280    public int read(char[] cbuf) throws IOException {
281        return read(cbuf, 0, cbuf.length);
282    }
283
284}
Note: See TracBrowser for help on using the repository browser.