source: branches/0.15.x/abcl/src/org/armedbear/lisp/util/RandomAccessCharacterFile.java

Last change on this file was 11511, checked in by vvoutilainen, 17 years ago

Use replacement characters for unmappable and malformed
data. This makes the reader more lenient and allows
cl-bench to compile and run out-of-the-box on current
ubuntu installations, for example. At some point, we
may want to add user-definable strategies for handling
unmappable/malformed data.

  • Property svn:eol-style set to LF
File size: 16.2 KB
Line 
1/*
2 * RandomAccessCharacterFile.java
3 *
4 * Copyright (C) 2008 Hideo at Yokohama
5 * Copyright (C) 2008 Erik Huelsmann
6 * $Id$
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version 2
11 * of the License, or (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
21 *
22 * As a special exception, the copyright holders of this library give you
23 * permission to link this library with independent modules to produce an
24 * executable, regardless of the license terms of these independent
25 * modules, and to copy and distribute the resulting executable under
26 * terms of your choice, provided that you also meet, for each linked
27 * independent module, the terms and conditions of the license of that
28 * module.  An independent module is a module which is not derived from
29 * or based on this library.  If you modify this library, you may extend
30 * this exception to your version of the library, but you are not
31 * obligated to do so.  If you do not wish to do so, delete this
32 * exception statement from your version.
33 */
34
35package org.armedbear.lisp.util;
36
37import java.io.IOException;
38import java.io.PushbackInputStream;
39import java.io.OutputStream;
40import java.io.RandomAccessFile;
41import java.io.PushbackReader;
42import java.io.Reader;
43import java.io.StringReader;
44import java.io.Writer;
45import java.nio.ByteBuffer;
46import java.nio.CharBuffer;
47import java.nio.channels.FileChannel;
48import java.nio.charset.Charset;
49import java.nio.charset.CharsetDecoder;
50import java.nio.charset.CharsetEncoder;
51import java.nio.charset.CoderResult;
52import java.nio.charset.CodingErrorAction;
53
/**
 * A random-access file that can be used both as a byte stream and as a
 * character stream in a given encoding.
 *
 * <p>One instance owns a single {@link FileChannel} plus a single byte buffer
 * ({@code bbuf}) that caches a window of the file. Four views share that state:
 * a {@link PushbackReader}, a {@link Writer}, a {@link PushbackInputStream} and
 * an {@link OutputStream}. Reads and writes through any view move the same
 * logical file position.
 *
 * <p>Malformed or unmappable data is replaced rather than reported, both when
 * decoding and when encoding.
 *
 * <p>Instances are not synchronized.
 */
public class RandomAccessCharacterFile {

    /** Byte-oriented input view; every operation is delegated to the enclosing file. */
    private class RandomAccessInputStream extends PushbackInputStream {

        private final byte[] read_buf = new byte[1];

        public RandomAccessInputStream() {
            // Every method that would touch the wrapped stream is overridden,
            // so the null delegate is never dereferenced.
            super(null);
        }

        @Override
        public int read() throws IOException {
            int len = read(read_buf);
            if (len == 1) {
                // byte is signed, int is signed: mask so that 0xff comes back
                // as 255 and not as -1 (which would look like end of stream).
                return 0xff & (int) read_buf[0];
            }
            return -1;
        }

        @Override
        public int read(byte[] b, int off, int len) throws IOException {
            int n = RandomAccessCharacterFile.this.read(b, off, len);
            // InputStream contract: a non-empty request that cannot deliver a
            // single byte because of end of file must answer -1, not 0.
            return (n == 0 && len > 0) ? -1 : n;
        }

        @Override
        public int read(byte[] b) throws IOException {
            return this.read(b, 0, b.length);
        }

        @Override
        public void unread(int b) throws IOException {
            RandomAccessCharacterFile.this.unreadByte((byte) b);
        }

        @Override
        public void unread(byte[] b, int off, int len) throws IOException {
            for (int i = 0; i < len; i++) {
                this.unread(b[off + i]);
            }
        }

        @Override
        public void unread(byte[] b) throws IOException {
            this.unread(b, 0, b.length);
        }

        @Override
        public int available() throws IOException {
            long remaining = RandomAccessCharacterFile.this.length()
                    - RandomAccessCharacterFile.this.position();
            if (remaining <= 0) {
                return 0;
            }
            // avoid the silent wrap-around of a plain (int) cast on big files
            return remaining > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) remaining;
        }

        @Override
        public synchronized void mark(int readlimit) {
            // marking is not supported; see markSupported()
        }

        @Override
        public boolean markSupported() {
            return false;
        }

        @Override
        public synchronized void reset() throws IOException {
            throw new IOException("Operation not supported");
        }

        @Override
        public long skip(long n) throws IOException {
            RandomAccessCharacterFile.this.position(
                    RandomAccessCharacterFile.this.position() + n);
            return n;
        }

        @Override
        public void close() throws IOException {
            RandomAccessCharacterFile.this.close();
        }
    }

    /** Byte-oriented output view; every operation is delegated to the enclosing file. */
    private class RandomAccessOutputStream extends OutputStream {

        private final byte[] buf = new byte[1];

        private RandomAccessOutputStream() {
        }

        @Override
        public void write(int b) throws IOException {
            buf[0] = (byte) b;
            write(buf);
        }

        @Override
        public void write(byte[] b, int off, int len) throws IOException {
            RandomAccessCharacterFile.this.write(b, off, len);
        }

        @Override
        public void flush() throws IOException {
            RandomAccessCharacterFile.this.flush();
        }

        @Override
        public void close() throws IOException {
            RandomAccessCharacterFile.this.close();
        }
    }

    // dummy reader which we need to call the Pushback constructor
    // because a null value won't work
    private static final Reader staticReader = new StringReader("");

    /** Character-oriented input view; every operation is delegated to the enclosing file. */
    private class RandomAccessReader extends PushbackReader {

        private final char[] read_buf = new char[1];

        private RandomAccessReader() {
            // because we override all methods of PushbackReader,
            // staticReader will never be referenced
            super(staticReader);
        }

        @Override
        public void close() throws IOException {
            RandomAccessCharacterFile.this.close();
        }

        @Override
        public int read() throws IOException {
            int n = this.read(read_buf);
            return (n == 1) ? read_buf[0] : -1;
        }

        @Override
        public void unread(int c) throws IOException {
            RandomAccessCharacterFile.this.unreadChar((char) c);
        }

        @Override
        public void unread(char[] cbuf, int off, int len) throws IOException {
            for (int i = 0; i < len; i++) {
                this.unread(cbuf[off + i]);
            }
        }

        @Override
        public void unread(char[] cbuf) throws IOException {
            this.unread(cbuf, 0, cbuf.length);
        }

        @Override
        public int read(CharBuffer target) throws IOException {
            // Same strategy as java.io.Reader: read into a scratch array of the
            // size the target can still take, then copy what was delivered.
            int len = target.remaining();
            char[] scratch = new char[len];
            int n = this.read(scratch, 0, len);
            if (n > 0) {
                target.put(scratch, 0, n);
            }
            return n;
        }

        @Override
        public int read(char[] cbuf) throws IOException {
            return RandomAccessCharacterFile.this.read(cbuf, 0, cbuf.length);
        }

        @Override
        public int read(char[] cb, int off, int len) throws IOException {
            return RandomAccessCharacterFile.this.read(cb, off, len);
        }
    }

    /** Character-oriented output view; every operation is delegated to the enclosing file. */
    private class RandomAccessWriter extends Writer {

        private RandomAccessWriter() {
        }

        @Override
        public void close() throws IOException {
            RandomAccessCharacterFile.this.close();
        }

        @Override
        public void flush() throws IOException {
            RandomAccessCharacterFile.this.flush();
        }

        @Override
        public void write(char[] cb, int off, int len) throws IOException {
            RandomAccessCharacterFile.this.write(cb, off, len);
        }
    }

    final static int BUFSIZ = 4 * 1024; // setting this to a small value like 8 is helpful for testing.

    private RandomAccessWriter writer;
    private RandomAccessReader reader;
    private RandomAccessInputStream inputStream;
    private RandomAccessOutputStream outputStream;
    private FileChannel fcn;
    private long fcnpos; /* where fcn is pointing now. */
    private long fcnsize; /* the file size */

    private Charset cset;
    private CharsetEncoder cenc;
    private CharsetDecoder cdec;

    /**
     * bbuf is treated as a cache of the file content.
     * If it points to somewhere in the middle of the file, it holds the copy of the file content,
     * even when you are writing a large chunk of data.  If you write in the middle of a file,
     * bbuf first gets filled with contents of the data, and only after that any new data is
     * written on bbuf.
     * The exception is when you are appending data at the end of the file.
     */
    private ByteBuffer bbuf;
    private boolean bbufIsDirty; /* whether bbuf holds data that must be written. */
    private long bbufpos; /* where the beginning of bbuf is pointing in the file now. */

    // scratch buffers for unreadChar(), allocated on first use
    private CharBuffer singleCharBuf;
    private ByteBuffer shortByteBuf;

    /**
     * Wraps an already opened file.
     *
     * @param raf      the file to operate on; its current position becomes the
     *                 initial logical position
     * @param encoding charset name used by the reader and writer views, or
     *                 {@code null} for the platform default charset
     * @throws IOException if the channel's position or size cannot be queried
     */
    public RandomAccessCharacterFile(RandomAccessFile raf, String encoding) throws IOException {
        fcn = raf.getChannel();
        fcnpos = fcn.position();
        fcnsize = fcn.size();

        cset = (encoding == null) ? Charset.defaultCharset() : Charset.forName(encoding);
        cdec = cset.newDecoder();
        cdec.onMalformedInput(CodingErrorAction.REPLACE);
        cdec.onUnmappableCharacter(CodingErrorAction.REPLACE);
        cenc = cset.newEncoder();
        // Same lenient policy as the decoder. With the default REPORT action an
        // unmappable character makes encode() return an error result without
        // consuming input, and the encode loop would never terminate.
        cenc.onMalformedInput(CodingErrorAction.REPLACE);
        cenc.onUnmappableCharacter(CodingErrorAction.REPLACE);

        bbuf = ByteBuffer.allocate(BUFSIZ);

        // there is no readable data available in the buffers.
        bbuf.flip();

        // there is no write pending data in the buffers.
        bbufIsDirty = false;

        bbufpos = fcn.position();

        reader = new RandomAccessReader();
        writer = new RandomAccessWriter();
        inputStream = new RandomAccessInputStream();
        outputStream = new RandomAccessOutputStream();
    }

    /** @return the character output view of this file */
    public Writer getWriter() {
        return writer;
    }

    /** @return the character input view of this file */
    public PushbackReader getReader() {
        return reader;
    }

    /** @return the byte input view of this file */
    public PushbackInputStream getInputStream() {
        return inputStream;
    }

    /** @return the byte output view of this file */
    public OutputStream getOutputStream() {
        return outputStream;
    }

    /**
     * Writes out pending data and closes the underlying channel.
     *
     * @throws IOException if flushing or closing fails
     */
    public void close() throws IOException {
        try {
            internalFlush(true);
        } finally {
            // release the channel even when the final flush fails
            fcn.close();
        }
    }

    /**
     * Writes the buffer to the file if it holds unwritten data.
     *
     * @throws IOException if the write fails
     */
    public void flush() throws IOException {
        internalFlush(false);
    }

    /**
     * Decodes characters from the current position into {@code cb}.
     *
     * @return the number of characters stored, or -1 when none could be stored
     */
    private int read(char[] cb, int off, int len) throws IOException {
        CharBuffer cbuf = CharBuffer.wrap(cb, off, len);
        boolean decodeWasUnderflow = false;
        boolean atEof = false;
        while ((cbuf.remaining() > 0) && dataIsAvailableForRead() && !atEof) {
            if ((bbuf.remaining() == 0) || decodeWasUnderflow) {
                // need to read from the file.
                flushBbuf(); // in case bbuf is dirty.
                // the consumed part of the buffer is dropped, so the window
                // start advances by what was consumed.
                bbufpos += bbuf.position();
                // partialBytes > 0 happens when the decoder stopped in the
                // middle of a multi-byte sequence.
                int partialBytes = bbuf.remaining();
                // if reads and writes are mixed, we may need to seek first.
                if (bbufpos + partialBytes != fcnpos) {
                    fcn.position(bbufpos + partialBytes);
                }
                // keep the partial bytes at the front and append fresh data.
                bbuf.compact();
                atEof = (fcn.read(bbuf) == -1);
                bbuf.flip();
                fcnpos = bbufpos + bbuf.remaining();
            }
            CoderResult r = cdec.decode(bbuf, cbuf, pointingAtEOF());
            decodeWasUnderflow = (CoderResult.UNDERFLOW == r);
        }
        if (cbuf.remaining() == len) {
            return -1;
        }
        return len - cbuf.remaining();
    }

    /** True while the buffer or the channel still has bytes ahead of the cursor. */
    private boolean dataIsAvailableForRead() throws IOException {
        return ((bbuf.remaining() > 0) || (fcn.position() < fcn.size()));
    }

    /** True when the buffer is exhausted and the channel sits at the known file size. */
    private boolean pointingAtEOF() {
        return (bbuf.remaining() == 0) && (fcnpos == fcnsize);
    }

    /** Encodes {@code len} characters of {@code cb} at the current position. */
    private void write(char[] cb, int off, int len) throws IOException {
        CharBuffer cbuf = CharBuffer.wrap(cb, off, len);
        encodeAndWrite(cbuf, false, false);
    }

    private void internalFlush(boolean endOfFile) throws IOException {
        if (endOfFile) {
            CharBuffer cbuf = CharBuffer.allocate(0);
            encodeAndWrite(cbuf, true, endOfFile);
        } else {
            flushBbuf();
        }
    }

    /**
     * Encodes the characters of {@code cbuf} into the byte buffer, spilling
     * the buffer to the file whenever it fills up.
     *
     * @param flush     write the buffer out once everything is encoded
     * @param endOfFile passed to the encoder as its end-of-input flag
     */
    private void encodeAndWrite(CharBuffer cbuf, boolean flush, boolean endOfFile) throws IOException {
        if (bbufpos == fcnsize) {
            // appending: nothing in the buffer is worth keeping.
            bbuf.clear();
        }
        while (cbuf.remaining() > 0) {
            CoderResult r = cenc.encode(cbuf, bbuf, endOfFile);
            bbufIsDirty = true;
            long curpos = bbufpos + bbuf.position();
            if (curpos > fcnsize) {
                // the file is extended.
                fcnsize = curpos;
            }
            if (CoderResult.OVERFLOW == r || bbuf.remaining() == 0) {
                flushBbuf();
                // NOTE(review): this advances by limit(), not by the encode
                // position. When overwriting in the middle of a file and a
                // multi-byte character does not fit in the tail of the buffer,
                // the unused tail bytes appear to be skipped. Left as is; a
                // fix needs the refill logic below to be reworked as well.
                bbufpos += bbuf.limit();
                bbuf.clear();
                if (fcnpos < fcnsize) {
                    fcn.read(bbuf);
                    bbuf.flip();
                    fcnpos += bbuf.remaining();
                }
                // if we are at the end of file, bbuf is simply cleared.
                // in that case, bbufpos + bbuf.position points to the EOF, not fcnpos.
            }
        }
        if (bbuf.position() > 0 && bbufIsDirty && flush) {
            flushBbuf();
        }
    }

    /**
     * Moves the logical position.
     *
     * @param newPosition absolute byte offset in the file
     * @throws IOException if flushing or seeking fails
     */
    public void position(long newPosition) throws IOException {
        flushBbuf();
        long bbufend = bbufpos + bbuf.limit();
        if (newPosition >= bbufpos && newPosition < bbufend) {
            // near seek. within existing data of bbuf.
            bbuf.position((int) (newPosition - bbufpos));
        } else {
            // far seek. discard the buffer (it was flushed above).
            fcn.position(newPosition);
            fcnpos = newPosition;
            bbuf.clear();
            bbuf.flip(); // "there is no useful data on this buffer yet."
            bbufpos = fcnpos;
        }
    }

    /**
     * @return the logical position within the file, as an absolute byte offset
     * @throws IOException if pending data cannot be written
     */
    public long position() throws IOException {
        flushBbuf();
        return bbufpos + bbuf.position(); // the logical position within the file.
    }

    /**
     * @return the size of the file in bytes, after pending data has been written
     * @throws IOException if flushing or querying the channel fails
     */
    public long length() throws IOException {
        flushBbuf();
        return fcn.size();
    }

    /**
     * Writes the buffer to the file when it is dirty. The buffer's position,
     * i.e. the logical cursor, is the same after the call as before.
     */
    private void flushBbuf() throws IOException {
        if (!bbufIsDirty) {
            return;
        }
        if (fcnpos != bbufpos) {
            fcn.position(bbufpos);
        }
        // The buffer has to be rewound to be written from its start; remember
        // where the cursor was so the rewind does not move the logical position.
        int cursor = bbuf.position();
        bbuf.position(0);
        if (bbufpos + bbuf.limit() > fcnsize) {
            // the buffer is at the end of the file.
            // area beyond fcnsize does not have data.
            bbuf.limit((int) (fcnsize - bbufpos));
        }
        while (bbuf.hasRemaining()) {
            fcn.write(bbuf);
        }
        fcnpos = bbufpos + bbuf.limit();
        bbufIsDirty = false;
        bbuf.position(Math.min(cursor, bbuf.limit()));
    }

    /**
     * Reads up to {@code len} bytes from the current position.
     *
     * @param b   destination array
     * @param off index in {@code b} of the first byte to store
     * @param len maximum number of bytes to read
     * @return the number of bytes stored; 0 when the end of the file was reached
     * @throws IOException if reading fails
     */
    public int read(byte[] b, int off, int len) throws IOException {
        int pos = off;
        boolean atEof = false;
        while (pos - off < len && dataIsAvailableForRead() && !atEof) {
            if (bbuf.remaining() == 0) {
                // need to read from the file.
                flushBbuf(); // in case bbuf is dirty.
                // update bbufpos.
                bbufpos += bbuf.limit();
                // if reads and writes are mixed, we may need to seek first.
                if (bbufpos != fcnpos) {
                    fcn.position(bbufpos);
                }
                // need to read data from file.
                bbuf.clear();
                atEof = (fcn.read(bbuf) == -1);
                bbuf.flip();
                fcnpos = bbufpos + bbuf.remaining();
            }
            // bytes still owed to the caller; pos is an index into b and so
            // already includes off.
            int want = off + len - pos;
            if (want > bbuf.remaining()) {
                want = bbuf.remaining();
            }
            bbuf.get(b, pos, want);
            pos += want;
        }
        return pos - off;
    }

    /**
     * A method corresponding to the good ol' ungetc in C.
     *
     * <p>This function may fail when using (combined) character codes that use
     * escape sequences to switch between sub-codes.
     * ASCII, ISO-8859 series, any 8bit code are OK, all unicode variations are OK,
     * but applications of the ISO-2022 encoding framework can have trouble.
     * Example of such code is ISO-2022-JP which is used in Japanese e-mail.
     *
     * @param c the character to push back; assumed to be the one just read
     * @throws IOException if repositioning fails
     */
    public void unreadChar(char c) throws IOException {
        // algorithm :
        //  1. encode c into bytes, to find out how many bytes it corresponds to
        //  2. move the position backwards that many bytes.
        //  ** we stop here.  Don't bother to write the bytes to the buffer,
        //     assuming that it is the same as the original data.
        //     If we allow to write back different characters, the buffer must get 'dirty'
        //     but that would require read/write permissions on files you use unreadChar,
        //     even if you are just reading for some tokenizer.
        //
        //  So we don't do the following.
        //  3. write the bytes.
        //  4. move the position back again.
        if (singleCharBuf == null) {
            singleCharBuf = CharBuffer.allocate(1);
            // maxBytesPerChar() is a float and need not be integral; round up
            // so the scratch buffer is never one byte short.
            shortByteBuf = ByteBuffer.allocate((int) Math.ceil(cenc.maxBytesPerChar()));
        }
        singleCharBuf.clear();
        singleCharBuf.append(c);
        singleCharBuf.flip();
        shortByteBuf.clear();
        cenc.encode(singleCharBuf, shortByteBuf, false);
        int n = shortByteBuf.position();
        long pos = position() - n;
        position(pos);
    }

    /**
     * Moves the position one byte backwards. The byte itself is not written.
     *
     * @param b the byte being pushed back (unused)
     * @throws IOException if repositioning fails
     */
    public void unreadByte(byte b) throws IOException {
        long pos = position() - 1;
        position(pos);
    }

    /** Writes {@code len} bytes of {@code b}, starting at {@code off}, at the current position. */
    private void write(byte[] b, int off, int len) throws IOException {
        int pos = off;
        while (pos < off + len) {
            // only what is still unwritten, not the full request again
            int want = off + len - pos;
            if (want > bbuf.remaining()) {
                want = bbuf.remaining();
            }
            bbuf.put(b, pos, want);
            pos += want;
            bbufIsDirty = true;
            long curpos = bbufpos + bbuf.position();
            if (curpos > fcn.size()) {
                // the file is extended.
                fcnsize = curpos;
            }
            if (bbuf.remaining() == 0) {
                flushBbuf();
                bbufpos += bbuf.limit();
                bbuf.clear();
                if (fcn.position() < fcn.size()) {
                    bbufpos = fcn.position();
                    fcn.read(bbuf);
                    bbuf.flip();
                    fcnpos += bbuf.remaining();
                }
                // if we are at the end of file, bbuf is simply cleared.
                // in that case, bbufpos + bbuf.position points to the EOF, not fcnpos.
            }
        }
    }
}
Note: See TracBrowser for help on using the repository browser.