NOTICE: This version of the NSF Unidata web site (archive.unidata.ucar.edu) is no longer being updated.
Current content can be found at unidata.ucar.edu.

To learn about what's going on, see About the Archive Site.

[IDV #CTI-404702]: Unix compress/uncompress for Java

Hi Jeffry:

I've attached the source; it's also available in the netcdf-java source release 
at 

  ftp://ftp.unidata.ucar.edu/pub/netcdf-java/v2.2.14/ncSrc-2.2.14.zip

It looks like we originally got it from the HTTPClient package.

Enjoy.

Ticket Details
==================
Ticket ID: CTI-404702
Department: Support netCDF Java
Priority: Normal
Status: Open
/*
 * @(#)UncompressInputStream.java                       0.3-3 06/05/2001
 *
 *  This file is part of the HTTPClient package
 *  Copyright (C) 1996-2001 Ronald Tschalar
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free
 *  Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *  MA 02111-1307, USA
 *
 *  For questions, suggestions, bug-reports, enhancement-requests etc.
 *  I may be contacted at:
 *
 *  ronald@xxxxxxxxxxxxx
 *
 *  The HTTPClient's home page is located at:
 *
 *  http://www.innovation.ch/java/HTTPClient/
 */

package ucar.unidata.io;

import java.io.*;


/**
 * This class decompresses an input stream containing data compressed with
 * the unix "compress" utility (LZC, a LZW variant). This code is based
 * heavily on the <var>unlzw.c</var> code in <var>gzip-1.2.4</var> (written
 * by Peter Jannesen) and the original compress code.
 *
 *
 *  This version has been modified from the original 0.3-3 version by the
 *  Unidata Program Center (support@xxxxxxxxxxxxxxxx) to make the constructor
 *  public and to fix a couple of bugs.
 *  Also:
 *   - markSupported() returns false
 *   - add uncompress() static method
 *
 * @version     0.3-3 06/05/2001
 * @author      Ronald Tschalar
 * @author      Unidata Program Center
 */
public class UncompressInputStream extends FilterInputStream {
  /**
   * Wraps <var>is</var> and immediately reads and validates the 3-byte
   * compress header from it.
   *
   * @param is the input stream to decompress
   * @throws IOException if the header is malformed
   */
  public UncompressInputStream(InputStream is) throws IOException {
    super(is);
    parse_header();
  }


  // scratch buffer used by the single-byte read()
  byte[] one = new byte[1];

  /**
   * Reads a single decompressed byte by delegating to
   * {@link #read(byte[], int, int)}.
   *
   * @return the byte as a value in 0..255, or -1 if no byte could be read
   * @throws IOException if the underlying read fails or the input is corrupt
   */
  public synchronized int read() throws IOException {
    int b = read(one, 0, 1);
    if (b == 1)
      return (one[0] & 0xff);
    else
      return -1;
  }


  // string table stuff
  private static final int TBL_CLEAR = 0x100;
  private static final int TBL_FIRST = TBL_CLEAR + 1;

  private int[] tab_prefix;
  private byte[] tab_suffix;
  private final int[] zeros = new int[256];
  private byte[] stack;

  // various state
  private boolean block_mode;
  private int n_bits;
  private int maxbits;
  private int maxmaxcode;
  private int maxcode;
  private int bitmask;
  private int oldcode;
  private byte finchar;
  private int stackp;
  private int free_ent;

  // input buffer
  private byte[] data = new byte[10000];
  private int bit_pos = 0, end = 0, got = 0;
  private boolean eof = false;
  private static final int EXTRA = 64;

  // upper bound on the scratch buffer allocated by skip()
  private static final int SKIP_BUF_SIZE = 8192;


  /**
   * Decompresses up to <var>len</var> bytes into <var>buf</var> starting at
   * <var>off</var>. Decoded bytes that did not fit into a previous call are
   * kept on the internal stack and handed out first.
   *
   * @param buf destination buffer
   * @param off offset in <var>buf</var> at which to start storing bytes
   * @param len maximum number of bytes to store
   * @return the number of bytes stored, or -1 if the end of the compressed
   *         stream was reached and no byte could be stored
   * @throws IOException if the underlying read fails or the input is corrupt
   */
  public synchronized int read(byte[] buf, int off, int len)
      throws IOException {
    if (eof) return -1;
    int start = off;

    /* Using local copies of various variables speeds things up by as
     * much as 30% !
     */
    int[] l_tab_prefix = tab_prefix;
    byte[] l_tab_suffix = tab_suffix;
    byte[] l_stack = stack;
    int l_n_bits = n_bits;
    int l_maxcode = maxcode;
    int l_maxmaxcode = maxmaxcode;
    int l_bitmask = bitmask;
    int l_oldcode = oldcode;
    byte l_finchar = finchar;
    int l_stackp = stackp;
    int l_free_ent = free_ent;
    byte[] l_data = data;
    int l_bit_pos = bit_pos;


    // empty stack if stuff still left

    int s_size = l_stack.length - l_stackp;
    if (s_size > 0) {
      int num = (s_size >= len) ? len : s_size;
      System.arraycopy(l_stack, l_stackp, buf, off, num);
      off += num;
      len -= num;
      l_stackp += num;
    }

    if (len == 0) {
      // only the stack pointer can have changed so far
      stackp = l_stackp;
      return off - start;
    }


    // loop, filling local buffer until enough data has been decompressed

    main_loop: do {
      if (end < EXTRA) fill();

      // number of buffered bits that may be decoded in this pass: while
      // input keeps arriving only whole groups of n_bits bytes are used,
      // after the last fill everything up to the final complete code
      int bit_in = (got > 0) ? (end - end % l_n_bits) << 3 :
          (end << 3) - (l_n_bits - 1);

      while (l_bit_pos < bit_in) {
        // handle 1-byte reads correctly
        if (len == 0) {
          n_bits = l_n_bits;
          maxcode = l_maxcode;
          maxmaxcode = l_maxmaxcode;
          bitmask = l_bitmask;
          oldcode = l_oldcode;
          finchar = l_finchar;
          stackp = l_stackp;
          free_ent = l_free_ent;
          bit_pos = l_bit_pos;

          return off - start;
        }

        // check for code-width expansion

        if (l_free_ent > l_maxcode) {
          // advance the bit position to the next multiple of n_bits bytes
          int n_bytes = l_n_bits << 3;
          l_bit_pos = (l_bit_pos - 1) +
              n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes;

          l_n_bits++;
          l_maxcode = (l_n_bits == maxbits) ? l_maxmaxcode :
              (1 << l_n_bits) - 1;

          if (debug)
            System.err.println("Code-width expanded to " + l_n_bits);

          l_bitmask = (1 << l_n_bits) - 1;
          l_bit_pos = resetbuf(l_bit_pos);
          continue main_loop;
        }


        // read next code (three bytes are fetched, shifted and masked)

        int pos = l_bit_pos >> 3;
        int code = (((l_data[pos] & 0xFF) | ((l_data[pos + 1] & 0xFF) << 8) |
            ((l_data[pos + 2] & 0xFF) << 16))
            >> (l_bit_pos & 0x7)) & l_bitmask;
        l_bit_pos += l_n_bits;


        // handle first iteration

        if (l_oldcode == -1) {
          if (code >= 256)
            throw new IOException("corrupt input: " + code +
                " > 255");
          l_finchar = (byte) (l_oldcode = code);
          buf[off++] = l_finchar;
          len--;
          continue;
        }


        // handle CLEAR code

        if (code == TBL_CLEAR && block_mode) {
          System.arraycopy(zeros, 0, l_tab_prefix, 0, zeros.length);
          l_free_ent = TBL_FIRST - 1;

          // advance the bit position to the next multiple of n_bits bytes
          int n_bytes = l_n_bits << 3;
          l_bit_pos = (l_bit_pos - 1) +
              n_bytes - (l_bit_pos - 1 + n_bytes) % n_bytes;
          l_n_bits = INIT_BITS;
          l_maxcode = (1 << l_n_bits) - 1;
          l_bitmask = l_maxcode;

          if (debug) System.err.println("Code tables reset");

          l_bit_pos = resetbuf(l_bit_pos);
          continue main_loop;
        }


        // setup

        int incode = code;
        l_stackp = l_stack.length;


        // Handle KwK case

        if (code >= l_free_ent) {
          if (code > l_free_ent)
            throw new IOException("corrupt input: code=" + code +
                ", free_ent=" + l_free_ent);

          l_stack[--l_stackp] = l_finchar;
          code = l_oldcode;
        }


        // Generate output characters in reverse order

        while (code >= 256) {
          l_stack[--l_stackp] = l_tab_suffix[code];
          code = l_tab_prefix[code];
        }
        l_finchar = l_tab_suffix[code];
        buf[off++] = l_finchar;
        len--;


        // And put them out in forward order

        s_size = l_stack.length - l_stackp;
        int num = (s_size >= len) ? len : s_size;
        System.arraycopy(l_stack, l_stackp, buf, off, num);
        off += num;
        len -= num;
        l_stackp += num;


        // generate new entry in table

        if (l_free_ent < l_maxmaxcode) {
          l_tab_prefix[l_free_ent] = l_oldcode;
          l_tab_suffix[l_free_ent] = l_finchar;
          l_free_ent++;
        }


        // Remember previous code

        l_oldcode = incode;


        // if output buffer full, then return

        if (len == 0) {
          n_bits = l_n_bits;
          maxcode = l_maxcode;
          bitmask = l_bitmask;
          oldcode = l_oldcode;
          finchar = l_finchar;
          stackp = l_stackp;
          free_ent = l_free_ent;
          bit_pos = l_bit_pos;

          return off - start;
        }
      }

      l_bit_pos = resetbuf(l_bit_pos);
    } while (got > 0);

    n_bits = l_n_bits;
    maxcode = l_maxcode;
    bitmask = l_bitmask;
    oldcode = l_oldcode;
    finchar = l_finchar;
    stackp = l_stackp;
    free_ent = l_free_ent;
    bit_pos = l_bit_pos;

    eof = true;

    // len is still > 0 here (every len == 0 case returned above), so a
    // result of 0 would break the InputStream contract: report end of
    // stream with -1 when nothing was stored.
    int count = off - start;
    return (count > 0) ? count : -1;
  }


  /**
   * Moves the unread data in the buffer to the beginning and resets
   * the pointers.
   *
   * @param bit_pos current read position in the buffer, in bits
   * @return the new bit position, which is always 0
   */
  private final int resetbuf(int bit_pos) {
    int pos = bit_pos >> 3;
    System.arraycopy(data, pos, data, 0, end - pos);
    end -= pos;
    return 0;
  }


  /**
   * Reads more compressed bytes from the wrapped stream into the buffer
   * behind the data already there, recording the result in <var>got</var>.
   * The last byte of the buffer is never filled (presumably as slack for
   * the 3-byte code fetch in read() -- not verified).
   *
   * @throws IOException if the underlying read fails
   */
  private final void fill() throws IOException {
    got = in.read(data, end, data.length - 1 - end);
    if (got > 0) end += got;
  }


  /**
   * Skips over decompressed bytes by decoding them into a bounded scratch
   * buffer and discarding them.
   *
   * @param num the number of decompressed bytes to skip; values &lt;= 0
   *            skip nothing
   * @return the number of bytes actually skipped, which is less than
   *         <var>num</var> only if the end of the stream was reached
   * @throws IOException if the underlying read fails or the input is corrupt
   */
  public synchronized long skip(long num) throws IOException {
    if (num <= 0) return 0L;

    // never size the buffer from the caller's value directly: it may be
    // huge or exceed the int range
    byte[] tmp = new byte[(int) Math.min(num, (long) SKIP_BUF_SIZE)];
    long skipped = 0L;

    while (skipped < num) {
      int want = (int) Math.min((long) tmp.length, num - skipped);
      int n = read(tmp, 0, want);
      if (n <= 0) break;
      skipped += n;
    }

    return skipped;
  }


  /**
   * Returns 0 once end of stream has been seen, otherwise whatever the
   * wrapped stream reports. Note that the latter counts compressed bytes.
   *
   * @return an estimate of the bytes available
   * @throws IOException if the wrapped stream throws
   */
  public synchronized int available() throws IOException {
    if (eof) return 0;

    return in.available();
  }


  private static final int LZW_MAGIC = 0x1f9d;
  private static final int MAX_BITS = 16;
  private static final int INIT_BITS = 9;
  private static final int HDR_MAXBITS = 0x1f;
  private static final int HDR_EXTENDED = 0x20;
  private static final int HDR_FREE = 0x40;
  private static final int HDR_BLOCK_MODE = 0x80;

  /**
   * Reads the 2-byte magic number and the flags byte from the wrapped
   * stream, validates them, and initializes the decoder state and tables.
   *
   * @throws EOFException if the stream ends inside the header
   * @throws IOException if the magic number is wrong, the code width is
   *         outside INIT_BITS..MAX_BITS, or a reserved flag bit is set
   */
  private void parse_header() throws IOException {
    // read in and check magic number

    int t = in.read();
    if (t < 0) throw new EOFException("Failed to read magic number");
    int magic = (t & 0xff) << 8;
    t = in.read();
    if (t < 0) throw new EOFException("Failed to read magic number");
    magic += t & 0xff;
    if (magic != LZW_MAGIC)
      throw new IOException("Input not in compress format (read " +
          "magic number 0x" +
          Integer.toHexString(magic) + ")");


    // read in header byte

    int header = in.read();
    if (header < 0) throw new EOFException("Failed to read header");

    block_mode = (header & HDR_BLOCK_MODE) > 0;
    maxbits = header & HDR_MAXBITS;

    if (maxbits > MAX_BITS)
      throw new IOException("Stream compressed with " + maxbits +
          " bits, but can only handle " + MAX_BITS +
          " bits");

    // Decoding always starts with INIT_BITS-wide codes and the tables
    // hold 1 << maxbits entries, so a smaller value cannot be decoded
    // (the table setup below would index out of bounds).
    if (maxbits < INIT_BITS)
      throw new IOException("Stream compressed with " + maxbits +
          " bits, but need at least " + INIT_BITS +
          " bits");

    if ((header & HDR_EXTENDED) > 0)
      throw new IOException("Header extension bit set");

    if ((header & HDR_FREE) > 0)
      throw new IOException("Header bit 6 set");

    if (debug) {
      System.err.println("block mode: " + block_mode);
      System.err.println("max bits:   " + maxbits);
    }


    // initialize stuff

    maxmaxcode = 1 << maxbits;
    n_bits = INIT_BITS;
    maxcode = (1 << n_bits) - 1;
    bitmask = maxcode;
    oldcode = -1;
    finchar = 0;
    free_ent = block_mode ? TBL_FIRST : 256;

    tab_prefix = new int[1 << maxbits];
    tab_suffix = new byte[1 << maxbits];
    stack = new byte[1 << maxbits];
    stackp = stack.length;

    // codes 0..255 stand for the literal byte values
    for (int idx = 255; idx >= 0; idx--)
      tab_suffix[idx] = (byte) idx;
  }

  /**
   * This stream does not support mark/reset on the stream.
   *
   * @return false
   */
  public boolean markSupported() {
    return false;
  }

  /**
   * Decompresses the file <var>fileInName</var> into <var>out</var>.
   * Both the input file and <var>out</var> are closed before this method
   * returns, whether it completes normally or throws.
   *
   * @param fileInName path of the compressed input file
   * @param out stream receiving the decompressed bytes; closed on return
   * @throws IOException if the file cannot be opened, is not in compress
   *         format, is corrupt, or an I/O error occurs
   */
  static public void uncompress(String fileInName, FileOutputStream out)
      throws IOException {
    long start = System.currentTimeMillis();
    long total = 0;

    FileInputStream raw = null;
    try {
      raw = new FileInputStream(fileInName);
      InputStream in = new UncompressInputStream(raw);

      byte[] buffer = new byte[100000];
      while (true) {
        int bytesRead = in.read(buffer);
        if (bytesRead == -1) break;
        out.write(buffer, 0, bytesRead);
        total += bytesRead;
      }
    } finally {
      // closing the file stream is all that closing the wrapper would do
      try {
        if (raw != null) raw.close();
      } finally {
        out.close();
      }
    }

    if (debugTiming) {
      long end = System.currentTimeMillis();
      System.err.println("Decompressed " + total + " bytes");
      System.err.println("Time: " + (end - start) / 1000. + " seconds");
    }
  }


  private static final boolean debug = false, debugTiming = false;

  /**
   * Command-line entry point: decompresses the file named by the single
   * argument to standard output and reports byte count and elapsed time
   * on standard error.
   *
   * @param args exactly one element, the path of the compressed file
   * @throws Exception if the file cannot be read or decompressed
   */
  public static void main(String args[]) throws Exception {
    if (args.length != 1) {
      System.err.println("Usage: UncompressInputStream <file>");
      System.exit(1);
    }

    FileInputStream raw = new FileInputStream(args[0]);
    try {
      InputStream in = new UncompressInputStream(raw);

      byte[] buf = new byte[100000];
      long tot = 0;
      long beg = System.currentTimeMillis();

      while (true) {
        int got = in.read(buf);
        if (got < 0) break;
        System.out.write(buf, 0, got);
        tot += got;
      }
      System.out.flush();

      long end = System.currentTimeMillis();
      System.err.println("Decompressed " + tot + " bytes");
      System.err.println("Time: " + (end - beg) / 1000. + " seconds");
    } finally {
      raw.close();
    }
  }
}

  • 2006 messages navigation, sorted by:
    1. Thread
    2. Subject
    3. Author
    4. Date
    5. ↑ Table Of Contents
  • Search the netcdf-java archives: