diff options
author | Ivo Hinkelmann <ihi@openoffice.org> | 2006-08-01 11:40:14 +0000 |
---|---|---|
committer | Ivo Hinkelmann <ihi@openoffice.org> | 2006-08-01 11:40:14 +0000 |
commit | 710eaafbd576b7e6f8e37171b3bce42d093bc7ca (patch) | |
tree | 39997955e89ab2803bd8564be7dfaba4d9b296df /xmerge/source/aportisdoc | |
parent | d09adcf061f91e15fdf4ac63f8929d05bb266811 (diff) |
INTEGRATION: CWS pj57 (1.2.24); FILE MERGED
2006/07/26 11:02:09 pjanik 1.2.24.1: #i61626#: Remove deprecated SISSL license.
Diffstat (limited to 'xmerge/source/aportisdoc')
-rw-r--r-- | xmerge/source/aportisdoc/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocDecoder.java | 341 |
1 files changed, 299 insertions, 42 deletions
diff --git a/xmerge/source/aportisdoc/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocDecoder.java b/xmerge/source/aportisdoc/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocDecoder.java index c4ff9e3b1dc4..9a6ca564d796 100644 --- a/xmerge/source/aportisdoc/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocDecoder.java +++ b/xmerge/source/aportisdoc/java/org/openoffice/xmerge/converter/xml/sxw/aportisdoc/DocDecoder.java @@ -1,55 +1,35 @@ -/************************************************************************ +/************************************************************************* * - * The Contents of this file are made available subject to the terms of - * either of the following licenses + * OpenOffice.org - a multi-platform office productivity suite * - * - GNU Lesser General Public License Version 2.1 - * - Sun Industry Standards Source License Version 1.1 + * $RCSfile: DocDecoder.java,v $ * - * Sun Microsystems Inc., October, 2000 + * $Revision: 1.3 $ * - * GNU Lesser General Public License Version 2.1 - * ============================================= - * Copyright 2000 by Sun Microsystems, Inc. - * 901 San Antonio Road, Palo Alto, CA 94303, USA + * last change: $Author: ihi $ $Date: 2006-08-01 12:40:14 $ * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software Foundation. + * The Contents of this file are made available subject to + * the terms of GNU Lesser General Public License Version 2.1. * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
* - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2005 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. * - * Sun Industry Standards Source License Version 1.1 - * ================================================= - * The contents of this file are subject to the Sun Industry Standards - * Source License Version 1.1 (the "License"); You may not use this file - * except in compliance with the License. You may obtain a copy of the - * License at http://www.openoffice.org/license.html. - * - * Software provided under this License is provided on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, - * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, - * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. - * See the License for the specific provisions governing your rights and - * obligations concerning the Software. - * - * The Initial Developer of the Original Code is: Sun Microsystems, Inc. - * - * Copyright: 2000 by Sun Microsystems, Inc. - * - * All Rights Reserved. - * - * Contributor(s): _______________________________________ + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
 * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA * ************************************************************************/ @@ -330,3 +310,280 @@ final class DocDecoder implements DocConstants { } } +package org.openoffice.xmerge.converter.xml.sxw.aportisdoc; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.List; +import java.util.ArrayList; + +import org.openoffice.xmerge.converter.palm.Record; +import org.openoffice.xmerge.util.Resources; +import org.openoffice.xmerge.util.Debug; + +/** + * This class is used by {@link + * org.openoffice.xmerge.converter.xml.sxw.DocumentDeserializerImpl} + * to decode the AportisDoc format. It currently decodes + * the text content into a single <code>String</code> object. + * + * @author Herbie Ong + */ +final class DocDecoder implements DocConstants { + + /** Number of low bits of a two-byte sequence code that hold the copy count; also added to that count. */ + private final static int COUNT_BITS = 3; + + /** Resources object for I18N; used to look up error message strings. */ + private Resources res = null; + + + /** + * Default constructor. Obtains the <code>Resources</code> + * instance that is later used to look up error message strings. + */ + DocDecoder() { + res = Resources.getInstance(); + } + + + /** + * Decode the text records into a single <code>String</code> + * of text content. + * + * <p>The first record is read as the header; the following + * <code>textRecordCount</code> records are decoded (and decompressed + * first when the header version is <code>COMPRESSED</code>) using + * <code>ENCODING</code> and appended in order.</p> + * + * <p>NOTE(review): <code>recs</code> is indexed from 0 through + * <code>textRecordCount</code> without a check against + * <code>recs.length</code> - confirm callers guarantee this.</p> + * + * @param recs <code>Record</code> array holding AportisDoc + * contents. + * + * @return The concatenated text of all decoded text records. + * + * @throws IOException If the header version is neither + * <code>COMPRESSED</code> nor <code>UNCOMPRESSED</code>, + * or if any other I/O error occurs. 
 + */ + String parseRecords(Record[] recs) throws IOException { + + // read the header record + HeaderInfo header = readHeader(recs[0].getBytes()); + + dumpHeader(header); + + // store all the characters in textBuffer + StringBuffer textBuffer = new StringBuffer(header.textLen); + + switch (header.version) { + + case COMPRESSED: + for (int i = 1; i <= header.textRecordCount; i++) { + + byte[] bytes = decompress(recs[i].getBytes(), + header.textRecordSize); + log("processing " + bytes.length + " bytes"); + String str = new String(bytes, ENCODING); + textBuffer.append(str); + } + + break; + + case UNCOMPRESSED: + for (int i = 1; i <= header.textRecordCount; i++) { + + byte[] bytes = recs[i].getBytes(); + log("processing " + bytes.length + " bytes"); + String str = new String(bytes, ENCODING); + textBuffer.append(str); + } + + break; + + default: + throw new IOException(res.getString("UNKNOWN_DOC_VERSION")); + + } + + return textBuffer.toString(); + } + + + /** + * <p>Decompress the <code>byte</code> array.</p> + * + * <p>The resulting uncompressed <code>byte</code> array should + * be within <code>textRecordSize</code> length, definitely + * within twice the size it claims, else treat it as a problem + * with the encoding of that PDB and throw + * <code>IOException</code>.</p> + * + * @param cBytes Compressed <code>byte</code> array. + * @param textRecordSize Size of uncompressed + * <code>byte</code> array; the work buffer is + * allocated at twice this size. + * + * @return A new <code>byte</code> array trimmed to exactly the + * number of uncompressed bytes produced. + * + * @throws IOException If an + * <code>ArrayIndexOutOfBoundsException</code> occurs + * while decoding, e.g. the output does not fit in + * twice <code>textRecordSize</code> bytes. + */ + private byte[] decompress(byte[] cBytes, int textRecordSize) + throws IOException { + + // create byte array for storing uncompressed bytes + // it should be within textRecordSize range, definitely + // within twice of textRecordSize! if not, then + // an ArrayIndexOutOfBoundsException will get thrown, + // and it should be converted into an IOException, and + // treat it as a conversion error. 
 + byte[] uBytes = new byte[textRecordSize*2]; + + int up = 0; + int cp = 0; + + try { + + while (cp < cBytes.length) { + + int c = cBytes[cp++] & 0xff; + + // codes 1...8 mean copy that many bytes + if (c > 0 && c < 9) { + + while (c-- > 0) + uBytes[up++] = cBytes[cp++]; + } + + // codes 0, 9...0x7F represent themselves + else if (c < 0x80) { + uBytes[up++] = (byte) c; + } + + // codes 0xC0...0xFF represent "space + ascii char" + else if (c >= 0xC0) { + uBytes[up++] = (byte) ' '; + uBytes[up++] = (byte) (c ^ 0x80); + } + + // codes 0x80...0xBF represent sequences: this byte and + // the next form a two-byte code whose low 14 bits hold + // a back distance m (upper 11 bits) and a count n + // (lower COUNT_BITS bits, plus COUNT_BITS); n bytes are + // re-copied from m positions back in the output. + else { + c <<= 8; + c += cBytes[cp++] & 0xff; + int m = (c & 0x3fff) >> COUNT_BITS; + int n = c & ((1 << COUNT_BITS) - 1); + n += COUNT_BITS; + while (n-- > 0) { + uBytes[up] = uBytes[up - m]; + up++; + } + } + } + + } catch (ArrayIndexOutOfBoundsException e) { + + throw new IOException( + res.getString("DOC_TEXT_RECORD_SIZE_EXCEEDED")); + } + + // note that uBytes may be larger than the amount of + // uncompressed bytes, so trim it to another byte array + // with the exact size. + byte[] textBytes = new byte[up]; + System.arraycopy(uBytes, 0, textBytes, 0, up); + + return textBytes; + } + + + /** + * Read the header <code>byte</code> array. + * + * @param bytes <code>byte</code> array containing header + * record data. + * + * @return <code>HeaderInfo</code> object. + * + * @throws IOException If any I/O error occurs, or if the + * text length read from the header is negative. + */ + private HeaderInfo readHeader(byte[] bytes) throws IOException { + + HeaderInfo header = new HeaderInfo(); + + ByteArrayInputStream bis = new ByteArrayInputStream(bytes); + DataInputStream dis = new DataInputStream(bis); + + // Normally the first 2 bytes comprise the version, + // which should be either COMPRESSED or UNCOMPRESSED. + // SmartDoc/Quickword would add a 0x01 to the first + // byte, thus their version would be 0x0101 for UNCOMPRESSED + // instead of 0x0001 and 0x0102 for COMPRESSED instead of + // 0x0002. The first byte is therefore skipped and only + // the second byte is stored as the version. 
 + + dis.readByte(); + header.version = dis.readByte(); + + // read extra 2 unused bytes + dis.readShort(); + + // Read the text length, this should be unsigned 4 bytes. + // We could store the read value into a long, but then + // our current buffer limit is the max positive of an int. + // That is a large enough limit, thus we shall stay with + // storing the value in an int. If it exceeds, then + // an IOException should be thrown. + header.textLen = dis.readInt(); + if (header.textLen < 0) { + throw new IOException(res.getString("DOC_TEXT_LENGTH_EXCEEDED")); + } + + // read the number of records - unsigned 2 bytes + header.textRecordCount = ((int) dis.readShort()) & 0x0000ffff; + + // read the record size - unsigned 2 bytes + header.textRecordSize = ((int) dis.readShort()) & 0x0000ffff; + + // read extra 4 unused bytes + dis.readInt(); + + return header; + } + + + /** + * Prints out header info into log. Used for debugging purposes only. + * + * @param header <code>HeaderInfo</code> structure. + */ + private void dumpHeader(HeaderInfo header) { + + log("<DOC_INFO "); + log("version=\"" + header.version + "\" "); + log("text-length=\"" + header.textLen + "\" "); + log("number-of-records=\"" + header.textRecordCount + "\" "); + log("record-size=\"" + header.textRecordSize + "\" />"); + } + + + /** + * Sends the message to <code>Debug.log</code> at + * <code>Debug.TRACE</code> level. + * + * @param str Debug string message. + */ + private void log(String str) { + Debug.log(Debug.TRACE, str); + } + + + /** + * Inner class to store AportisDoc header information. + */ + private class HeaderInfo { + + /** length of text section */ + int textLen = 0; + + /** number of text records */ + int textRecordCount = 0; + + /** + * size of a text record. This is normally the same as + * TEXT_RECORD_SIZE, but some applications may modify this. + */ + int textRecordSize = 0; + + /** + * version value taken from the second header byte; compared + * against COMPRESSED and UNCOMPRESSED to select decoding. + */ + int version = 0; + } +} + |