diff options
author | Jens-Heiner Rechtien <hr@openoffice.org> | 2005-10-24 16:54:34 +0000 |
---|---|---|
committer | Jens-Heiner Rechtien <hr@openoffice.org> | 2005-10-24 16:54:34 +0000 |
commit | cfaf0fd33c7d546da92c88505515a8672a43d6d5 (patch) | |
tree | 6b2285550f682d5f79cfe65097c8fb1887aa070a /xmerge/source/wordsmith | |
parent | 25138c93a590b8c4376e3b66a6f135bb0cf1876c (diff) |
INTEGRATION: CWS lo8 (1.1.2); FILE ADDED
2005/06/08 16:30:33 lo 1.1.2.1: restructuring of project and fix for #i44847#
Diffstat (limited to 'xmerge/source/wordsmith')
3 files changed, 1284 insertions, 0 deletions
diff --git a/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java b/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java new file mode 100644 index 000000000000..61e22e385bfd --- /dev/null +++ b/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentDeserializerImpl.java @@ -0,0 +1,593 @@ +/************************************************************************ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import org.w3c.dom.*; + +import java.io.IOException; +import java.util.Enumeration; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.ConvertException; +import org.openoffice.xmerge.DocumentDeserializer; +import org.openoffice.xmerge.converter.xml.OfficeConstants; +import org.openoffice.xmerge.converter.palm.PalmDB; +import org.openoffice.xmerge.converter.palm.Record; +import org.openoffice.xmerge.converter.palm.PdbDecoder; +import org.openoffice.xmerge.converter.palm.PalmDocument; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; + +import java.util.Vector; +import java.io.ByteArrayInputStream; + +import org.openoffice.xmerge.converter.xml.*; +import org.openoffice.xmerge.util.Debug; +import org.openoffice.xmerge.util.XmlUtil; + +/** + * <p>WordSmith implementation of + * org.openoffice.xmerge.DocumentDeserializer + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * The <code>deserialize</code> method uses a + * <code>DocDecoder</code> to read the WordSmith format into a + * <code>String</code> object, then it calls <code>buildDocument</code> + * to create a <code>SxwDocument</code> object from it. + * + * @author Herbie Ong, David Proulx + */ +public final class DocumentDeserializerImpl +implements DOCConstants, OfficeConstants, DocumentDeserializer { + + /** A Decoder object for decoding WordSmith format. */ + private WSDecoder decoder = null; + + WseFontTable fontTable = null; + WseColorTable colorTable = null; + StyleCatalog styleCat = null; + StyleCatalog oldStyleCat = null; + + /** A <code>ConvertData</code> object assigned to this object. */ + private ConvertData cd = null; + + + /** + * Constructor that assigns the given <code>ConvertData</code> + * to the object. + * + * @param cd A <code>ConvertData</code> object to read data for + * the conversion process by the deserialize method. + */ + public DocumentDeserializerImpl(ConvertData cd) { + this.cd = cd; + } + + + /** + * Convert the given <code>ConvertData</code> into a + * <code>SxwDocument</code> object. + * + * @return Resulting <code>Document</code> object. + * + * @throws ConvertException If any conversion error occurs. + * @throws IOException If any I/O error occurs. + */ + public Document deserialize() throws ConvertException, + IOException { + return deserialize(null, cd); + } + + + public Document deserialize(Document origDoc, ConvertData cd) + throws IOException { + + Document doc = null; + PalmDocument palmDoc = null; + Enumeration e = cd.getDocumentEnumeration(); + + while(e.hasMoreElements()) { + palmDoc = (PalmDocument) e.nextElement(); + PalmDB pdb = palmDoc.getPdb(); + Record[] recs = pdb.getRecords(); + decoder = new WSDecoder(); + Wse[] b = decoder.parseDocument(recs); + String docName = palmDoc.getName(); + doc = buildDocument(docName, b, origDoc); + } + return doc; + } + + + /** + * Temporary method to read existing <code>StyleCatalog</code> + * as a starting point. + * + * @param parentDoc The parent <code>Document</code>. + */ + private void readStyleCatalog(Document parentDoc) { + Element rootNode = null; + try { + java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); + parentDoc.write(bos); + SxwDocument sxwDoc = new SxwDocument("old"); + sxwDoc.read(new ByteArrayInputStream(bos.toByteArray())); + org.w3c.dom.Document domDoc = sxwDoc.getContentDOM(); + + String families[] = new String[3]; + families[0] = "text"; + families[1] = "paragraph"; + families[2] = "paragraph"; + Class classes[] = new Class[3]; + classes[0] = TextStyle.class; + classes[1] = ParaStyle.class; + classes[2] = TextStyle.class; + + NodeList nl = domDoc.getElementsByTagName(TAG_OFFICE_STYLES); + oldStyleCat.add(nl.item(0), families, classes, null, false); + nl = domDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); + oldStyleCat.add(nl.item(0), families, classes, null, false); + nl = domDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES); + oldStyleCat.add(nl.item(0), families, classes, null, false); + + } catch (Exception e) { + Debug.log(Debug.ERROR, "", e); + } + + } + + + /** + * Given an array of paragraph <code>Style</code> objects, see if + * there is exactly one which matches the text formatting + * <code>Style</code> of <code>tStyle</code>. + * + * @param paraStyles An array of paragraph <code>Style</code> + * objects. + * @param tStyle Text <code>Style</code> to match. + * + * @return The paragraph <code>Style</code> that matches. + */ + private ParaStyle matchParaByText(Style paraStyles[], TextStyle tStyle) { + int matchIndex = -1; + int matchCount = 0; + Style txtMatches[] = (Style[]) oldStyleCat.getMatching(tStyle); + if (txtMatches.length >= 1) { + for (int j = 0; j < txtMatches.length; j++) { + TextStyle t = (TextStyle)txtMatches[j]; + + if (!t.getFamily().equals("paragraph")) + continue; + + for (int k = 0; k < paraStyles.length; k++) { + if (t.getName().equals(paraStyles[k].getName())) { + matchCount++; + matchIndex = k; + } + } + } + } + if (matchCount == 1) + return (ParaStyle)paraStyles[matchIndex]; + else return null; + } + + + /** + * Take a <code>String</code> of text and turn it into a sequence + * of <code>Node</code> objects. + * + * @param text <code>String</code> of text. + * @param parentDoc Parent <code>Document</code>. + * + * @return Array of <code>Node</code> objects. + */ + private Node[] parseText(String text, org.w3c.dom.Document parentDoc) { + Vector nodeVec = new Vector(); + + // Break up the text from the WordSmith text run into Open + // Office text runs. There may be more runs in OO because + // runs of 2 or more spaces map to nodes. + while ((text.indexOf(" ") != -1) || (text.indexOf("\t") != 1)) { + + // Find the indices of tabs and multiple spaces, and + // figure out which of them occurs first in the string. + int spaceIndex = text.indexOf(" "); + int tabIndex = text.indexOf("\t"); + if ((spaceIndex == -1) && (tabIndex == -1)) + break; // DJP This should not be necessary. What is wrong + // with the while() stmt up above? + int closerIndex; // Index of the first of these + if (spaceIndex == -1) + closerIndex = tabIndex; + else if (tabIndex == -1) + closerIndex = spaceIndex; + else + closerIndex = (spaceIndex > tabIndex) ? tabIndex : spaceIndex; + + // If there is any text prior to the first occurrence of a + // tab or spaces, create a text node from it, then chop it + // off the string we're working with. + if (closerIndex > 0) { + String beginningText = text.substring(0, closerIndex); + Text textNode = parentDoc.createTextNode(beginningText); + nodeVec.addElement(textNode); + log("<TEXT>"); + log(beginningText); + log("</TEXT>"); + } + text = text.substring(closerIndex); + + // Handle either tab character or space sequence by creating + // an element for it, and then chopping out the text that + // represented it in "text". + if (closerIndex == tabIndex) { + Element tabNode = parentDoc.createElement(TAG_TAB_STOP); + nodeVec.add(tabNode); + text = text.substring(1); // tab is always a single character + log("<TAB/>"); + } else { + // Compute length of space sequence. + int nrSpaces = 2; + while ((nrSpaces < text.length()) + && text.substring(nrSpaces, nrSpaces + 1).equals(" ")) + nrSpaces++; + + Element spaceNode = parentDoc.createElement(TAG_SPACE); + spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT, new Integer(nrSpaces).toString()); + nodeVec.add(spaceNode); + text = text.substring(nrSpaces); + log("<SPACE count=\"" + nrSpaces + "\" />"); + } + } + + // No more tabs or space sequences. If there's any remaining + // text create a text node for it. + if (text.length() > 0) { + Text textNode = parentDoc.createTextNode(text); + nodeVec.add(textNode); + log("<TEXT>"); + log(text); + log("</TEXT>"); + } + + // Now create and populate an array to return the nodes in. + Node nodes[] = new Node[nodeVec.size()]; + for (int i = 0; i < nodeVec.size(); i++) + nodes[i] = (Node)nodeVec.elementAt(i); + return nodes; + } + + + /** + * Parses the text content of a WordSmith format and builds a + * <code>SXWDocument</code>. + * + * @param docName <code>Document</code> name + * @param str Text content of WordSmith format + * + * @return Resulting <code>SXWDocument</code> object. + * + * @throws IOException If any I/O error occurs. + */ + private SxwDocument buildDocument(String docName, Wse[] data, Document origDoc) + throws IOException { + + // create minimum office xml document. + SxwDocument sxwDoc = new SxwDocument(docName); + sxwDoc.initContentDOM(); + + org.w3c.dom.Document doc = sxwDoc.getContentDOM(); + + // Grab hold of the office:body tag, + // Assume there should be one. + // This is where top level paragraphs will append to. + NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY); + Node bodyNode = list.item(0); + + styleCat = new StyleCatalog(50); + oldStyleCat = new StyleCatalog(50); + if (origDoc != null) + readStyleCatalog(origDoc); + + Element currPara = null; + ParaStyle currParaStyle = null; + int newTextStyleNr = 0; + int newParaStyleNr = 0; + + // Now write out the document body by running through + // the list of WordSmith elements and processing each one + // in turn. + for (int i = 0; i < data.length; i++) { + + if (data[i].getClass() == WsePara.class) { + + currPara = doc.createElement(TAG_PARAGRAPH); + log("</PARA>"); + log("<PARA>"); + + WsePara p = (WsePara)data[i]; + + // Save info about the first text run, if there is one. + WseTextRun firstTextRun = null; + + if ((data.length >= i + 2) + && (data[i+1].getClass() == WseTextRun.class)) + firstTextRun = (WseTextRun)data[i+1]; + + Style matches[] = oldStyleCat.getMatching(p.makeStyle()); + + // See if we can find a unique match in the catalog + // of existing styles from the original document. + ParaStyle pStyle = null; + if (matches.length == 1) { + pStyle = (ParaStyle)matches[0]; + log("using an existing style"); + } else if ((matches.length > 1) && (firstTextRun != null)) { + pStyle = matchParaByText(matches, firstTextRun.makeStyle()); + log("resolved a para by looking @ text"); + } + + // If nothing found so far, try looking in the catalog + // of newly-created styles. + // DJP FIXME: if we need to add two para styles with the + // same para formatting info but different default text + // styles, this won't work! + if (pStyle == null) { + log("had " + matches.length + " matches in old catalog"); + matches = styleCat.getMatching(p.makeStyle()); + if (matches.length == 0) { + pStyle = p.makeStyle(); + String newName = new String("PPP" + ++newParaStyleNr); + pStyle.setName(newName); + styleCat.add(pStyle); + // DJP: write in the text format info here + log("created a new style"); + } else if (matches.length == 1) { + pStyle = (ParaStyle)matches[0]; + log("re-using a new style"); + } else if (firstTextRun != null) { + pStyle = matchParaByText(matches, firstTextRun.makeStyle()); + if (pStyle != null) { + log("resolved a (new) para by looking @ text"); + } else + log("Hey this shouldn't happen! - nr of matches is " + + matches.length); + } + } + + if (pStyle == null) + log("Unable to figure out a para style"); + + // Figured out a style to use. Specify the style in this + // paragraph's attributes. + currPara.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, pStyle.getName()); + + bodyNode.appendChild(currPara); + currParaStyle = pStyle; + } else if (data[i].getClass() == WseTextRun.class) { + WseTextRun tr = (WseTextRun)data[i]; + TextStyle trStyle = null; + Node trNodes[] = parseText(tr.getText(), doc); + + // First see if the formatting of this text run matches + // the default text formatting for this paragraph. If + // it does, then just make the text node(s) children of + // the current paragraph. + Style[] cps = new Style[1]; + cps[0] = currParaStyle; + if (matchParaByText(cps, tr.makeStyle()) != null) { + for (int ii = 0; ii < trNodes.length; ii++) { + currPara.appendChild(trNodes[ii]); + } + continue; + } + + // Check for existing, matching styles in the old style + // catalog. If exactly one is found, use it. Otherwise, + // check the new style catalog, and either use the style + // found or add this new one to it. + Style matches[] = oldStyleCat.getMatching(tr.makeStyle()); + if (matches.length == 1) + trStyle = (TextStyle)matches[0]; + else { + matches = styleCat.getMatching(tr.makeStyle()); + if (matches.length == 0) { + trStyle = tr.makeStyle(); + String newName = new String("TTT" + ++newTextStyleNr); + trStyle.setName(newName); + styleCat.add(trStyle); + } else if (matches.length == 1) + trStyle = (TextStyle)matches[0]; + else + log("multiple text style matches from new catalog"); + } + + // Create a text span node, set the style attribute, make the + // text node(s) its children, and append it to current paragraph's + // list of children. + Element textSpanNode = doc.createElement(TAG_SPAN); + textSpanNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, trStyle.getName()); + for (int ii = 0; ii < trNodes.length; ii++) { + textSpanNode.appendChild(trNodes[ii]); + } + currPara.appendChild(textSpanNode); + log("</SPAN>"); + } + + else if (data[i].getClass() == WseFontTable.class) { + fontTable = (WseFontTable)data[i]; + } + + else if (data[i].getClass() == WseColorTable.class) { + colorTable = (WseColorTable)data[i]; + } + } + + + //NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT); + NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT_CONTENT); + Node rootNode = r.item(0); + + // read the original document + org.w3c.dom.NodeList nl; + if (origDoc != null) { + java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream(); + origDoc.write(bos); + SxwDocument origSxwDoc = new SxwDocument("old"); + origSxwDoc.read(new ByteArrayInputStream(bos.toByteArray())); + org.w3c.dom.Document origDomDoc = origSxwDoc.getContentDOM(); + + XmlUtil xu = new XmlUtil(); + org.w3c.dom.DocumentFragment df; + org.w3c.dom.Node newNode; + + // copy font declarations from original document to the new document + nl = origDomDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS); + df = doc.createDocumentFragment(); + newNode = xu.deepClone(df, nl.item(0)); + rootNode.insertBefore(newNode, bodyNode); + + // copy style catalog from original document to the new document + nl = origDomDoc.getElementsByTagName(TAG_OFFICE_STYLES); + df = doc.createDocumentFragment(); + newNode = xu.deepClone(df, nl.item(0)); + rootNode.insertBefore(newNode, bodyNode); + + nl = origDomDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); + df = doc.createDocumentFragment(); + newNode = xu.deepClone(df, nl.item(0)); + rootNode.insertBefore(newNode, bodyNode); + + nl = origDomDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES); + df = doc.createDocumentFragment(); + newNode = xu.deepClone(df, nl.item(0)); + rootNode.insertBefore(newNode, bodyNode); + } + + // Original document not specified. We need to add font declarations. + // DJP: this might just be for debugging. Merger will probably put + // the "real" ones in. + // DJP: if really doing it this way, do it right: gather font names + // from style catalog(s). + else { + org.w3c.dom.Node declNode; + + log("<FONT-DECLS/>"); + + declNode = doc.createElement(TAG_OFFICE_FONT_DECLS); + rootNode.insertBefore(declNode, bodyNode); + org.w3c.dom.Element fontNode; + + fontNode = doc.createElement(TAG_STYLE_FONT_DECL); + fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, "Arial"); + fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, "Arial"); + fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable"); + declNode.appendChild(fontNode); + + fontNode = doc.createElement(TAG_STYLE_FONT_DECL); + fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, "Arioso"); + fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, "Arioso"); + fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable"); + declNode.appendChild(fontNode); + } + + + // Now add any new styles we have created in this document. + nl = doc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); + Node autoStylesNode = nl.item(0); + if (autoStylesNode == null) { + autoStylesNode = doc.createElement(TAG_OFFICE_AUTOMATIC_STYLES); + log("<OFFICE-AUTOMATIC-STYLES/>"); + rootNode.insertBefore(autoStylesNode, bodyNode); + } + + Node newStyleCatNode = styleCat.writeNode(doc, "dummy"); + nl = newStyleCatNode.getChildNodes(); + int nNodes = nl.getLength(); + for (int i = 0; i < nNodes; i++) { + autoStylesNode.appendChild(nl.item(0)); + } + + oldStyleCat.dumpCSV(true); + styleCat.dumpCSV(true); + return sxwDoc; + } + + + /** + * Sends message to the log object. + * + * @param str Debug message. + */ + private void log(String str) { + + Debug.log(Debug.TRACE, str); + } + + + /* + public static void main(String args[]) { + + // DocumentDeserializerImpl d = new DocumentDeserializerImpl(new InputStream()); + + Node nodes[] = parseText("Tab here:\tThen some more text"); + } +*/ +} + diff --git a/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentMergerImpl.java b/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentMergerImpl.java new file mode 100644 index 000000000000..d3ac39ab559a --- /dev/null +++ b/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentMergerImpl.java @@ -0,0 +1,127 @@ +/************************************************************************ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; + +import org.openoffice.xmerge.DocumentMerger; +import org.openoffice.xmerge.MergeException; +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.merger.DiffAlgorithm; +import org.openoffice.xmerge.merger.Difference; +import org.openoffice.xmerge.merger.NodeMergeAlgorithm; +import org.openoffice.xmerge.merger.Iterator; +import org.openoffice.xmerge.merger.DiffAlgorithm; +import org.openoffice.xmerge.merger.diff.ParaNodeIterator; +import org.openoffice.xmerge.merger.diff.IteratorLCSAlgorithm; +import org.openoffice.xmerge.merger.merge.DocumentMerge; +import org.openoffice.xmerge.merger.merge.CharacterBaseParagraphMerge; +import org.openoffice.xmerge.util.Debug; + + +/** + * Wordsmith implementation of <code>DocumentMerger</code> + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl + * PluginFactoryImpl}.</p> + */ +public class DocumentMergerImpl implements DocumentMerger { + + private ConverterCapabilities cc_; + private org.openoffice.xmerge.Document orig = null; + + public DocumentMergerImpl(org.openoffice.xmerge.Document doc, ConverterCapabilities cc) { + cc_ = cc; + this.orig = doc; + } + + public void merge(org.openoffice.xmerge.Document modifiedDoc) throws MergeException { + + SxwDocument wdoc1 = (SxwDocument) orig; + SxwDocument wdoc2 = (SxwDocument) modifiedDoc; + + Document doc1 = wdoc1.getContentDOM(); + Document doc2 = wdoc2.getContentDOM(); + + Iterator i1 = new ParaNodeIterator(cc_, doc1.getDocumentElement()); + Iterator i2 = new ParaNodeIterator(cc_, doc2.getDocumentElement()); + + DiffAlgorithm diffAlgo = new IteratorLCSAlgorithm(); + + // find out the paragrah level diffs + Difference[] diffTable = diffAlgo.computeDiffs(i1, i2); + + if (Debug.isFlagSet(Debug.INFO)) { + Debug.log(Debug.INFO, "Diff Result: "); + + for (int i = 0; i < diffTable.length; i++) { + Debug.log(Debug.INFO, diffTable[i].debug()); + } + } + + // merge the paragraphs + NodeMergeAlgorithm charMerge = new CharacterBaseParagraphMerge(); + DocumentMerge docMerge = new DocumentMerge(cc_, charMerge); + + Iterator result = null; + + docMerge.applyDifference(i1, i2, diffTable); + } +} + + diff --git a/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentSerializerImpl.java b/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentSerializerImpl.java new file mode 100644 index 000000000000..181fc9019f24 --- /dev/null +++ b/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/DocumentSerializerImpl.java @@ -0,0 +1,564 @@ +/************************************************************************ + * + * The Contents of this file are made available subject to the terms of + * either of the following licenses + * + * - GNU Lesser General Public License Version 2.1 + * - Sun Industry Standards Source License Version 1.1 + * + * Sun Microsystems Inc., October, 2000 + * + * GNU Lesser General Public License Version 2.1 + * ============================================= + * Copyright 2000 by Sun Microsystems, Inc. + * 901 San Antonio Road, Palo Alto, CA 94303, USA + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * + * Sun Industry Standards Source License Version 1.1 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.1 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://www.openoffice.org/license.html. + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2000 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + * Contributor(s): _______________________________________ + * + * + ************************************************************************/ + +package org.openoffice.xmerge.converter.xml.sxw.wordsmith; + +import org.w3c.dom.NodeList; +import org.w3c.dom.Node; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Element; + +import java.io.IOException; + +import org.openoffice.xmerge.Document; +import org.openoffice.xmerge.ConvertData; +import org.openoffice.xmerge.ConvertException; +import org.openoffice.xmerge.DocumentSerializer; +import org.openoffice.xmerge.ConverterCapabilities; +import org.openoffice.xmerge.converter.xml.OfficeConstants; +import org.openoffice.xmerge.converter.xml.sxw.SxwDocument; +import org.openoffice.xmerge.converter.palm.PalmDB; +import org.openoffice.xmerge.converter.palm.PdbEncoder; +import org.openoffice.xmerge.converter.palm.Record; +import org.openoffice.xmerge.converter.palm.PdbUtil; +import org.openoffice.xmerge.converter.palm.PalmDocument; +import org.openoffice.xmerge.converter.xml.OfficeDocument; +import org.openoffice.xmerge.util.*; +import org.openoffice.xmerge.converter.xml.*; + +/** + * <p>WordSmith implementation of + * org.openoffice.xmerge.DocumentSerializer + * for the {@link + * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl + * PluginFactoryImpl}.</p> + * + * <p>The <code>serialize</code> method traverses the DOM + * document from the given <code>Document</code> object. It uses a + * <code>DocEncoder</code> object for the actual conversion of + * contents to the WordSmith format.</p> + * + * @author Herbie Ong, David Proulx + */ + +// DJP: take out "implements OfficeConstants" +public final class DocumentSerializerImpl +implements OfficeConstants, DocumentSerializer { + + /** A WSEncoder object for encoding to WordSmith. */ + private WSEncoder encoder = null; + + /** The <code>StyleCatalog</code>. */ + private StyleCatalog styleCat = null; + + private WseFontTable fontTable = new WseFontTable(); + private WseColorTable colorTable = new WseColorTable(); + + /** + * The <code>SxwDocument</code> object that this converter + * processes. + */ + private SxwDocument sxwDoc = null; + + /** + * Constructor. + * + * @param doc The <code>Document</code> to convert. + */ + public DocumentSerializerImpl(Document doc) { + sxwDoc = (SxwDocument) doc; + } + + + /** + * <p>Method to convert a <code>Document</code> into a + * <code>PalmDocument</code>.</p> + * + * <p>This method is not thread safe for performance reasons. + * This method should not be called from within two threads. + * It would be best to call this method only once per object + * instance.</p> + * + * <p>Note that the doc parameter needs to be an XML + * <code>Document</code>, else this method will throw a + * <code>ClassCastException</code>. I think this is a hack, + * but this is the only way to not modify most of the existing + * code right now.</p> + * + * @param doc Input should be an XML <code>Document</code> + * object + * @param os Output of <code>PalmDB</code> object + * + * @throws ConvertException If any conversion error occurs. + * @throws IOException If any I/O error occurs. + */ + public ConvertData serialize() + throws IOException { + + + // get the server document name + String docName = sxwDoc.getName(); + + // get DOM document + org.w3c.dom.Document domDoc = sxwDoc.getContentDOM(); + + // Create WordSmith encoder object. Add WordSmith header, + // empty font table to it. + encoder = new WSEncoder(); + encoder.addElement(fontTable); + encoder.addElement(colorTable); + + // Read the styles into the style catalog + String families[] = new String[3]; + families[0] = "text"; + families[1] = "paragraph"; + families[2] = "paragraph"; + Class classes[] = new Class[3]; + classes[0] = TextStyle.class; + classes[1] = ParaStyle.class; + classes[2] = TextStyle.class; + styleCat = new StyleCatalog(25); + + // Parse the input document + // DJP todo: eliminate multiple calls to add() when it can + // recurse properly. + NodeList nl = domDoc.getElementsByTagName(TAG_OFFICE_STYLES); + styleCat.add(nl.item(0), families, classes, null, false); + nl = domDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); + styleCat.add(nl.item(0), families, classes, null, false); + nl = domDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES); + styleCat.add(nl.item(0), families, classes, null, false); + + // Traverse to the office:body element. + // There should only be one. + NodeList list = domDoc.getElementsByTagName(TAG_OFFICE_BODY); + int len = list.getLength(); + if (len > 0) { + Node node = list.item(0); + traverseBody(node); + } + + // create a PalmDB object and ConvertData object. + // + Record records[] = encoder.getRecords(); + + ConvertData cd = new ConvertData(); + PalmDocument palmDoc = new PalmDocument(docName, + PdbUtil.intID("WrdS"), PdbUtil.intID("BDOC"), 0, + PalmDB.PDB_HEADER_ATTR_BACKUP, records); + cd.addDocument(palmDoc); + return cd; + } + + + /** + * This method traverses <i>office:body</i> element. + * + * @param node <i>office:body</i> <code>Node</code>. + * + * @throws IOException If any I/O error occurs. + */ + private void traverseBody(Node node) throws IOException { + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH) || + nodeName.equals(TAG_HEADING)) { + + traverseParagraph(child); + + } else if (nodeName.equals(TAG_UNORDERED_LIST)) { + + traverseList(child); + + } else if (nodeName.equals(TAG_ORDERED_LIST)) { + + traverseList(child); + + } else { + + Debug.log(Debug.INFO, "<OTHERS " /* + XmlDebug.nodeInfo(child) */ + " />"); + } + } + } + } + + } + + + /** + * This method traverses the <i>text:p</i> and <i>text:h</i> + * element <code>Node</code> objects. + * + * @param node A <i>text:p</i> or <i>text:h</i> <code>Node</code>. + * + * @throws IOException If any I/O error occurs. + */ + private void traverseParagraph(Node node) throws IOException { + + String styleName = findAttribute(node, "text:style-name"); + ParaStyle pstyle = (ParaStyle)styleCat.lookup(styleName, "paragraph", + null, ParaStyle.class); + + // If the style does not exist in the style catalog for some reason, + // make up a default style and use it. We'll have to add this default + // style to the style catalog the first time it is used. + if (pstyle == null) { + styleName = "CONVERTER-DEFAULT"; + pstyle = (ParaStyle)styleCat.lookup(styleName, "paragraph", null, + ParaStyle.class); + if (pstyle == null) { + pstyle = new ParaStyle(styleName, "paragraph", null, + (String [])null, null, styleCat); + styleCat.add(pstyle); + styleCat.add(new TextStyle(styleName, "paragraph", null, + 0, 0, 12, "Times-Roman", styleCat)); + } + } + + pstyle = (ParaStyle)pstyle.getResolved(); + encoder.addElement(new WsePara(pstyle, styleCat)); + TextStyle defParaTextStyle = (TextStyle) + styleCat.lookup(styleName, "paragraph", null, TextStyle.class); + + traverseParaContents(node, defParaTextStyle); + } + + + /** + * This method traverses a paragraph content. Note that this + * method may recurse to call itself. + * + * @param node A paragraph or content <code>Node</code> + */ + private void traverseParaContents(Node node, TextStyle defTextStyle) { + + String styleName = findAttribute(node, "text:style-name"); + TextStyle style = (TextStyle) + styleCat.lookup(styleName, "text", null, TextStyle.class); + + if (node.hasChildNodes()) { + NodeList nodeList = node.getChildNodes(); + int nChildren = nodeList.getLength(); + + for (int i = 0; i < nChildren; i++) { + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.TEXT_NODE) { + + // this is for grabbing text nodes. + String s = child.getNodeValue(); + + if (s.length() > 0) { + if (style != null) + encoder.addElement(new WseTextRun(s, style, styleCat, + fontTable, colorTable)); + else + encoder.addElement(new WseTextRun(s, defTextStyle, + styleCat, fontTable, colorTable)); + } + + } else if (child.getNodeType() == Node.ELEMENT_NODE) { + + String childNodeName = child.getNodeName(); + + if (childNodeName.equals(TAG_SPACE)) { + + // this is for text:s tags. + NamedNodeMap map = child.getAttributes(); + Node attr = map.getNamedItem(ATTRIBUTE_SPACE_COUNT); + StringBuffer space = new StringBuffer(" "); + int count = 1; + + if (attr != null) { + try { + String countStr = attr.getNodeValue(); + count = Integer.parseInt(countStr.trim()); + } catch (NumberFormatException e) { + Debug.log(Debug.ERROR, "Problem parsing space tag", e); + } + } + + for (int j = 1; j < count; j++) + space.append(" "); + + encoder.addElement(new WseTextRun(space.toString(), + defTextStyle, + styleCat, fontTable, colorTable)); + Debug.log(Debug.INFO, "<SPACE count=\"" + count + "\" />"); + + } else if (childNodeName.equals(TAG_TAB_STOP)) { + + // this is for text:tab-stop + encoder.addElement(new WseTextRun("\t", defTextStyle, styleCat, + fontTable, colorTable)); + + Debug.log(Debug.INFO, "<TAB/>"); + + } else if (childNodeName.equals(TAG_LINE_BREAK)) { + + // this is for text:line-break + encoder.addElement(new WseTextRun("\n", defTextStyle, + styleCat, fontTable, colorTable)); + + Debug.log(Debug.INFO, "<LINE-BREAK/>"); + + } else if (childNodeName.equals(TAG_SPAN)) { + + // this is for text:span + Debug.log(Debug.INFO, "<SPAN>"); + traverseParaContents(child, defTextStyle); + Debug.log(Debug.INFO, "</SPAN>"); + + } else if (childNodeName.equals(TAG_HYPERLINK)) { + + // this is for text:a + Debug.log(Debug.INFO, "<HYPERLINK>"); + traverseParaContents(child, defTextStyle); + Debug.log(Debug.INFO, "<HYPERLINK/>"); + + } else if (childNodeName.equals(TAG_BOOKMARK) || + childNodeName.equals(TAG_BOOKMARK_START)) { + + Debug.log(Debug.INFO, "<BOOKMARK/>"); + + } else { + + Debug.log(Debug.INFO, "<OTHERS " /* + XmlDebug.nodeInfo(child) */ + " />"); + } + + } + + } + } + } + + + /** + * This method traverses list tags <i>text:unordered-list</i> and + * <i>text:ordered-list</i>. A list can only contain one optional + * <i>text:list-header</i> and one or more <i>text:list-item</i> + * elements. + * + * @param node A list <code>Node</code>. + * + * @throws IOException If any I/O error occurs. + */ + private void traverseList(Node node) throws IOException { + + Debug.log(Debug.TRACE, "<LIST>"); + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_LIST_ITEM)) { + + traverseListItem(child); + + } else if (nodeName.equals(TAG_LIST_HEADER)) { + + traverseListHeader(child); + + } else { + + Debug.log(Debug.ERROR, "<INVALID-XML-BUG " + " />"); + } + } + } + } + + Debug.log(Debug.TRACE, "</LIST>"); + } + + + /** + * This method traverses a <i>text:list-header</i> element. + * It contains one or more <i>text:p</i> elements. + * + * @param node A list header <code>Node</code>. + * + * @throws IOException If any I/O error occurs. + */ + private void traverseListHeader(Node node) throws IOException { + + Debug.log(Debug.TRACE, "<LIST-HEADER>"); + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH)) { + + traverseParagraph(child); + + } else { + + Debug.log(Debug.TRACE, "<INVALID-XML-BUG " + " />"); + } + } + } + } + + Debug.log(Debug.TRACE, "</LIST-HEADER>"); + } + + + /** + * This method will traverse a <i>text:list-item</i>. + * A list item may contain one or more of <i>text:p</i>, + * <i>text:h</i>, <i>text:section</i>, + * <i>text:ordered-list</i> and <i>text:unordered-list</i>. + * + * This method currently only implements grabbing <i>text:p</i>, + * <i>text:h</i>, <i>text:unordered-list</i> and + * <i>text:ordered-list</i>. + * + * @param Node <code>Node</code> to traverse. + * + * @throws IOException If any I/O error occurs. + */ + private void traverseListItem(Node node) throws IOException { + + Debug.log(Debug.TRACE, "<LIST-ITEM>"); + + if (node.hasChildNodes()) { + + NodeList nodeList = node.getChildNodes(); + int len = nodeList.getLength(); + + for (int i = 0; i < len; i++) { + + Node child = nodeList.item(i); + + if (child.getNodeType() == Node.ELEMENT_NODE) { + + String nodeName = child.getNodeName(); + + if (nodeName.equals(TAG_PARAGRAPH)) { + + traverseParagraph(child); + + } else if (nodeName.equals(TAG_UNORDERED_LIST)) { + + traverseList(child); + + } else if (nodeName.equals(TAG_ORDERED_LIST)) { + + traverseList(child); + + } else { + + Debug.log(Debug.ERROR, "<INVALID-XML-BUG " + " />"); + } + } + } + } + + Debug.log(Debug.TRACE, "</LIST-ITEM>"); + } + + + /** + * Look up a <code>Node</code> object's named attribute and return + * its value + * + * @param node The <code>Node</code>. + * @param name The attribute name. + * + * @return The value of the named attribute + */ + private String findAttribute(Node node, String name) { + NamedNodeMap attrNodes = node.getAttributes(); + if (attrNodes != null) { + int len = attrNodes.getLength(); + for (int i = 0; i < len; i++) { + Node attr = attrNodes.item(i); + if (attr.getNodeName().equals(name)) + return attr.getNodeValue(); + } + } + return null; + } +} + |