path: root/xmerge/source/wordsmith
diff options
authorJens-Heiner Rechtien <>2005-10-24 16:54:34 +0000
committerJens-Heiner Rechtien <>2005-10-24 16:54:34 +0000
commitcfaf0fd33c7d546da92c88505515a8672a43d6d5 (patch)
tree6b2285550f682d5f79cfe65097c8fb1887aa070a /xmerge/source/wordsmith
parent25138c93a590b8c4376e3b66a6f135bb0cf1876c (diff)
2005/06/08 16:30:33 lo restructuring of project and fix for #i44847#
Diffstat (limited to 'xmerge/source/wordsmith')
3 files changed, 1284 insertions, 0 deletions
diff --git a/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/ b/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/
new file mode 100644
index 000000000000..61e22e385bfd
--- /dev/null
+++ b/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/
@@ -0,0 +1,593 @@
+ *
+ * The Contents of this file are made available subject to the terms of
+ * either of the following licenses
+ *
+ * - GNU Lesser General Public License Version 2.1
+ * - Sun Industry Standards Source License Version 1.1
+ *
+ * Sun Microsystems Inc., October, 2000
+ *
+ * GNU Lesser General Public License Version 2.1
+ * =============================================
+ * Copyright 2000 by Sun Microsystems, Inc.
+ * 901 San Antonio Road, Palo Alto, CA 94303, USA
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ *
+ *
+ * Sun Industry Standards Source License Version 1.1
+ * =================================================
+ * The contents of this file are subject to the Sun Industry Standards
+ * Source License Version 1.1 (the "License"); You may not use this file
+ * except in compliance with the License. You may obtain a copy of the
+ * License at
+ *
+ * Software provided under this License is provided on an "AS IS" basis,
+ * See the License for the specific provisions governing your rights and
+ * obligations concerning the Software.
+ *
+ * The Initial Developer of the Original Code is: Sun Microsystems, Inc.
+ *
+ * Copyright: 2000 by Sun Microsystems, Inc.
+ *
+ * All Rights Reserved.
+ *
+ * Contributor(s): _______________________________________
+ *
+ *
+ ************************************************************************/
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+import org.w3c.dom.*;
+import java.util.Enumeration;
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.ConvertData;
+import org.openoffice.xmerge.ConvertException;
+import org.openoffice.xmerge.DocumentDeserializer;
+import org.openoffice.xmerge.converter.xml.OfficeConstants;
+import org.openoffice.xmerge.converter.palm.PalmDB;
+import org.openoffice.xmerge.converter.palm.Record;
+import org.openoffice.xmerge.converter.palm.PdbDecoder;
+import org.openoffice.xmerge.converter.palm.PalmDocument;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+import java.util.Vector;
+import org.openoffice.xmerge.converter.xml.*;
+import org.openoffice.xmerge.util.Debug;
+import org.openoffice.xmerge.util.XmlUtil;
+ * <p>WordSmith implementation of
+ * org.openoffice.xmerge.DocumentDeserializer
+ * for the {@link
+ * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl
+ * PluginFactoryImpl}.</p>
+ *
+ * The <code>deserialize</code> method uses a
+ * <code>DocDecoder</code> to read the WordSmith format into a
+ * <code>String</code> object, then it calls <code>buildDocument</code>
+ * to create a <code>SxwDocument</code> object from it.
+ *
+ * @author Herbie Ong, David Proulx
+ */
+public final class DocumentDeserializerImpl
+implements DOCConstants, OfficeConstants, DocumentDeserializer {
+ /** A Decoder object for decoding WordSmith format. */
+ // Replaced with a fresh instance for every PalmDocument handled by
+ // deserialize(Document, ConvertData).
+ private WSDecoder decoder = null;
+ // Set by buildDocument whenever it encounters a WseFontTable element.
+ WseFontTable fontTable = null;
+ // Set by buildDocument whenever it encounters a WseColorTable element.
+ WseColorTable colorTable = null;
+ // Catalog of styles newly created while building a document; re-created
+ // at the start of every buildDocument call.
+ StyleCatalog styleCat = null;
+ // Catalog of styles read from the original document by readStyleCatalog;
+ // re-created (empty) at the start of every buildDocument call.
+ StyleCatalog oldStyleCat = null;
+ /** A <code>ConvertData</code> object assigned to this object. */
+ private ConvertData cd = null;
+ /**
+ * Creates a deserializer bound to the given <code>ConvertData</code>.
+ *
+ * @param cd A <code>ConvertData</code> object that the no-argument
+ * <code>deserialize</code> method reads its input from.
+ */
+ public DocumentDeserializerImpl(ConvertData cd) {
+     // The parameter shadows the field of the same name, so the
+     // assignment target has to be qualified with "this".
+     this.cd = cd;
+ }
+ /**
+ * Convert the <code>ConvertData</code> supplied to the constructor
+ * into a <code>SxwDocument</code> object.
+ *
+ * <p>Delegates to <code>deserialize(Document, ConvertData)</code>
+ * with no original document (<code>null</code>).</p>
+ *
+ * @return Resulting <code>Document</code> object.
+ *
+ * @throws ConvertException If any conversion error occurs.
+ * @throws IOException If any I/O error occurs.
+ */
+ public Document deserialize() throws ConvertException,
+ IOException {
+ return deserialize(null, cd);
+ }
+ /**
+ * Decodes every <code>PalmDocument</code> held by <code>cd</code>
+ * and builds an office document from its records.
+ *
+ * <p>Each entry is decoded with a new <code>WSDecoder</code> and
+ * passed to <code>buildDocument</code>. When <code>cd</code> holds
+ * several documents only the one built last is returned; when it
+ * holds none the result is <code>null</code>.</p>
+ *
+ * @param origDoc Original document handed on to
+ * <code>buildDocument</code>; may be
+ * <code>null</code>.
+ * @param cd The <code>ConvertData</code> to read from.
+ *
+ * @return The document built from the last entry, or
+ * <code>null</code> if there were no entries.
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ public Document deserialize(Document origDoc, ConvertData cd)
+     throws IOException {
+     Document result = null;
+     for (Enumeration docs = cd.getDocumentEnumeration(); docs.hasMoreElements(); ) {
+         PalmDocument source = (PalmDocument) docs.nextElement();
+         Record[] records = source.getPdb().getRecords();
+         decoder = new WSDecoder();
+         Wse[] elements = decoder.parseDocument(records);
+         result = buildDocument(source.getName(), elements, origDoc);
+     }
+     return result;
+ }
+ /**
+ * Temporary method to read the existing <code>StyleCatalog</code>
+ * as a starting point.
+ *
+ * <p>The parent document is written to a byte buffer, re-read as an
+ * <code>SxwDocument</code>, and the first <i>office:styles</i>,
+ * <i>office:automatic-styles</i> and <i>office:master-styles</i>
+ * elements of its content DOM are added to
+ * <code>oldStyleCat</code>. Any exception is logged and otherwise
+ * ignored, so the catalog may end up only partially filled.</p>
+ *
+ * @param parentDoc The parent <code>Document</code>.
+ */
+ private void readStyleCatalog(Document parentDoc) {
+     try {
+         // NOTE(review): the buffer declaration and the read() call were
+         // truncated in the source under review; they are reconstructed
+         // here from the surviving fragments ("bos = new",
+         // "ByteArrayInputStream(bos.toByteArray()));") - confirm that
+         // SxwDocument exposes read(InputStream).
+         java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
+         parentDoc.write(bos);
+         SxwDocument sxwDoc = new SxwDocument("old");
+         sxwDoc.read(new java.io.ByteArrayInputStream(bos.toByteArray()));
+         org.w3c.dom.Document domDoc = sxwDoc.getContentDOM();
+
+         // "paragraph" appears twice on purpose: paragraph styles are
+         // read once as ParaStyle and once as TextStyle.
+         String families[] = { "text", "paragraph", "paragraph" };
+         Class classes[] = { TextStyle.class, ParaStyle.class, TextStyle.class };
+
+         String containers[] = {
+             TAG_OFFICE_STYLES,
+             TAG_OFFICE_AUTOMATIC_STYLES,
+             TAG_OFFICE_MASTER_STYLES
+         };
+         for (int i = 0; i < containers.length; i++) {
+             NodeList nl = domDoc.getElementsByTagName(containers[i]);
+             oldStyleCat.add(nl.item(0), families, classes, null, false);
+         }
+     } catch (Exception e) {
+         Debug.log(Debug.ERROR, "", e);
+     }
+ }
+ /**
+ * Looks for the single paragraph <code>Style</code> in
+ * <code>paraStyles</code> whose name equals that of a
+ * "paragraph"-family text style in the old style catalog that
+ * matches <code>tStyle</code>.
+ *
+ * @param paraStyles An array of paragraph <code>Style</code>
+ * objects to choose from.
+ * @param tStyle Text <code>Style</code> to match.
+ *
+ * @return The paragraph <code>Style</code> that matches, or
+ * <code>null</code> unless exactly one name match was
+ * found.
+ */
+ private ParaStyle matchParaByText(Style paraStyles[], TextStyle tStyle) {
+     Style candidates[] = (Style[]) oldStyleCat.getMatching(tStyle);
+     int hits = 0;
+     int lastHit = -1;
+     for (int c = 0; c < candidates.length; c++) {
+         TextStyle candidate = (TextStyle) candidates[c];
+         if (candidate.getFamily().equals("paragraph")) {
+             // Count every entry of paraStyles carrying this name.
+             for (int p = 0; p < paraStyles.length; p++) {
+                 if (candidate.getName().equals(paraStyles[p].getName())) {
+                     hits++;
+                     lastHit = p;
+                 }
+             }
+         }
+     }
+     return (hits == 1) ? (ParaStyle) paraStyles[lastHit] : null;
+ }
+ /**
+ * Take a <code>String</code> of text and turn it into a sequence
+ * of <code>Node</code> objects.
+ *
+ * <p>Plain text becomes text nodes, every tab character becomes a
+ * <code>TAG_TAB_STOP</code> element, and every run of two or more
+ * spaces becomes a <code>TAG_SPACE</code> element whose
+ * <code>ATTRIBUTE_SPACE_COUNT</code> attribute holds the length of
+ * the run. A single space stays part of the surrounding text.</p>
+ *
+ * @param text <code>String</code> of text.
+ * @param parentDoc Parent <code>Document</code> used to create
+ * the nodes.
+ *
+ * @return Array of <code>Node</code> objects, in text order; empty
+ * if <code>text</code> is empty.
+ */
+ private Node[] parseText(String text, org.w3c.dom.Document parentDoc) {
+     Vector nodeVec = new Vector();
+     // Break up the text from the WordSmith text run into Open
+     // Office text runs. There may be more runs in OO because
+     // tabs and runs of 2 or more spaces map to their own nodes.
+     while (true) {
+         // Find the first tab and the first multi-space run. The search
+         // string is two spaces: a lone space is ordinary text.
+         int spaceIndex = text.indexOf("  ");
+         int tabIndex = text.indexOf("\t");
+         if ((spaceIndex == -1) && (tabIndex == -1))
+             break;
+
+         // Index of whichever of the two occurs first.
+         int closerIndex;
+         if (spaceIndex == -1)
+             closerIndex = tabIndex;
+         else if (tabIndex == -1)
+             closerIndex = spaceIndex;
+         else
+             closerIndex = Math.min(spaceIndex, tabIndex);
+
+         // Text ahead of the tab/space run becomes its own text node.
+         if (closerIndex > 0) {
+             String beginningText = text.substring(0, closerIndex);
+             nodeVec.add(parentDoc.createTextNode(beginningText));
+             log("<TEXT>");
+             log(beginningText);
+             log("</TEXT>");
+         }
+         text = text.substring(closerIndex);
+
+         // Emit an element for the tab or the space run and chop the
+         // characters it represents off the front of "text".
+         if (closerIndex == tabIndex) {
+             nodeVec.add(parentDoc.createElement(TAG_TAB_STOP));
+             text = text.substring(1); // tab is always a single character
+             log("<TAB/>");
+         } else {
+             // The run is known to start with two spaces; extend it
+             // over any further spaces.
+             int nrSpaces = 2;
+             while ((nrSpaces < text.length()) && (text.charAt(nrSpaces) == ' '))
+                 nrSpaces++;
+             Element spaceNode = parentDoc.createElement(TAG_SPACE);
+             spaceNode.setAttribute(ATTRIBUTE_SPACE_COUNT, Integer.toString(nrSpaces));
+             nodeVec.add(spaceNode);
+             text = text.substring(nrSpaces);
+             log("<SPACE count=\"" + nrSpaces + "\" />");
+         }
+     }
+     // No more tabs or space sequences. If there's any remaining
+     // text create a text node for it.
+     if (text.length() > 0) {
+         nodeVec.add(parentDoc.createTextNode(text));
+         log("<TEXT>");
+         log(text);
+         log("</TEXT>");
+     }
+     // Return the collected nodes as an array.
+     Node nodes[] = new Node[nodeVec.size()];
+     nodeVec.copyInto(nodes);
+     return nodes;
+ }
+ /**
+ * Builds a <code>SxwDocument</code> from a sequence of decoded
+ * WordSmith elements.
+ *
+ * <p>Each <code>WsePara</code> starts a new paragraph element and
+ * each <code>WseTextRun</code> adds text to the current paragraph,
+ * either directly or wrapped in a span. Styles are taken from the
+ * original document's catalog when exactly one matches; otherwise
+ * new styles named "PPP<i>n</i>" (paragraph) or "TTT<i>n</i>" (text)
+ * are created. Font declarations and style sections are copied from
+ * the original document when one is given; otherwise two fixed font
+ * declarations are written. A paragraph or span for which no style
+ * can be resolved is emitted without a style-name attribute.</p>
+ *
+ * @param docName <code>Document</code> name.
+ * @param data Decoded WordSmith elements, in document order.
+ * @param origDoc Original <code>Document</code> to take styles
+ * and font declarations from, or <code>null</code>.
+ *
+ * @return Resulting <code>SxwDocument</code> object.
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ private SxwDocument buildDocument(String docName, Wse[] data, Document origDoc)
+     throws IOException {
+     // create minimum office xml document.
+     SxwDocument sxwDoc = new SxwDocument(docName);
+     sxwDoc.initContentDOM();
+     org.w3c.dom.Document doc = sxwDoc.getContentDOM();
+
+     // Grab hold of the office:body tag,
+     // Assume there should be one.
+     // This is where top level paragraphs will append to.
+     NodeList list = doc.getElementsByTagName(TAG_OFFICE_BODY);
+     Node bodyNode = list.item(0);
+
+     styleCat = new StyleCatalog(50);
+     oldStyleCat = new StyleCatalog(50);
+     if (origDoc != null)
+         readStyleCatalog(origDoc);
+
+     Element currPara = null;
+     ParaStyle currParaStyle = null;
+     int newTextStyleNr = 0;
+     int newParaStyleNr = 0;
+
+     // Now write out the document body by running through
+     // the list of WordSmith elements and processing each one
+     // in turn.
+     for (int i = 0; i < data.length; i++) {
+         if (data[i].getClass() == WsePara.class) {
+             currPara = doc.createElement(TAG_PARAGRAPH);
+             log("</PARA>");
+             log("<PARA>");
+             WsePara p = (WsePara)data[i];
+
+             // Save info about the first text run, if there is one.
+             WseTextRun firstTextRun = null;
+             if ((data.length >= i + 2)
+                 && (data[i+1].getClass() == WseTextRun.class))
+                 firstTextRun = (WseTextRun)data[i+1];
+
+             // See if we can find a unique match in the catalog
+             // of existing styles from the original document.
+             Style matches[] = oldStyleCat.getMatching(p.makeStyle());
+             ParaStyle pStyle = null;
+             if (matches.length == 1) {
+                 pStyle = (ParaStyle)matches[0];
+                 log("using an existing style");
+             } else if ((matches.length > 1) && (firstTextRun != null)) {
+                 pStyle = matchParaByText(matches, firstTextRun.makeStyle());
+                 log("resolved a para by looking @ text");
+             }
+
+             // If nothing found so far, try looking in the catalog
+             // of newly-created styles.
+             // DJP FIXME: if we need to add two para styles with the
+             // same para formatting info but different default text
+             // styles, this won't work!
+             if (pStyle == null) {
+                 log("had " + matches.length + " matches in old catalog");
+                 matches = styleCat.getMatching(p.makeStyle());
+                 if (matches.length == 0) {
+                     pStyle = p.makeStyle();
+                     String newName = "PPP" + ++newParaStyleNr;
+                     pStyle.setName(newName);
+                     styleCat.add(pStyle);
+                     // DJP: write in the text format info here
+                     log("created a new style");
+                 } else if (matches.length == 1) {
+                     pStyle = (ParaStyle)matches[0];
+                     log("re-using a new style");
+                 } else if (firstTextRun != null) {
+                     pStyle = matchParaByText(matches, firstTextRun.makeStyle());
+                     if (pStyle != null) {
+                         log("resolved a (new) para by looking @ text");
+                     } else
+                         log("Hey this shouldn't happen! - nr of matches is "
+                             + matches.length);
+                 }
+             }
+
+             if (pStyle == null) {
+                 // No style could be resolved: emit the paragraph without a
+                 // style name instead of failing on a null reference.
+                 log("Unable to figure out a para style");
+             } else {
+                 // Figured out a style to use. Specify the style in this
+                 // paragraph's attributes.
+                 currPara.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, pStyle.getName());
+             }
+             bodyNode.appendChild(currPara);
+             currParaStyle = pStyle;
+         } else if (data[i].getClass() == WseTextRun.class) {
+             WseTextRun tr = (WseTextRun)data[i];
+             TextStyle trStyle = null;
+             Node trNodes[] = parseText(tr.getText(), doc);
+
+             // First see if the formatting of this text run matches
+             // the default text formatting for this paragraph. If
+             // it does, then just make the text node(s) children of
+             // the current paragraph. Skipped when the paragraph has no
+             // resolved style, since there is nothing to compare against.
+             if (currParaStyle != null) {
+                 Style[] cps = new Style[1];
+                 cps[0] = currParaStyle;
+                 if (matchParaByText(cps, tr.makeStyle()) != null) {
+                     for (int ii = 0; ii < trNodes.length; ii++) {
+                         currPara.appendChild(trNodes[ii]);
+                     }
+                     continue;
+                 }
+             }
+
+             // Check for existing, matching styles in the old style
+             // catalog. If exactly one is found, use it. Otherwise,
+             // check the new style catalog, and either use the style
+             // found or add this new one to it.
+             Style matches[] = oldStyleCat.getMatching(tr.makeStyle());
+             if (matches.length == 1)
+                 trStyle = (TextStyle)matches[0];
+             else {
+                 matches = styleCat.getMatching(tr.makeStyle());
+                 if (matches.length == 0) {
+                     trStyle = tr.makeStyle();
+                     String newName = "TTT" + ++newTextStyleNr;
+                     trStyle.setName(newName);
+                     styleCat.add(trStyle);
+                 } else if (matches.length == 1)
+                     trStyle = (TextStyle)matches[0];
+                 else
+                     log("multiple text style matches from new catalog");
+             }
+
+             // Create a text span node, set the style attribute (when a
+             // style was resolved), make the text node(s) its children,
+             // and append it to current paragraph's list of children.
+             Element textSpanNode = doc.createElement(TAG_SPAN);
+             if (trStyle != null)
+                 textSpanNode.setAttribute(ATTRIBUTE_TEXT_STYLE_NAME, trStyle.getName());
+             for (int ii = 0; ii < trNodes.length; ii++) {
+                 textSpanNode.appendChild(trNodes[ii]);
+             }
+             currPara.appendChild(textSpanNode);
+             log("</SPAN>");
+         }
+         else if (data[i].getClass() == WseFontTable.class) {
+             fontTable = (WseFontTable)data[i];
+         }
+         else if (data[i].getClass() == WseColorTable.class) {
+             colorTable = (WseColorTable)data[i];
+         }
+     }
+
+     //NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT);
+     NodeList r = doc.getElementsByTagName(TAG_OFFICE_DOCUMENT_CONTENT);
+     Node rootNode = r.item(0);
+
+     org.w3c.dom.NodeList nl;
+     if (origDoc != null) {
+         // Read the original document.
+         // NOTE(review): the buffer declaration and the read() call were
+         // truncated in the source under review and are reconstructed
+         // here - confirm that SxwDocument exposes read(InputStream).
+         java.io.ByteArrayOutputStream bos = new java.io.ByteArrayOutputStream();
+         origDoc.write(bos);
+         SxwDocument origSxwDoc = new SxwDocument("old");
+         origSxwDoc.read(new java.io.ByteArrayInputStream(bos.toByteArray()));
+         org.w3c.dom.Document origDomDoc = origSxwDoc.getContentDOM();
+
+         XmlUtil xu = new XmlUtil();
+         org.w3c.dom.DocumentFragment df;
+         org.w3c.dom.Node newNode;
+
+         // Copy the font declarations and then the three style sections
+         // from the original document, in this order, ahead of the body.
+         String copiedSections[] = {
+             TAG_OFFICE_FONT_DECLS,
+             TAG_OFFICE_STYLES,
+             TAG_OFFICE_AUTOMATIC_STYLES,
+             TAG_OFFICE_MASTER_STYLES
+         };
+         for (int s = 0; s < copiedSections.length; s++) {
+             nl = origDomDoc.getElementsByTagName(copiedSections[s]);
+             df = doc.createDocumentFragment();
+             newNode = xu.deepClone(df, nl.item(0));
+             rootNode.insertBefore(newNode, bodyNode);
+         }
+     }
+     // Original document not specified. We need to add font declarations.
+     // DJP: this might just be for debugging. Merger will probably put
+     // the "real" ones in.
+     // DJP: if really doing it this way, do it right: gather font names
+     // from style catalog(s).
+     else {
+         org.w3c.dom.Node declNode;
+         log("<FONT-DECLS/>");
+         declNode = doc.createElement(TAG_OFFICE_FONT_DECLS);
+         rootNode.insertBefore(declNode, bodyNode);
+
+         org.w3c.dom.Element fontNode;
+         fontNode = doc.createElement(TAG_STYLE_FONT_DECL);
+         fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, "Arial");
+         fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, "Arial");
+         fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable");
+         declNode.appendChild(fontNode);
+
+         fontNode = doc.createElement(TAG_STYLE_FONT_DECL);
+         fontNode.setAttribute(ATTRIBUTE_STYLE_NAME, "Arioso");
+         fontNode.setAttribute(ATTRIBUTE_FO_FONT_FAMILY, "Arioso");
+         fontNode.setAttribute(ATTRIBUTE_STYLE_FONT_PITCH, "variable");
+         declNode.appendChild(fontNode);
+     }
+
+     // Now add any new styles we have created in this document.
+     nl = doc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
+     Node autoStylesNode = nl.item(0);
+     if (autoStylesNode == null) {
+         autoStylesNode = doc.createElement(TAG_OFFICE_AUTOMATIC_STYLES);
+         rootNode.insertBefore(autoStylesNode, bodyNode);
+     }
+     Node newStyleCatNode = styleCat.writeNode(doc, "dummy");
+     nl = newStyleCatNode.getChildNodes();
+     int nNodes = nl.getLength();
+     for (int i = 0; i < nNodes; i++) {
+         // Always item(0): appendChild moves the node out of the live
+         // child list, so the next child becomes the first one.
+         autoStylesNode.appendChild(nl.item(0));
+     }
+
+     oldStyleCat.dumpCSV(true);
+     styleCat.dumpCSV(true);
+     return sxwDoc;
+ }
+ /**
+ * Sends message to the log object at <code>Debug.TRACE</code>
+ * level.
+ *
+ * @param str Debug message.
+ */
+ private void log(String str) {
+ Debug.log(Debug.TRACE, str);
+ }
+ /*
+ public static void main(String args[]) {
+ // DocumentDeserializerImpl d = new DocumentDeserializerImpl(new InputStream());
+ Node nodes[] = parseText("Tab here:\tThen some more text");
+ }
diff --git a/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/ b/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/
new file mode 100644
index 000000000000..d3ac39ab559a
--- /dev/null
+++ b/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/
@@ -0,0 +1,127 @@
+ *
+ * The Contents of this file are made available subject to the terms of
+ * either of the following licenses
+ *
+ * - GNU Lesser General Public License Version 2.1
+ * - Sun Industry Standards Source License Version 1.1
+ *
+ * Sun Microsystems Inc., October, 2000
+ *
+ * GNU Lesser General Public License Version 2.1
+ * =============================================
+ * Copyright 2000 by Sun Microsystems, Inc.
+ * 901 San Antonio Road, Palo Alto, CA 94303, USA
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ *
+ *
+ * Sun Industry Standards Source License Version 1.1
+ * =================================================
+ * The contents of this file are subject to the Sun Industry Standards
+ * Source License Version 1.1 (the "License"); You may not use this file
+ * except in compliance with the License. You may obtain a copy of the
+ * License at
+ *
+ * Software provided under this License is provided on an "AS IS" basis,
+ * See the License for the specific provisions governing your rights and
+ * obligations concerning the Software.
+ *
+ * The Initial Developer of the Original Code is: Sun Microsystems, Inc.
+ *
+ * Copyright: 2000 by Sun Microsystems, Inc.
+ *
+ * All Rights Reserved.
+ *
+ * Contributor(s): _______________________________________
+ *
+ *
+ ************************************************************************/
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.openoffice.xmerge.DocumentMerger;
+import org.openoffice.xmerge.MergeException;
+import org.openoffice.xmerge.ConverterCapabilities;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+import org.openoffice.xmerge.merger.DiffAlgorithm;
+import org.openoffice.xmerge.merger.Difference;
+import org.openoffice.xmerge.merger.NodeMergeAlgorithm;
+import org.openoffice.xmerge.merger.Iterator;
+import org.openoffice.xmerge.merger.DiffAlgorithm;
+import org.openoffice.xmerge.merger.diff.ParaNodeIterator;
+import org.openoffice.xmerge.merger.diff.IteratorLCSAlgorithm;
+import org.openoffice.xmerge.merger.merge.DocumentMerge;
+import org.openoffice.xmerge.merger.merge.CharacterBaseParagraphMerge;
+import org.openoffice.xmerge.util.Debug;
+ * <p>Wordsmith implementation of <code>DocumentMerger</code>
+ * for the {@link
+ * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl
+ * PluginFactoryImpl}.</p>
+ */
+public class DocumentMergerImpl implements DocumentMerger {
+ // Converter capabilities handed to the paragraph iterators and to the
+ // DocumentMerge created in merge().
+ private ConverterCapabilities cc_;
+ // The original document; merge() casts it to SxwDocument.
+ private org.openoffice.xmerge.Document orig = null;
+ /**
+ * Constructor.
+ *
+ * @param doc The original <code>Document</code> that
+ * <code>merge</code> compares against.
+ * @param cc The <code>ConverterCapabilities</code> used by
+ * <code>merge</code>.
+ */
+ public DocumentMergerImpl(org.openoffice.xmerge.Document doc, ConverterCapabilities cc) {
+ cc_ = cc;
+ this.orig = doc;
+ }
+ /**
+ * Computes the paragraph-level differences between the original
+ * document and <code>modifiedDoc</code> and applies them through a
+ * <code>DocumentMerge</code> that uses character-based paragraph
+ * merging.
+ *
+ * <p>Both documents are cast to <code>SxwDocument</code>; a
+ * <code>ClassCastException</code> results if either is of another
+ * type.</p>
+ *
+ * @param modifiedDoc The modified <code>Document</code> to merge
+ * with the original.
+ *
+ * @throws MergeException If any merge error occurs.
+ */
+ public void merge(org.openoffice.xmerge.Document modifiedDoc) throws MergeException {
+     SxwDocument wdoc1 = (SxwDocument) orig;
+     SxwDocument wdoc2 = (SxwDocument) modifiedDoc;
+     Document doc1 = wdoc1.getContentDOM();
+     Document doc2 = wdoc2.getContentDOM();
+
+     Iterator i1 = new ParaNodeIterator(cc_, doc1.getDocumentElement());
+     Iterator i2 = new ParaNodeIterator(cc_, doc2.getDocumentElement());
+
+     // find out the paragraph level diffs
+     DiffAlgorithm diffAlgo = new IteratorLCSAlgorithm();
+     Difference[] diffTable = diffAlgo.computeDiffs(i1, i2);
+     if (Debug.isFlagSet(Debug.INFO)) {
+         Debug.log(Debug.INFO, "Diff Result: ");
+         for (int i = 0; i < diffTable.length; i++) {
+             Debug.log(Debug.INFO, diffTable[i].debug());
+         }
+     }
+
+     // merge the paragraphs
+     NodeMergeAlgorithm charMerge = new CharacterBaseParagraphMerge();
+     DocumentMerge docMerge = new DocumentMerge(cc_, charMerge);
+     docMerge.applyDifference(i1, i2, diffTable);
+ }
diff --git a/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/ b/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/
new file mode 100644
index 000000000000..181fc9019f24
--- /dev/null
+++ b/xmerge/source/wordsmith/java/org/openoffice/xmerge/converter/xml/sxw/wordsmith/
@@ -0,0 +1,564 @@
+ *
+ * The Contents of this file are made available subject to the terms of
+ * either of the following licenses
+ *
+ * - GNU Lesser General Public License Version 2.1
+ * - Sun Industry Standards Source License Version 1.1
+ *
+ * Sun Microsystems Inc., October, 2000
+ *
+ * GNU Lesser General Public License Version 2.1
+ * =============================================
+ * Copyright 2000 by Sun Microsystems, Inc.
+ * 901 San Antonio Road, Palo Alto, CA 94303, USA
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+ * MA 02111-1307 USA
+ *
+ *
+ * Sun Industry Standards Source License Version 1.1
+ * =================================================
+ * The contents of this file are subject to the Sun Industry Standards
+ * Source License Version 1.1 (the "License"); You may not use this file
+ * except in compliance with the License. You may obtain a copy of the
+ * License at
+ *
+ * Software provided under this License is provided on an "AS IS" basis,
+ * See the License for the specific provisions governing your rights and
+ * obligations concerning the Software.
+ *
+ * The Initial Developer of the Original Code is: Sun Microsystems, Inc.
+ *
+ * Copyright: 2000 by Sun Microsystems, Inc.
+ *
+ * All Rights Reserved.
+ *
+ * Contributor(s): _______________________________________
+ *
+ *
+ ************************************************************************/
+package org.openoffice.xmerge.converter.xml.sxw.wordsmith;
+import org.w3c.dom.NodeList;
+import org.w3c.dom.Node;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Element;
+import org.openoffice.xmerge.Document;
+import org.openoffice.xmerge.ConvertData;
+import org.openoffice.xmerge.ConvertException;
+import org.openoffice.xmerge.DocumentSerializer;
+import org.openoffice.xmerge.ConverterCapabilities;
+import org.openoffice.xmerge.converter.xml.OfficeConstants;
+import org.openoffice.xmerge.converter.xml.sxw.SxwDocument;
+import org.openoffice.xmerge.converter.palm.PalmDB;
+import org.openoffice.xmerge.converter.palm.PdbEncoder;
+import org.openoffice.xmerge.converter.palm.Record;
+import org.openoffice.xmerge.converter.palm.PdbUtil;
+import org.openoffice.xmerge.converter.palm.PalmDocument;
+import org.openoffice.xmerge.converter.xml.OfficeDocument;
+import org.openoffice.xmerge.util.*;
+import org.openoffice.xmerge.converter.xml.*;
+ * <p>WordSmith implementation of
+ * org.openoffice.xmerge.DocumentSerializer
+ * for the {@link
+ * org.openoffice.xmerge.converter.xml.sxw.wordsmith.PluginFactoryImpl
+ * PluginFactoryImpl}.</p>
+ *
+ * <p>The <code>serialize</code> method traverses the DOM
+ * document from the given <code>Document</code> object. It uses a
+ * <code>DocEncoder</code> object for the actual conversion of
+ * contents to the WordSmith format.</p>
+ *
+ * @author Herbie Ong, David Proulx
+ */
+// DJP: take out "implements OfficeConstants"
+public final class DocumentSerializerImpl
+implements OfficeConstants, DocumentSerializer {
+ /** A WSEncoder object for encoding to WordSmith. */
+ // Re-created at the start of every serialize() call.
+ private WSEncoder encoder = null;
+ /** The <code>StyleCatalog</code>. */
+ // Re-created and filled from the document's style sections by serialize().
+ private StyleCatalog styleCat = null;
+ // Font table added to the encoder by serialize() and passed to every
+ // WseTextRun that is created.
+ private WseFontTable fontTable = new WseFontTable();
+ // Color table added to the encoder by serialize() and passed to every
+ // WseTextRun that is created.
+ private WseColorTable colorTable = new WseColorTable();
+ /**
+ * The <code>SxwDocument</code> object that this converter
+ * processes.
+ */
+ private SxwDocument sxwDoc = null;
+ /**
+ * Constructor.
+ *
+ * <p>The argument is cast to <code>SxwDocument</code>, so passing
+ * any other <code>Document</code> implementation results in a
+ * <code>ClassCastException</code>.</p>
+ *
+ * @param doc The <code>Document</code> to convert.
+ */
+ public DocumentSerializerImpl(Document doc) {
+ sxwDoc = (SxwDocument) doc;
+ }
+ /**
+ * <p>Method to convert the <code>SxwDocument</code> given to the
+ * constructor into a <code>PalmDocument</code>.</p>
+ *
+ * <p>The styles of the document are read into a fresh
+ * <code>StyleCatalog</code>, the first <i>office:body</i> element
+ * is traversed and encoded, and the resulting records are wrapped
+ * in a <code>PalmDocument</code>.</p>
+ *
+ * <p>This method is not thread safe for performance reasons: it
+ * replaces the encoder and style catalog held by this object.
+ * This method should not be called from within two threads.
+ * It would be best to call this method only once per object
+ * instance.</p>
+ *
+ * @return A <code>ConvertData</code> object holding the resulting
+ * <code>PalmDocument</code>.
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ public ConvertData serialize()
+     throws IOException {
+     // get the server document name
+     String docName = sxwDoc.getName();
+
+     // get DOM document
+     org.w3c.dom.Document domDoc = sxwDoc.getContentDOM();
+
+     // Create WordSmith encoder object and start it off with the
+     // (initially empty) font and color tables.
+     encoder = new WSEncoder();
+     encoder.addElement(fontTable);
+     encoder.addElement(colorTable);
+
+     // Read the styles into the style catalog. "paragraph" appears
+     // twice on purpose: paragraph styles are read once as ParaStyle
+     // and once as TextStyle.
+     String families[] = { "text", "paragraph", "paragraph" };
+     Class classes[] = { TextStyle.class, ParaStyle.class, TextStyle.class };
+     styleCat = new StyleCatalog(25);
+
+     // Parse the input document
+     // DJP todo: eliminate multiple calls to add() when it can
+     // recurse properly.
+     NodeList nl = domDoc.getElementsByTagName(TAG_OFFICE_STYLES);
+     styleCat.add(nl.item(0), families, classes, null, false);
+     nl = domDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
+     styleCat.add(nl.item(0), families, classes, null, false);
+     nl = domDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES);
+     styleCat.add(nl.item(0), families, classes, null, false);
+
+     // Traverse to the office:body element.
+     // There should only be one.
+     NodeList list = domDoc.getElementsByTagName(TAG_OFFICE_BODY);
+     if (list.getLength() > 0) {
+         traverseBody(list.item(0));
+     }
+
+     // create a PalmDB object and ConvertData object.
+     Record records[] = encoder.getRecords();
+     ConvertData cd = new ConvertData();
+     // NOTE(review): the tail of this constructor call was truncated in
+     // the source under review. The last two arguments are reconstructed
+     // (backup header attribute, then the encoded records) - confirm
+     // against the PalmDocument constructor.
+     PalmDocument palmDoc = new PalmDocument(docName,
+         PdbUtil.intID("WrdS"), PdbUtil.intID("BDOC"), 0,
+         PalmDB.PDB_HEADER_ATTR_BACKUP, records);
+     cd.addDocument(palmDoc);
+     return cd;
+ }
+ /**
+ * Walks the direct children of the <i>office:body</i> element and
+ * dispatches each element child: paragraphs and headings go to
+ * <code>traverseParagraph</code>, ordered and unordered lists go to
+ * <code>traverseList</code>, and anything else is only logged.
+ * Non-element children are ignored.
+ *
+ * @param node <i>office:body</i> <code>Node</code>.
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ private void traverseBody(Node node) throws IOException {
+     if (!node.hasChildNodes()) {
+         return;
+     }
+     NodeList children = node.getChildNodes();
+     int count = children.getLength();
+     for (int idx = 0; idx < count; idx++) {
+         Node child = children.item(idx);
+         if (child.getNodeType() != Node.ELEMENT_NODE) {
+             continue;
+         }
+         String tag = child.getNodeName();
+         if (tag.equals(TAG_PARAGRAPH) || tag.equals(TAG_HEADING)) {
+             traverseParagraph(child);
+         } else if (tag.equals(TAG_UNORDERED_LIST) || tag.equals(TAG_ORDERED_LIST)) {
+             traverseList(child);
+         } else {
+             Debug.log(Debug.INFO, "<OTHERS " /* + XmlDebug.nodeInfo(child) */ + " />");
+         }
+     }
+ }
+ /**
+ * Encodes one <i>text:p</i> or <i>text:h</i> element: resolves its
+ * paragraph style, adds a <code>WsePara</code> for it to the
+ * encoder, and then traverses the paragraph's contents.
+ *
+ * <p>When the element's style is not in the style catalog, a style
+ * named "CONVERTER-DEFAULT" is used instead; it is created and
+ * added to the catalog (as both a paragraph and a text style) the
+ * first time it is needed.</p>
+ *
+ * @param node A <i>text:p</i> or <i>text:h</i> <code>Node</code>.
+ *
+ * @throws IOException If any I/O error occurs.
+ */
+ private void traverseParagraph(Node node) throws IOException {
+     String styleName = findAttribute(node, "text:style-name");
+     ParaStyle paraStyle = (ParaStyle)
+         styleCat.lookup(styleName, "paragraph", null, ParaStyle.class);
+
+     if (paraStyle == null) {
+         // Unknown style: fall back to the converter's default style,
+         // registering it in the catalog on first use.
+         styleName = "CONVERTER-DEFAULT";
+         paraStyle = (ParaStyle)
+             styleCat.lookup(styleName, "paragraph", null, ParaStyle.class);
+         if (paraStyle == null) {
+             paraStyle = new ParaStyle(styleName, "paragraph", null,
+                 (String [])null, null, styleCat);
+             styleCat.add(paraStyle);
+             TextStyle fallbackText = new TextStyle(styleName, "paragraph", null,
+                 0, 0, 12, "Times-Roman", styleCat);
+             styleCat.add(fallbackText);
+         }
+     }
+
+     ParaStyle resolved = (ParaStyle) paraStyle.getResolved();
+     encoder.addElement(new WsePara(resolved, styleCat));
+
+     // The paragraph's own text formatting is the default for its runs.
+     TextStyle defParaTextStyle = (TextStyle)
+         styleCat.lookup(styleName, "paragraph", null, TextStyle.class);
+     traverseParaContents(node, defParaTextStyle);
+ }
+ /**
+  * Traverses the contents of a paragraph, or of an inline element
+  * nested inside one, and adds a <code>WseTextRun</code> to the
+  * encoder for each text node, <i>text:s</i> space run, tab stop and
+  * line break found.  This method calls itself for <i>text:span</i>
+  * and <i>text:a</i> children.  Bookmarks and any other elements are
+  * only reported in the debug log.
+  *
+  * <p>The style in effect for <code>node</code> is the "text" family
+  * style named by its <i>text:style-name</i> attribute when the style
+  * catalog has one, and <code>defTextStyle</code> otherwise.  That
+  * style is used for every run created directly under
+  * <code>node</code>, and is passed on as the default for nested
+  * elements, so whitespace and nested content inside a styled span
+  * keep the span's formatting.</p>
+  *
+  * @param  node          A paragraph or content <code>Node</code>.
+  * @param  defTextStyle  The text style to use when <code>node</code>
+  *                       does not name a known text style itself.
+  */
+ private void traverseParaContents(Node node, TextStyle defTextStyle) {
+     String styleName = findAttribute(node, "text:style-name");
+     TextStyle style = (TextStyle)
+         styleCat.lookup(styleName, "text", null, TextStyle.class);
+
+     // Style in effect for everything directly inside this node.
+     TextStyle runStyle = (style != null) ? style : defTextStyle;
+
+     if (!node.hasChildNodes()) {
+         return;
+     }
+
+     NodeList nodeList = node.getChildNodes();
+     int nChildren = nodeList.getLength();
+     for (int i = 0; i < nChildren; i++) {
+         Node child = nodeList.item(i);
+         short childType = child.getNodeType();
+
+         if (childType == Node.TEXT_NODE) {
+             // this is for grabbing text nodes.
+             String s = child.getNodeValue();
+             if (s.length() > 0) {
+                 encoder.addElement(new WseTextRun(s, runStyle, styleCat,
+                                                   fontTable, colorTable));
+             }
+         } else if (childType == Node.ELEMENT_NODE) {
+             String childNodeName = child.getNodeName();
+
+             if (childNodeName.equals(TAG_SPACE)) {
+                 // this is for text:s tags.  Without a usable count
+                 // attribute a single space is emitted.
+                 NamedNodeMap map = child.getAttributes();
+                 Node attr = map.getNamedItem(ATTRIBUTE_SPACE_COUNT);
+                 int count = 1;
+                 if (attr != null) {
+                     try {
+                         String countStr = attr.getNodeValue();
+                         count = Integer.parseInt(countStr.trim());
+                     } catch (NumberFormatException e) {
+                         // Keep the default of one space.
+                         Debug.log(Debug.ERROR, "Problem parsing space tag", e);
+                     }
+                 }
+                 StringBuffer space = new StringBuffer(" ");
+                 for (int j = 1; j < count; j++) {
+                     space.append(" ");
+                 }
+                 encoder.addElement(new WseTextRun(space.toString(), runStyle,
+                                                   styleCat, fontTable,
+                                                   colorTable));
+                 Debug.log(Debug.INFO, "<SPACE count=\"" + count + "\" />");
+
+             } else if (childNodeName.equals(TAG_TAB_STOP)) {
+                 // this is for text:tab-stop
+                 encoder.addElement(new WseTextRun("\t", runStyle, styleCat,
+                                                   fontTable, colorTable));
+                 Debug.log(Debug.INFO, "<TAB/>");
+
+             } else if (childNodeName.equals(TAG_LINE_BREAK)) {
+                 // this is for text:line-break
+                 encoder.addElement(new WseTextRun("\n", runStyle, styleCat,
+                                                   fontTable, colorTable));
+                 Debug.log(Debug.INFO, "<LINE-BREAK/>");
+
+             } else if (childNodeName.equals(TAG_SPAN)) {
+                 // this is for text:span
+                 Debug.log(Debug.INFO, "<SPAN>");
+                 traverseParaContents(child, runStyle);
+                 Debug.log(Debug.INFO, "</SPAN>");
+
+             } else if (childNodeName.equals(TAG_HYPERLINK)) {
+                 // this is for text:a
+                 Debug.log(Debug.INFO, "<HYPERLINK>");
+                 traverseParaContents(child, runStyle);
+                 Debug.log(Debug.INFO, "</HYPERLINK>");
+
+             } else if (childNodeName.equals(TAG_BOOKMARK) ||
+                        childNodeName.equals(TAG_BOOKMARK_START)) {
+                 // Bookmarks produce no output.
+                 Debug.log(Debug.INFO, "<BOOKMARK/>");
+
+             } else {
+                 // Details of the skipped node are not included in the message.
+                 Debug.log(Debug.INFO, "<OTHERS " + " />");
+             }
+         }
+     }
+ }
+ /**
+  * Traverses a <i>text:unordered-list</i> or <i>text:ordered-list</i>
+  * element.  Each <i>text:list-item</i> child is passed to
+  * <code>traverseListItem</code> and each <i>text:list-header</i>
+  * child to <code>traverseListHeader</code>; any other element child
+  * is logged as an error.
+  *
+  * @param   node  A list <code>Node</code>.
+  *
+  * @throws  IOException  If any I/O error occurs.
+  */
+ private void traverseList(Node node) throws IOException {
+     Debug.log(Debug.TRACE, "<LIST>");
+
+     NodeList entries = node.hasChildNodes() ? node.getChildNodes() : null;
+     int total = (entries == null) ? 0 : entries.getLength();
+
+     for (int idx = 0; idx < total; idx++) {
+         Node entry = entries.item(idx);
+         if (entry.getNodeType() != Node.ELEMENT_NODE) {
+             continue;
+         }
+         String tag = entry.getNodeName();
+         if (tag.equals(TAG_LIST_ITEM)) {
+             traverseListItem(entry);
+         } else if (tag.equals(TAG_LIST_HEADER)) {
+             traverseListHeader(entry);
+         } else {
+             Debug.log(Debug.ERROR, "<INVALID-XML-BUG " + " />");
+         }
+     }
+
+     Debug.log(Debug.TRACE, "</LIST>");
+ }
+ /**
+  * Traverses a <i>text:list-header</i> element.  Each <i>text:p</i>
+  * child is passed to <code>traverseParagraph</code>.  Any other
+  * element child is logged as an error, at the same level used for
+  * unexpected children of lists and list items.
+  *
+  * @param   node  A list header <code>Node</code>.
+  *
+  * @throws  IOException  If any I/O error occurs.
+  */
+ private void traverseListHeader(Node node) throws IOException {
+     Debug.log(Debug.TRACE, "<LIST-HEADER>");
+     if (node.hasChildNodes()) {
+         NodeList nodeList = node.getChildNodes();
+         int len = nodeList.getLength();
+         for (int i = 0; i < len; i++) {
+             Node child = nodeList.item(i);
+             if (child.getNodeType() != Node.ELEMENT_NODE) {
+                 continue;
+             }
+             String nodeName = child.getNodeName();
+             if (nodeName.equals(TAG_PARAGRAPH)) {
+                 traverseParagraph(child);
+             } else {
+                 // Unexpected content is an error, not trace output.
+                 Debug.log(Debug.ERROR, "<INVALID-XML-BUG " + " />");
+             }
+         }
+     }
+     Debug.log(Debug.TRACE, "</LIST-HEADER>");
+ }
+ /**
+  * This method will traverse a <i>text:list-item</i>.
+  * A list item may contain one or more of <i>text:p</i>,
+  * <i>text:h</i>, <i>text:section</i>,
+  * <i>text:ordered-list</i> and <i>text:unordered-list</i>.
+  *
+  * This method currently only implements grabbing <i>text:p</i>,
+  * <i>text:h</i>, <i>text:unordered-list</i> and
+  * <i>text:ordered-list</i>.  Paragraphs and headings are passed to
+  * <code>traverseParagraph</code>, nested lists to
+  * <code>traverseList</code>; any other element child is logged as
+  * an error.
+  *
+  * @param   node  The list item <code>Node</code> to traverse.
+  *
+  * @throws  IOException  If any I/O error occurs.
+  */
+ private void traverseListItem(Node node) throws IOException {
+     Debug.log(Debug.TRACE, "<LIST-ITEM>");
+     if (node.hasChildNodes()) {
+         NodeList nodeList = node.getChildNodes();
+         int len = nodeList.getLength();
+         for (int i = 0; i < len; i++) {
+             Node child = nodeList.item(i);
+             if (child.getNodeType() != Node.ELEMENT_NODE) {
+                 continue;
+             }
+             String nodeName = child.getNodeName();
+             if (nodeName.equals(TAG_PARAGRAPH) ||
+                 nodeName.equals(TAG_HEADING)) {
+                 // Headings are handled like paragraphs, as they are
+                 // at body level; without this branch a heading in a
+                 // list item would be reported as invalid and dropped.
+                 traverseParagraph(child);
+             } else if (nodeName.equals(TAG_UNORDERED_LIST) ||
+                        nodeName.equals(TAG_ORDERED_LIST)) {
+                 traverseList(child);
+             } else {
+                 Debug.log(Debug.ERROR, "<INVALID-XML-BUG " + " />");
+             }
+         }
+     }
+     Debug.log(Debug.TRACE, "</LIST-ITEM>");
+ }
+ /**
+  * Returns the value of the first attribute of a <code>Node</code>
+  * whose node name equals the given name.
+  *
+  * @param   node  The <code>Node</code> to inspect.
+  * @param   name  The attribute name.
+  *
+  * @return  The value of the named attribute, or <code>null</code>
+  *          if the node has no attribute map or no attribute with
+  *          that name.
+  */
+ private String findAttribute(Node node, String name) {
+     NamedNodeMap attributes = node.getAttributes();
+     if (attributes == null) {
+         return null;
+     }
+     int total = attributes.getLength();
+     int pos = 0;
+     while (pos < total) {
+         Node candidate = attributes.item(pos);
+         if (candidate.getNodeName().equals(name)) {
+             return candidate.getNodeValue();
+         }
+         pos++;
+     }
+     return null;
+ }