summaryrefslogtreecommitdiff
path: root/sc/source/filter/inc/htmlpars.hxx
diff options
context:
space:
mode:
authorJens-Heiner Rechtien <hr@openoffice.org>2003-03-26 17:07:02 +0000
committerJens-Heiner Rechtien <hr@openoffice.org>2003-03-26 17:07:02 +0000
commit555d702903fb0857122024e1ab78a72d122d3f16 (patch)
tree6c749e09d7b761f4ce353bedcc31627a38529196 /sc/source/filter/inc/htmlpars.hxx
parentff721adbe05f7a6659d9dffcf46c671f8a644da7 (diff)
MWS_SRX644: migrate branch mws_srx644 -> HEAD
Diffstat (limited to 'sc/source/filter/inc/htmlpars.hxx')
-rw-r--r--sc/source/filter/inc/htmlpars.hxx616
1 files changed, 464 insertions, 152 deletions
diff --git a/sc/source/filter/inc/htmlpars.hxx b/sc/source/filter/inc/htmlpars.hxx
index 541d158d2939..73563659e433 100644
--- a/sc/source/filter/inc/htmlpars.hxx
+++ b/sc/source/filter/inc/htmlpars.hxx
@@ -2,9 +2,9 @@
*
* $RCSfile: htmlpars.hxx,v $
*
- * $Revision: 1.5 $
+ * $Revision: 1.6 $
*
- * last change: $Author: dr $ $Date: 2001-04-06 12:38:30 $
+ * last change: $Author: hr $ $Date: 2003-03-26 18:05:03 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -66,12 +66,43 @@
#include <tools/stack.hxx>
#endif
+#include <memory>
+#include <vector>
+#include <list>
+#include <map>
+
#include "rangelst.hxx"
#include "eeparser.hxx"
-#define SC_HTML_FONTSIZES 7 // wie Export, HTML-Options
+const sal_uInt32 SC_HTML_FONTSIZES = 7; // same as export, HTML options
#define SC_HTML_OFFSET_TOL 10 // Pixeltoleranz fuer SeekOffset
+// ============================================================================
+// BASE class for HTML parser classes
+// ============================================================================
+
+class ScHTMLTable;
+
+/** Abstract base class for HTML parser classes (Read() and GetGlobalTable() are pure virtual). */
+class ScHTMLParser : public ScEEParser
+{
+protected:
+ sal_uInt32 maFontHeights[ SC_HTML_FONTSIZES ]; /// Font height table with SC_HTML_FONTSIZES entries.
+ ScDocument* mpDoc; /// The destination document.
+
+public:
+ explicit ScHTMLParser( EditEngine* pEditEngine, ScDocument* pDoc );
+ virtual ~ScHTMLParser();
+
+ virtual sal_uInt32 Read( SvStream& rStrm ) = 0;
+
+ /** Returns the "global table" which contains the entire HTML document. */
+ virtual const ScHTMLTable* GetGlobalTable() const = 0;
+};
+
+
+// ============================================================================
+
SV_DECL_VARARR_SORT( ScHTMLColOffset, ULONG, 16, 4);
struct ScHTMLTableStackEntry
@@ -123,161 +154,20 @@ struct ScHTMLAdjustStackEntry
DECLARE_STACK( ScHTMLAdjustStack, ScHTMLAdjustStackEntry* );
-//------------------------------------------------------------------------
-
-enum ScHTMLTableDataKey { tdCol, tdRow };
-
-class ScHTMLTableDataTable;
-
-class ScHTMLTableData // data for one HTML table
-{
-private:
- String aTableName; // <TABLE ID="name">
- Table aColCount; // count of cells per HTML column
- Table aRowCount; // count of cells per HTML row
- USHORT nFirstCol; // first column index
- USHORT nFirstRow; // first row index
- USHORT nLastCol; // last column index
- USHORT nLastRow; // last row index
- USHORT nColSpan; // column spanning of the parent cell
- USHORT nRowSpan; // row spanning of the parent cell
- USHORT nDocCol; // resulting position in ScDoc
- USHORT nDocRow; // resulting position in ScDoc
- BOOL bBorderOn; // table borders
-
- ScHTMLTableDataTable* pNestedTables; // table of nested HTML tables
-
- inline Table& GetDataTable( ScHTMLTableDataKey eCRKey )
- { return (eCRKey == tdCol) ? aColCount : aRowCount; }
- inline const Table& GetDataTable( ScHTMLTableDataKey eCRKey ) const
- { return (eCRKey == tdCol) ? aColCount : aRowCount; }
-
- static USHORT GetCount( const Table& rDataTab, ULONG nIndex );
- static USHORT GetSize( const Table& rDataTab, ULONG nStart, ULONG nEnd );
-
- USHORT CalcDocPos( ScHTMLTableDataKey eCRKey, ULONG nIndex ) const;
- USHORT CalcSpan( ScHTMLTableDataKey eCRKey, ULONG nIndex, USHORT nSpan ) const;
-
- void SetMaxCount( Table& rDataTab, ULONG nIndex, USHORT nCount );
- void ChangeDocCoord( short nColDiff, short nRowDiff );
-
- void RecalcSizeDim( ScHTMLTableDataKey eCRKey );
-
-public:
- ScHTMLTableData(
- const String& rTabName,
- USHORT _nFirstCol, USHORT _nFirstRow,
- USHORT _nColSpan, USHORT _nRowSpan,
- BOOL bBorder );
- ~ScHTMLTableData();
-
- inline USHORT GetFirstCol() const { return nFirstCol; }
- inline USHORT GetFirstRow() const { return nFirstRow; }
- inline USHORT GetLastCol() const { return nLastCol; }
- inline USHORT GetLastRow() const { return nLastRow; }
- inline const String& GetTableName() const { return aTableName; }
- void GetRange( ScRange& rRange ) const;
-
- inline USHORT GetFirst( ScHTMLTableDataKey eCRKey ) const
- { return (eCRKey == tdCol) ? nFirstCol : nFirstRow; }
- inline USHORT GetLast( ScHTMLTableDataKey eCRKey ) const
- { return (eCRKey == tdCol) ? nLastCol : nLastRow; }
- inline USHORT GetDocPos( ScHTMLTableDataKey eCRKey ) const
- { return (eCRKey == tdCol) ? nDocCol : nDocRow; }
- inline USHORT GetSpan( ScHTMLTableDataKey eCRKey ) const
- { return (eCRKey == tdCol) ? nColSpan : nRowSpan; }
- inline USHORT GetCount( ScHTMLTableDataKey eCRKey, ULONG nIndex ) const
- { return GetCount( GetDataTable( eCRKey ), nIndex ); }
- inline USHORT GetSize( ScHTMLTableDataKey eCRKey ) const
- { return GetSize( GetDataTable( eCRKey ), GetFirst( eCRKey ), GetLast( eCRKey ) ); }
-
- inline USHORT CalcDocCol( ULONG nIndex ) const { return CalcDocPos( tdCol, nIndex ); }
- inline USHORT CalcDocRow( ULONG nIndex ) const { return CalcDocPos( tdRow, nIndex ); }
- inline USHORT CalcColSpan( ULONG nIndex, USHORT nSpan ) const { return CalcSpan( tdCol, nIndex, nSpan ); }
- inline USHORT CalcRowSpan( ULONG nIndex, USHORT nSpan ) const { return CalcSpan( tdRow, nIndex, nSpan ); }
-
- ScHTMLTableData* GetNestedTable( ULONG nTab ) const;
-
- ScHTMLTableData* InsertNestedTable(
- ULONG nTab, const String& rTabName,
- USHORT _nFirstCol, USHORT _nFirstRow,
- USHORT _nColSpan, USHORT _nRowSpan,
- BOOL bBorder );
- inline void SetCellCoord( USHORT nCol, USHORT nRow );
- void SetDocCoord( USHORT nCol, USHORT nRow );
-
- void RecalcSize();
- void SetCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos );
-};
-
-inline void ScHTMLTableData::SetCellCoord( USHORT nCol, USHORT nRow )
-{
- nLastCol = Max( nLastCol, nCol );
- nLastRow = Max( nLastRow, nRow );
-}
-
-
-
-class ScHTMLTableDataTable : private Table
-{
-private:
- ScHTMLTableData* pCurrTab;
- ULONG nCurrTab;
-
- USHORT nSearchMin1;
- USHORT nSearchMin2;
- ScHTMLTableDataKey eSearchKey;
-
- inline void _Insert( ULONG nTab, ScHTMLTableData* pTable )
- { Table::Insert( nTab, pTable ); }
-
- inline ScHTMLTableData* _Get( ULONG nTab ) const
- { return (ScHTMLTableData*) Table::Get( nTab ); }
-
- inline void SetCurrTable( ULONG nTab, ScHTMLTableData* pTab )
- { nCurrTab = nTab; pCurrTab = pTab; }
-public:
- ScHTMLTableDataTable();
- virtual ~ScHTMLTableDataTable();
-
- ScHTMLTableData* GetTable( ULONG nTab );
- inline ScHTMLTableData* GetFirst() { return (ScHTMLTableData*) Table::First(); }
- inline ScHTMLTableData* GetNext() { return (ScHTMLTableData*) Table::Next(); }
- ScHTMLTableData* GetFirstInOrder( ScHTMLTableDataKey eKey );
- ScHTMLTableData* GetNextInOrder();
-
- USHORT GetNextFreeRow( ULONG nTab );
-
- ScHTMLTableData* InsertTable(
- ULONG nTab, const String& rTabName,
- USHORT nFirstCol, USHORT nFirstRow,
- USHORT nColSpan, USHORT nRowSpan,
- BOOL bBorder, ULONG nNestedIn = 0 );
- void SetCellCoord( ULONG nTab, USHORT nCol, USHORT nRow );
- void RecalcSizes();
-
- void Recalc();
- void SetCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos );
-};
-
-//------------------------------------------------------------------------
-
+// ============================================================================
class EditEngine;
class ScDocument;
class HTMLOption;
-class ScHTMLParser : public ScEEParser
+class ScHTMLLayoutParser : public ScHTMLParser
{
private:
- ULONG aFontHeights[SC_HTML_FONTSIZES];
+ Size aPageSize;
ScHTMLTableStack aTableStack;
- Size aPageSize; // in Pixeln
String aString;
ScRangeListRef xLockedList; // je Table
- ScDocument* pDoc;
Table* pTables;
- ScHTMLTableDataTable* pTableData; // data for each HTML table
ScHTMLColOffset* pColOffset;
ScHTMLColOffset* pLocalColOffset; // je Table
ULONG nFirstTableCell; // je Table
@@ -322,28 +212,450 @@ private:
USHORT GetWidth( ScEEParseEntry* );
void SetWidths();
void Adjust();
- void AdjustNoWidthHeight();
USHORT GetWidthPixel( const HTMLOption* );
BOOL IsAtBeginningOfText( ImportInfo* );
void TableOn( ImportInfo* );
void ColOn( ImportInfo* );
+ void TableRowOn( ImportInfo* );
+ void TableRowOff( ImportInfo* );
void TableDataOn( ImportInfo* );
+ void TableDataOff( ImportInfo* );
void TableOff( ImportInfo* );
void Image( ImportInfo* );
void AnchorOn( ImportInfo* );
void FontOn( ImportInfo* );
public:
- ScHTMLParser( EditEngine*, const Size& aPageSize,
- ScDocument*, BOOL _bCalcWidthHeight );
- virtual ~ScHTMLParser();
+ ScHTMLLayoutParser( EditEngine*, const Size& aPageSize, ScDocument* );
+ virtual ~ScHTMLLayoutParser();
virtual ULONG Read( SvStream& );
+ virtual const ScHTMLTable* GetGlobalTable() const;
+};
+
+
+
+// ============================================================================
+// HTML DATA QUERY PARSER
+// ============================================================================
+
+/** Declares the orientation in or for a table: column (0) or row (1). */
+enum ScHTMLOrient { tdCol = 0 , tdRow = 1 };
+
+/** Type for a unique identifier for each table. */
+typedef sal_uInt16 ScHTMLTableId;
+/** Identifier of the "global table" (the entire HTML document). */
+const ScHTMLTableId SC_HTML_GLOBAL_TABLE = 0;
+/** Used as table index for normal (non-table) entries in ScHTMLEntry structs; has the same value as SC_HTML_GLOBAL_TABLE. */
+const ScHTMLTableId SC_HTML_NO_TABLE = 0;
+
+
+// ============================================================================
+
+/** A 2D cell position (column/row pair) in an HTML table; default-constructed as (0,0). */
+struct ScHTMLPos
+{
+ sal_uInt16 mnCol; /// Column index.
+ sal_uInt16 mnRow; /// Row index.
+
+ inline explicit ScHTMLPos() : mnCol( 0 ), mnRow( 0 ) {}
+ inline explicit ScHTMLPos( sal_uInt16 nCol, sal_uInt16 nRow ) :
+ mnCol( nCol ), mnRow( nRow ) {}
+ inline explicit ScHTMLPos( const ScAddress& rAddr ) { Set( rAddr ); }
+
+ inline sal_uInt16 Get( ScHTMLOrient eOrient ) const // Column for tdCol, otherwise the row.
+ { return (eOrient == tdCol) ? mnCol : mnRow; }
+ inline void Set( sal_uInt16 nCol, sal_uInt16 nRow )
+ { mnCol = nCol; mnRow = nRow; }
+ inline void Set( const ScAddress& rAddr ) // Takes over column and row of the address only.
+ { Set( rAddr.Col(), rAddr.Row() ); }
+ inline void Move( sal_Int16 nColDiff, sal_Int16 nRowDiff ) // Adds signed offsets; no range check is done here.
+ { mnCol += nColDiff; mnRow += nRowDiff; }
+ inline ScAddress MakeAddr() const // Third ScAddress argument is fixed to 0 (presumably the sheet index - TODO confirm).
+ { return ScAddress( mnCol, mnRow, 0 ); }
+};
- ScHTMLTableDataTable* GetHTMLTables() const { return pTableData; }
+inline bool operator==( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 ) // Equal if both row and column match.
+{
+ return (rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol == rPos2.mnCol);
+}
+
+inline bool operator<( const ScHTMLPos& rPos1, const ScHTMLPos& rPos2 ) // Row-major order: rows are compared first, then columns.
+{
+ return (rPos1.mnRow < rPos2.mnRow) || ((rPos1.mnRow == rPos2.mnRow) && (rPos1.mnCol < rPos2.mnCol));
+}
+
+
+// ----------------------------------------------------------------------------
+
+/** A 2D cell size (column count/row count pair) in an HTML table; default-constructed as (0,0). */
+struct ScHTMLSize
+{
+ sal_uInt16 mnCols; /// Number of columns.
+ sal_uInt16 mnRows; /// Number of rows.
+
+ inline explicit ScHTMLSize() : mnCols( 0 ), mnRows( 0 ) {}
+ inline explicit ScHTMLSize( sal_uInt16 nCols, sal_uInt16 nRows ) :
+ mnCols( nCols ), mnRows( nRows ) {}
+
+ inline sal_uInt16 Get( ScHTMLOrient eOrient ) const // Column count for tdCol, otherwise the row count.
+ { return (eOrient == tdCol) ? mnCols : mnRows; }
+ inline void Set( sal_uInt16 nCols, sal_uInt16 nRows )
+ { mnCols = nCols; mnRows = nRows; }
+ inline void Expand( sal_Int16 nColDiff, sal_Int16 nRowDiff ) // Adds signed differences; no range check is done here.
+ { mnCols += nColDiff; mnRows += nRowDiff; }
+};
+
+inline bool operator==( const ScHTMLSize& rSize1, const ScHTMLSize& rSize2 ) // Equal if both row and column counts match.
+{
+ return (rSize1.mnRows == rSize2.mnRows) && (rSize1.mnCols == rSize2.mnCols);
+}
+
+
+// ============================================================================
+
+/** A single entry containing a line of text or representing a table. */
+struct ScHTMLEntry : public ScEEParseEntry
+{
+private:
+ bool mbImportAlways; /// true = Always import this entry.
+
+public:
+ explicit ScHTMLEntry(
+ const SfxItemSet& rItemSet,
+ ScHTMLTableId nTableId = SC_HTML_NO_TABLE );
+
+ /** Returns true, if the selection of the entry is empty. */
+ inline bool IsEmpty() const { return !aSel.HasRange(); }
+ /** Returns true, if the entry has any content to be imported. */
+ bool HasContents() const;
+ /** Returns true, if the entry represents a table (nTab differs from SC_HTML_NO_TABLE). */
+ inline bool IsTable() const { return nTab != SC_HTML_NO_TABLE; }
+ /** Returns the table identifier stored in this entry (nTab). */
+ inline ScHTMLTableId GetTableId() const { return nTab; }
+
+ /** Sets or clears the import always state. */
+ inline void SetImportAlways( bool bSet = true ) { mbImportAlways = bSet; }
+ /** Sets start point of the entry selection to the start of the import info object. */
+ void AdjustStart( const ImportInfo& rInfo );
+ /** Sets end point of the entry selection to the end of the import info object. */
+ void AdjustEnd( const ImportInfo& rInfo );
+ /** Deletes leading and trailing empty paragraphs from the entry. */
+ void Strip( const EditEngine& rEditEngine );
+};
+
+
+// ============================================================================
+
+/** This struct handles creation of unique table identifiers. */
+struct ScHTMLTableAutoId
+{
+ const ScHTMLTableId mnTableId; /// The created unique table identifier.
+ ScHTMLTableId& mrnUnusedId; /// Reference to the caller-owned unused-identifier variable (passed to the constructor).
+
+ /** The constructor assigns an unused identifier to member mnTableId. */
+ explicit ScHTMLTableAutoId( ScHTMLTableId& rnUnusedId );
+};
+
+
+// ----------------------------------------------------------------------------
+
+class ScHTMLTableMap;
+
+/** Stores data for one table in an HTML document.
+ @descr This class does the main work for importing an HTML document. It manages
+ the correct insertion of parse entries into the correct cells and the creation
+ of nested tables. Recalculation of resulting document size and position is done
+ recursively in all nested tables. */
+class ScHTMLTable
+{
+private:
+ typedef ::std::auto_ptr< ScHTMLTableMap > ScHTMLTableMapPtr;
+ typedef ::std::auto_ptr< SfxItemSet > SfxItemSetPtr;
+ typedef ::std::vector< sal_uInt16 > ScSizeVec;
+ typedef ::std::list< ScHTMLEntry* > ScHTMLEntryList;
+ typedef ::std::map< ScHTMLPos, ScHTMLEntryList > ScHTMLEntryMap; /// Keyed by cell position (ordered via operator< on ScHTMLPos).
+ typedef ::std::auto_ptr< ScHTMLEntry > ScHTMLEntryPtr;
+
+ ScHTMLTable* mpParentTable; /// Pointer to parent table.
+ ScHTMLTableMapPtr mpNestedTables; /// Table of nested HTML tables.
+ String maTableName; /// Table name from <table id> option.
+ ScHTMLTableAutoId maTableId; /// Unique identifier of this table.
+ SfxItemSet maTableItemSet; /// Items for the entire table.
+ SfxItemSetPtr mpRowItemSet; /// Items for the current table row.
+ SfxItemSetPtr mpDataItemSet; /// Items for the current cell.
+ ScRangeList maLockList; /// Locked cells (needed for merged cells).
+ EditEngine& mrEditEngine; /// Edit engine (from ScEEParser).
+ ScEEParseList& mrEEParseList; /// List that owns the parse entries (from ScEEParser).
+ ScHTMLEntryMap maEntryMap; /// List of entries for each cell.
+ ScHTMLEntryList* mpCurrEntryList; /// Current entry list from map for faster access.
+ ScHTMLEntryPtr mpCurrEntry; /// Working entry, not yet inserted in a list.
+ ScSizeVec maSizes[ 2 ]; /// Calc cell count of each HTML table column/row (presumably indexed by ScHTMLOrient - TODO confirm).
+ ScHTMLSize maSize; /// Size of the table.
+ ScHTMLPos maCurrCell; /// Address of current cell to fill.
+ ScHTMLPos maDocBasePos; /// Resulting base address in a Calc document.
+ bool mbBorderOn; /// true = Table borders on.
+ bool mbPreFormText; /// true = Table from preformatted text (<pre> tag).
+ bool mbRowOn; /// true = Inside of <tr> </tr>.
+ bool mbDataOn; /// true = Inside of <td> </td> or <th> </th>.
+ bool mbPushEmptyLine; /// true = Insert empty line before current entry.
+
+public:
+ /** Creates a new HTML table without content.
+ @descr Internally handles a current cell position. This position is invalid
+ until first calls of RowOn() and DataOn().
+ @param rParentTable Reference to the parent table that owns this table.
+ @param bPreFormText true = Table is based on preformatted text (<pre> tag). */
+ explicit ScHTMLTable(
+ ScHTMLTable& rParentTable,
+ const ImportInfo& rInfo,
+ bool bPreFormText );
+
+ virtual ~ScHTMLTable();
+
+ /** Returns the name of the table, specified in the TABLE tag. */
+ inline const String& GetTableName() const { return maTableName; }
+ /** Returns the unique identifier of the table. */
+ inline ScHTMLTableId GetTableId() const { return maTableId.mnTableId; }
+ /** Returns the table size. */
+ inline const ScHTMLSize& GetSize() const { return maSize; }
+ /** Returns the cell spanning of the specified cell. */
+ ScHTMLSize GetSpan( const ScHTMLPos& rCellPos );
+
+ /** Searches in all nested tables for the specified table.
+ @param nTableId Unique identifier of the table. */
+ ScHTMLTable* FindNestedTable( ScHTMLTableId nTableId ) const;
+
+ /** Puts the item into the item set of the current entry. */
+ void PutItem( const SfxPoolItem& rItem );
+ /** Inserts a text portion into current entry. */
+ void PutText( const ImportInfo& rInfo );
+ /** Inserts a new line, if in preformatted text, else does nothing. */
+ void InsertPara( const ImportInfo& rInfo );
+
+ /** Inserts a line break (<br> tag).
+ @descr Inserts the current entry regardless if it is empty. */
+ void BreakOn();
+ /** Inserts a heading line (<p> and <h*> tags). */
+ void HeadingOn();
+ /** Processes a hyperlink (<a> tag). */
+ void AnchorOn();
+
+ /** Starts a *new* table nested in this table (<table> tag).
+ @return Pointer to the new table. */
+ ScHTMLTable* TableOn( const ImportInfo& rInfo );
+ /** Closes *this* table (</table> tag).
+ @return Pointer to the parent table. */
+ ScHTMLTable* TableOff( const ImportInfo& rInfo );
+ /** Starts a *new* table based on preformatted text (<pre> tag).
+ @return Pointer to the new table. */
+ ScHTMLTable* PreOn( const ImportInfo& rInfo );
+ /** Closes *this* table based on preformatted text (</pre> tag).
+ @return Pointer to the parent table. */
+ ScHTMLTable* PreOff( const ImportInfo& rInfo );
+
+ /** Starts next row (<tr> tag).
+ @descr Cell address is invalid until first call of DataOn(). */
+ void RowOn( const ImportInfo& rInfo );
+ /** Closes the current row (</tr> tag).
+ @descr Cell address is invalid until call of RowOn() and DataOn(). */
+ void RowOff( const ImportInfo& rInfo );
+ /** Starts the next cell (<td> or <th> tag). */
+ void DataOn( const ImportInfo& rInfo );
+ /** Closes the current cell (</td> or </th> tag).
+ @descr Cell address is invalid until next call of DataOn(). */
+ void DataOff( const ImportInfo& rInfo );
+
+ /** Starts the body of the HTML document (<body> tag). */
+ void BodyOn( const ImportInfo& rInfo );
+ /** Closes the body of the HTML document (</body> tag). */
+ void BodyOff( const ImportInfo& rInfo );
+
+ /** Returns the resulting document row/column count of the specified HTML row/column. */
+ sal_uInt16 GetDocSize( ScHTMLOrient eOrient, sal_uInt16 nCellPos ) const;
+ /** Returns the resulting document row/column count in the range [nCellBegin, nCellEnd). */
+ sal_uInt16 GetDocSize( ScHTMLOrient eOrient, sal_uInt16 nCellBegin, sal_uInt16 nCellEnd ) const;
+ /** Returns the total document row/column count in the specified direction. */
+ sal_uInt16 GetDocSize( ScHTMLOrient eOrient ) const;
+
+ /** Returns the resulting Calc position of the top left edge of the table. */
+ inline const ScHTMLPos& GetDocPos() const { return maDocBasePos; }
+ /** Calculates the resulting Calc position of the specified HTML column/row. */
+ sal_uInt16 GetDocPos( ScHTMLOrient eOrient, sal_uInt16 nCellPos = 0 ) const;
+ /** Calculates the resulting Calc position of the specified HTML cell. */
+ ScHTMLPos GetDocPos( const ScHTMLPos& rCellPos ) const;
+ /** Calculates the current Calc document area of this table. */
+ void GetDocRange( ScRange& rRange ) const;
+
+ /** Applies border formatting to the passed document. */
+ void ApplyCellBorders( ScDocument* pDoc, const ScAddress& rFirstPos ) const;
+
+private:
+ /** Returns true, if the current cell does not contain an entry yet. */
+ bool IsEmptyCell() const;
+ /** Returns the item set from cell, row, or table, depending on current state. */
+ const SfxItemSet& GetCurrItemSet() const;
+
+ /** Returns true, if import info represents a space character. */
+ static bool IsSpaceCharInfo( const ImportInfo& rInfo );
+
+ /** Creates and returns a new empty flying entry at position (0,0). */
+ ScHTMLEntry* CreateEntry() const;
+ /** Creates a new flying entry.
+ @param rInfo Contains the initial edit engine selection for the entry. */
+ void CreateNewEntry( const ImportInfo& rInfo );
+
+ /** Inserts an empty line in front of the next entry. */
+ void InsertLeadingEmptyLine();
+
+ /** Pushes the passed entry into the list of the current cell. */
+ void ImplPushEntryToList( ScHTMLEntryList& rEntryList, ScHTMLEntryPtr& rpEntry );
+ /** Tries to insert the entry into the current cell.
+ @descr If insertion is not possible (i.e., currently no cell open), the
+ entry will be inserted into the parent table. */
+ void PushEntry( ScHTMLEntryPtr& rpEntry );
+ /** Puts the current entry into the entry list, if it is not empty.
+ @param rInfo The import info struct containing the end position of the current entry.
+ @param bLastinCell true = If cell is still empty, put this entry always. */
+ void PushEntry( const ImportInfo& rInfo, bool bLastinCell = false );
+ /** Pushes a new entry into current cell which references a nested table. */
+ void PushTableEntry( ScHTMLTableId nTableId );
+
+ /** Tries to find a table from the table container.
+ @descr Assumes that the table is located in the current container or
+ that the passed table identifier is 0.
+ @param nTableId Unique identifier of the table or 0. */
+ ScHTMLTable* GetExistingTable( ScHTMLTableId nTableId ) const;
+ /** Inserts a nested table in the current cell at the specified position.
+ @param bPreFormText true = New table is based on preformatted text (<pre> tag). */
+ ScHTMLTable* InsertNestedTable( const ImportInfo& rInfo, bool bPreFormText );
+ /** Closes this table and inserts it into parent table.
+ @return Pointer to the parent table, or this, if no parent found. */
+ ScHTMLTable* CloseTable( const ImportInfo& rInfo );
+
+ /** Inserts a new cell in an unused position, starting from current cell position. */
+ void InsertNewCell( const ScHTMLSize& rSpanSize );
+
+ /** Set internal states for a new table row. */
+ void ImplRowOn();
+ /** Set internal states for leaving a table row. */
+ void ImplRowOff();
+ /** Set internal states for entering a new table cell. */
+ void ImplDataOn( const ScHTMLSize& rSpanSize );
+ /** Set internal states for leaving a table cell. */
+ void ImplDataOff();
+
+ /** Inserts additional formatting options from import info into the item set. */
+ void ProcessFormatOptions( SfxItemSet& rItemSet, const ImportInfo& rInfo );
+
+ /** Updates the document column/row size of the specified column or row.
+ @descr Only increases the present count, never decreases. */
+ void SetDocSize( ScHTMLOrient eOrient, sal_uInt16 nCellPos, sal_uInt16 nSize );
+ /** Calculates and sets the resulting size the cell needs in the document.
+ @descr Reduces the needed size in merged cells.
+ @param nCellPos The first column/row position of the (merged) cell.
+ @param nCellSpan The cell spanning in the specified orientation.
+ @param nRealDocSize The raw document size of all entries of the cell. */
+ void CalcNeededDocSize(
+ ScHTMLOrient eOrient, sal_uInt16 nCellPos,
+ sal_uInt16 nCellSpan, sal_uInt16 nRealDocSize );
+
+protected:
+ /** Creates a new HTML table without parent.
+ @descr This constructor is used to create the "global table". */
+ explicit ScHTMLTable(
+ SfxItemPool& rPool,
+ EditEngine& rEditEngine,
+ ScEEParseList& rEEParseList,
+ ScHTMLTableId& rnUnusedId );
+
+ /** Fills all empty cells in this and nested tables with dummy parse entries. */
+ void FillEmptyCells();
+ /** Recalculates the size of all columns/rows in the table, regarding nested tables. */
+ void RecalcDocSize();
+ /** Recalculates the position of all cell entries and nested tables.
+ @param rBasePos The origin of the table in the Calc document. */
+ void RecalcDocPos( const ScHTMLPos& rBasePos );
+};
+
+
+// ----------------------------------------------------------------------------
+
+/** The "global table" representing the entire HTML document; it has no parent table. */
+class ScHTMLGlobalTable : public ScHTMLTable
+{
+public:
+ explicit ScHTMLGlobalTable(
+ SfxItemPool& rPool,
+ EditEngine& rEditEngine,
+ ScEEParseList& rEEParseList,
+ ScHTMLTableId& rnUnusedId );
+
+ virtual ~ScHTMLGlobalTable();
+
+ /** Recalculates sizes and resulting positions of all document entries. */
+ void Recalc();
};
+// ============================================================================
+
+/** The HTML parser for data queries. Focuses on data import, not on layout.
+ @descr Builds the table structure correctly, ignores extended formatting
+ like pictures or column widths. */
+class ScHTMLQueryParser : public ScHTMLParser
+{
+private:
+ typedef ::std::auto_ptr< ScHTMLGlobalTable > ScHTMLGlobalTablePtr;
+
+ String maTitle; /// The title of the document.
+ ScHTMLGlobalTablePtr mpGlobTable; /// Contains the entire imported document.
+ ScHTMLTable* mpCurrTable; /// Pointer to current table (performance).
+ ScHTMLTableId mnUnusedId; /// First unused table identifier.
+ bool mbTitleOn; /// true = Inside of <title> </title>.
+
+public:
+ explicit ScHTMLQueryParser( EditEngine* pEditEngine, ScDocument* pDoc );
+ virtual ~ScHTMLQueryParser();
+
+ virtual sal_uInt32 Read( SvStream& rStrm );
+
+ /** Returns the "global table" which contains the entire HTML document. */
+ virtual const ScHTMLTable* GetGlobalTable() const;
+
+private:
+ /** Returns the specified table (may be the global table mpGlobTable, if no real table can be found). */
+ ScHTMLTable* GetTable( ScHTMLTableId nTableId ) const;
+
+ /** Handles all possible tags in the HTML document. */
+ void ProcessToken( const ImportInfo& rInfo );
+ /** Inserts a text portion into current entry. */
+ void InsertText( const ImportInfo& rInfo );
+ /** Processes the <font> tag. */
+ void FontOn( const ImportInfo& rInfo );
+
+ /** Processes the <meta> tag. */
+ void MetaOn( const ImportInfo& rInfo );
+ /** Opens the title of the HTML document (<title> tag). */
+ void TitleOn( const ImportInfo& rInfo );
+ /** Closes the title of the HTML document (</title> tag). */
+ void TitleOff( const ImportInfo& rInfo );
+
+ /** Opens a new table at the current position. */
+ void TableOn( const ImportInfo& rInfo );
+ /** Closes the current table. */
+ void TableOff( const ImportInfo& rInfo );
+ /** Opens a new table based on preformatted text. */
+ void PreOn( const ImportInfo& rInfo );
+ /** Closes the current preformatted text table. */
+ void PreOff( const ImportInfo& rInfo );
+
+ DECL_LINK( HTMLImportHdl, const ImportInfo* );
+};
+
+
+// ============================================================================
+
#endif