summaryrefslogtreecommitdiff
path: root/sw/source/filter/ascii/parasc.cxx
diff options
context:
space:
mode:
Diffstat (limited to 'sw/source/filter/ascii/parasc.cxx')
-rw-r--r--sw/source/filter/ascii/parasc.cxx515
1 files changed, 515 insertions, 0 deletions
diff --git a/sw/source/filter/ascii/parasc.cxx b/sw/source/filter/ascii/parasc.cxx
new file mode 100644
index 000000000000..fd4ab7226e1a
--- /dev/null
+++ b/sw/source/filter/ascii/parasc.cxx
@@ -0,0 +1,515 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*************************************************************************
+ *
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * Copyright 2000, 2010 Oracle and/or its affiliates.
+ *
+ * OpenOffice.org - a multi-platform office productivity suite
+ *
+ * This file is part of OpenOffice.org.
+ *
+ * OpenOffice.org is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License version 3
+ * only, as published by the Free Software Foundation.
+ *
+ * OpenOffice.org is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License version 3 for more details
+ * (a copy is included in the LICENSE file that accompanied this code).
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * version 3 along with OpenOffice.org. If not, see
+ * <http://www.openoffice.org/license.html>
+ * for a copy of the LGPLv3 License.
+ *
+ ************************************************************************/
+
+// MARKER(update_precomp.py): autogen include statement, do not remove
+#include "precompiled_sw.hxx"
+
+#include <tools/stream.hxx>
+#include <hintids.hxx>
+#include <rtl/tencinfo.h>
+#include <sfx2/printer.hxx>
+#include <editeng/fontitem.hxx>
+#include <editeng/langitem.hxx>
+#include <editeng/brkitem.hxx>
+#include <editeng/scripttypeitem.hxx>
+#include <shellio.hxx>
+#include <doc.hxx>
+#include <swtypes.hxx>
+#include <ndtxt.hxx>
+#include <pam.hxx>
+#include <frmatr.hxx>
+#include <fltini.hxx>
+#include <pagedesc.hxx>
+#include <breakit.hxx>
+#include <swerror.h>
+#include <statstr.hrc> // ResId fuer Statusleiste
+#include <mdiexp.hxx> // ...Percent()
+#include <poolfmt.hxx>
+
+#include "vcl/metric.hxx"
+
+#define ASC_BUFFLEN 4096 // maximum number of bytes held in the read buffer per block (the buffer is allocated with 2 spare bytes)
+
+/**
+ * Imports plain text from a stream into a Writer document.
+ *
+ * One instance handles one import run: CallParser() drives the run,
+ * ReadChars() decodes the stream and splits it into paragraphs, and
+ * InsertText() adds text at the insert position and records which script
+ * types occurred in it.
+ *
+ * The object owns pPam, pArr and pItemSet and deletes them in its
+ * destructor, therefore it must not be copied.
+ */
+class SwASCIIParser
+{
+    SwDoc* pDoc;                    // target document (not owned)
+    SwPaM* pPam;                    // insert position, own copy made from the caller's cursor (owned)
+    SvStream& rInput;               // stream the text is read from
+    sal_Char* pArr;                 // raw read buffer of ASC_BUFFLEN + 2 bytes (owned)
+    const SwAsciiOptions& rOpt;     // import options handed in by the caller
+    SfxItemSet* pItemSet;           // font/language attributes to apply after reading (owned)
+    long nFileSize;                 // size of the stream, determined in CallParser()
+    sal_uInt16 nScript;             // script type bits collected from the inserted text
+    bool bNewDoc;                   // true: a new document is filled, false: text is inserted
+
+    // Not copyable: a copy would share the owned raw pointers and delete
+    // them twice. Declared but intentionally not implemented.
+    SwASCIIParser( const SwASCIIParser& );
+    SwASCIIParser& operator=( const SwASCIIParser& );
+
+    // Reads and converts the stream, inserting text and paragraph breaks.
+    sal_uLong ReadChars();
+    // Inserts rStr at the insert position and updates nScript.
+    void InsertText( const String& rStr );
+
+public:
+    // pD: target document, rCrsr: cursor whose point is the insert position,
+    // rIn: input stream, bReadNewDoc: non-zero when a new document is read,
+    // rOpts: import options (language, font, character set, line ends).
+    SwASCIIParser( SwDoc* pD, const SwPaM& rCrsr, SvStream& rIn,
+                   int bReadNewDoc, const SwAsciiOptions& rOpts );
+    ~SwASCIIParser();
+
+    // Runs the import and returns the error code of ReadChars().
+    sal_uLong CallParser();
+};
+
+
+// Entry point for the generic Reader interface.
+//
+// Imports the text of the reader's stream (pStrm) into rDoc at rPam using a
+// temporary SwASCIIParser. The two String parameters are unused.
+// Returns ERR_SWG_READ_ERROR when no stream is set, otherwise the result of
+// SwASCIIParser::CallParser(). The ASCII options of the reader are reset
+// after the import.
+sal_uLong AsciiReader::Read( SwDoc &rDoc, const String&, SwPaM &rPam, const String & )
+{
+    if( !pStrm )
+    {
+        // Deliberately always-failing assertion. The condition is 'false'
+        // rather than '!this': 'this' is never null in well-defined code,
+        // so testing it is flagged by compilers.
+        OSL_ENSURE( false, "ASCII-Read ohne Stream" );
+        return ERR_SWG_READ_ERROR;
+    }
+
+    // Headings normally have no chapter numbering, so switch it off
+    // explicitly here because the default is ON again.
+    if( !bInsertMode )
+        Reader::SetNoOutlineNum( rDoc );
+
+    sal_uLong nRet = 0;
+    {
+        // The parser lives on the stack and is destroyed at the end of this
+        // scope, i.e. before the options it refers to are reset.
+        SwASCIIParser aParser( &rDoc, rPam, *pStrm,
+                               !bInsertMode, aOpt.GetASCIIOpts() );
+        nRet = aParser.CallParser();
+    }
+
+    // after Read reset the options
+    aOpt.ResetASCIIOpts();
+    return nRet;
+}
+
+// Creates the parser state for one import run.
+//
+// Copies the point of rCrsr into an own SwPaM, allocates the read buffer
+// (ASC_BUFFLEN + 2 bytes) and an item set for the font and language
+// attributes of the Western, CJK and CTL scripts. If the options name a
+// language and/or a font, these are put into the item set as defaults for
+// all three scripts.
+SwASCIIParser::SwASCIIParser(SwDoc* pD, const SwPaM& rCrsr, SvStream& rIn,
+    int bReadNewDoc, const SwAsciiOptions& rOpts)
+    : pDoc(pD)
+    , pPam(new SwPaM( *rCrsr.GetPoint() ))
+    , rInput(rIn)
+    , pArr(new sal_Char [ ASC_BUFFLEN + 2 ])
+    , rOpt(rOpts)
+    , pItemSet(new SfxItemSet( pD->GetAttrPool(),
+                RES_CHRATR_FONT, RES_CHRATR_LANGUAGE,
+                RES_CHRATR_CJK_FONT, RES_CHRATR_CJK_LANGUAGE,
+                RES_CHRATR_CTL_FONT, RES_CHRATR_CTL_LANGUAGE,
+                0 ))
+    , nFileSize(0)
+    , nScript(0)
+    , bNewDoc(bReadNewDoc)
+{
+    // set defaults from the options
+    if( 0 != rOpt.GetLanguage() )
+    {
+        // the same language item is stored once per script
+        static const sal_uInt16 aLangIds[] =
+        {
+            RES_CHRATR_LANGUAGE, RES_CHRATR_CJK_LANGUAGE, RES_CHRATR_CTL_LANGUAGE
+        };
+        const SvxLanguageItem aLangItem( (LanguageType)rOpt.GetLanguage(),
+                                         RES_CHRATR_LANGUAGE );
+        for( unsigned int i = 0; i < sizeof(aLangIds) / sizeof(aLangIds[0]); ++i )
+            pItemSet->Put( aLangItem, aLangIds[i] );
+    }
+
+    if( 0 != rOpt.GetFontName().Len() )
+    {
+        Font aTextFont( rOpt.GetFontName(), Size( 0, 10 ) );
+        // if the document has a printer, take the font data from its metric
+        if( pDoc->getPrinter( false ) )
+            aTextFont = pDoc->getPrinter( false )->GetFontMetric( aTextFont );
+
+        // the same font item is stored once per script
+        static const sal_uInt16 aFontIds[] =
+        {
+            RES_CHRATR_FONT, RES_CHRATR_CJK_FONT, RES_CHRATR_CTL_FONT
+        };
+        const SvxFontItem aFontItem( aTextFont.GetFamily(), aTextFont.GetName(),
+                aEmptyStr, aTextFont.GetPitch(), aTextFont.GetCharSet(), RES_CHRATR_FONT );
+        for( unsigned int i = 0; i < sizeof(aFontIds) / sizeof(aFontIds[0]); ++i )
+            pItemSet->Put( aFontItem, aFontIds[i] );
+    }
+}
+
+// Frees everything the constructor allocated, in reverse order of
+// acquisition. pItemSet may already be 0 here because CallParser() deletes
+// it after applying the attributes; deleting a null pointer is harmless.
+SwASCIIParser::~SwASCIIParser()
+{
+    delete pItemSet;
+    delete [] pArr;
+    delete pPam;
+}
+
+
+// Runs the parser.
+//
+// Determines the stream size, shows the progress bar, reads the text via
+// ReadChars() and afterwards applies the font/language attributes from
+// pItemSet, restricted to the script types that actually occurred in the
+// inserted text:
+//  - new document: fonts go to the paragraph style that was set on the
+//    text, everything else becomes a document default;
+//  - insert mode: the attributes are set over the inserted range.
+// pItemSet is deleted afterwards. Returns the error code of ReadChars().
+sal_uLong SwASCIIParser::CallParser()
+{
+    // seek to the end to learn the stream size, then rewind
+    rInput.Seek(STREAM_SEEK_TO_END);
+    rInput.ResetError();
+
+    nFileSize = rInput.Tell();
+    rInput.Seek(STREAM_SEEK_TO_BEGIN);
+    rInput.ResetError();
+
+    ::StartProgress( STR_STATSTR_W4WREAD, 0, nFileSize, pDoc->GetDocShell() );
+
+    SwPaM* pInsPam = 0;
+    xub_StrLen nSttCntnt = 0;
+    SwTxtFmtColl *pColl = 0;
+
+    if (bNewDoc)
+    {
+        // new document: use the preformatted-text style, or the standard
+        // style if that one is not available
+        pColl = pDoc->GetTxtCollFromPool(RES_POOLCOLL_HTML_PRE, false);
+        if (!pColl)
+            pColl = pDoc->GetTxtCollFromPool(RES_POOLCOLL_STANDARD,false);
+        if (pColl)
+            pDoc->SetTxtFmtColl(*pPam, pColl);
+    }
+    else
+    {
+        // insert mode: remember where the insertion starts. The PaM is
+        // created with node offsets 0 and -1 relative to the insert node;
+        // its point is moved forward by one node again further below.
+        const SwNodeIndex& rTmp = pPam->GetPoint()->nNode;
+        pInsPam = new SwPaM( rTmp, rTmp, 0, -1 );
+        nSttCntnt = pPam->GetPoint()->nContent.GetIndex();
+    }
+
+    const sal_uLong nError = ReadChars();
+
+    if( pItemSet )
+    {
+        // set only the attributes of the scripts that were found in the text
+        if( !( SCRIPTTYPE_LATIN & nScript ))
+        {
+            pItemSet->ClearItem( RES_CHRATR_FONT );
+            pItemSet->ClearItem( RES_CHRATR_LANGUAGE );
+        }
+        if( !( SCRIPTTYPE_ASIAN & nScript ))
+        {
+            pItemSet->ClearItem( RES_CHRATR_CJK_FONT );
+            pItemSet->ClearItem( RES_CHRATR_CJK_LANGUAGE );
+        }
+        if( !( SCRIPTTYPE_COMPLEX & nScript ))
+        {
+            pItemSet->ClearItem( RES_CHRATR_CTL_FONT );
+            pItemSet->ClearItem( RES_CHRATR_CTL_LANGUAGE );
+        }
+
+        if( pItemSet->Count() )
+        {
+            if( bNewDoc )
+            {
+                if (pColl)
+                {
+                    // Using the pool defaults for the font causes significant
+                    // trouble for the HTML filter, because it is not able
+                    // to export the pool defaults (or to be more precise:
+                    // the HTML filter is not able to detect whether a pool
+                    // default has changed or not). Even a comparison with the
+                    // HTML template does not work, because the defaults are
+                    // not copied when a new doc is created. The result of
+                    // comparing pool defaults therefore would be that the
+                    // defaults are always exported if they have changed for
+                    // text documents in general. That's not sensible, as well
+                    // as it is not sensible to export them always.
+                    // So the fonts are set at the paragraph style instead.
+                    const sal_uInt16 aWhichIds[4] =
+                    {
+                        RES_CHRATR_FONT, RES_CHRATR_CJK_FONT,
+                        RES_CHRATR_CTL_FONT, 0
+                    };
+                    for( const sal_uInt16 *pWhichIds = aWhichIds; *pWhichIds; ++pWhichIds )
+                    {
+                        const SfxPoolItem *pItem;
+                        if (SFX_ITEM_SET == pItemSet->GetItemState(*pWhichIds,
+                                false, &pItem))
+                        {
+                            pColl->SetFmtAttr( *pItem );
+                            pItemSet->ClearItem( *pWhichIds );
+                        }
+                    }
+                }
+                // whatever is left becomes a document default
+                if (pItemSet->Count())
+                    pDoc->SetDefault(*pItemSet);
+            }
+            else if( pInsPam )
+            {
+                // then set the defined attributes over the inserted range
+                *pInsPam->GetMark() = *pPam->GetPoint();
+                pInsPam->GetPoint()->nNode++;
+                pInsPam->GetPoint()->nContent.Assign(
+                        pInsPam->GetCntntNode(), nSttCntnt );
+
+                // !!!!!
+                // Deliberately always-failing assertion (open to-do). The
+                // condition is 'false' rather than '!this': 'this' is never
+                // null in well-defined code.
+                OSL_ENSURE( false, "Have to change - hard attr. to para. style" );
+                pDoc->InsertItemSet( *pInsPam, *pItemSet, 0 );
+            }
+        }
+        delete pItemSet;
+        pItemSet = 0;
+    }
+
+    // deleting a null pointer is a no-op, so no check is needed
+    delete pInsPam;
+
+    ::EndProgress( pDoc->GetDocShell() );
+    return nError;
+}
+/*
+ * Reads the input stream block by block and inserts its text into the
+ * document, splitting paragraphs at line ends and form feeds.
+ *
+ * If all options still have their default values and the file has at least
+ * two bytes, the character set is first autodetected from the first block.
+ * Unless the character set is RTL_TEXTENCODING_UCS2, every block is
+ * converted with rtl_convertTextToUnicode(); bytes of an incomplete
+ * character at the end of a block are carried over to the next block.
+ * UCS2 input is used directly as sal_Unicode, byte-swapped if required.
+ *
+ * Returns ERROR_SW_READ_BASE if no text converter could be created,
+ * otherwise 0. A stream error only ends the read loop; it is not reported
+ * through the return value.
+ */
+sal_uLong SwASCIIParser::ReadChars()
+{
+ sal_Unicode *pStt = 0, *pEnd = 0, *pLastStt = 0; // scan position, end of the current block, start of the text not yet inserted
+ long nReadCnt = 0, nLineLen = 0; // bytes reported to the progress bar; characters in the current paragraph
+ sal_Unicode cLastCR = 0; // 0x0d when a block ended directly after a CR in CRLF mode
+ bool bSwapUnicode = false; // UCS2 input needs its bytes swapped
+ // Use the caller's options, unless they are all defaults: then try to detect the encoding from the first block.
+ const SwAsciiOptions *pUseMe=&rOpt;
+ SwAsciiOptions aEmpty;
+ if (nFileSize >= 2 &&
+ aEmpty.GetFontName() == rOpt.GetFontName() &&
+ aEmpty.GetCharSet() == rOpt.GetCharSet() &&
+ aEmpty.GetLanguage() == rOpt.GetLanguage() &&
+ aEmpty.GetParaFlags() == rOpt.GetParaFlags())
+ {
+ sal_uLong nLen, nOrig;
+ nOrig = nLen = rInput.Read(pArr, ASC_BUFFLEN);
+ CharSet eCharSet;
+ bool bRet = SwIoSystem::IsDetectableText(pArr, nLen, &eCharSet, &bSwapUnicode); // NOTE(review): nLen is presumably adjusted by the call (nOrig keeps the count actually read) - confirm
+ OSL_ENSURE(bRet, "Autodetect of text import without nag dialog must "
+ "have failed");
+ if (bRet && eCharSet != RTL_TEXTENCODING_DONTKNOW)
+ {
+ aEmpty.SetCharSet(eCharSet);
+ rInput.SeekRel(-(long(nLen))); // rewind by nLen
+ }
+ else
+ rInput.SeekRel(-(long(nOrig))); // rewind everything that was read
+ pUseMe=&aEmpty; // from here on work with the default options (plus the detected character set, if any)
+ }
+ // Set up the text-to-Unicode converter; UCS2 input does not need one.
+ rtl_TextToUnicodeConverter hConverter=0;
+ rtl_TextToUnicodeContext hContext=0;
+ CharSet currentCharSet = pUseMe->GetCharSet();
+ if (RTL_TEXTENCODING_UCS2 != currentCharSet)
+ {
+ if( currentCharSet == RTL_TEXTENCODING_DONTKNOW )
+ currentCharSet = RTL_TEXTENCODING_ASCII_US;
+ hConverter = rtl_createTextToUnicodeConverter( currentCharSet );
+ OSL_ENSURE( hConverter, "no string convert avaiable" );
+ if (!hConverter)
+ return ERROR_SW_READ_BASE;
+ bSwapUnicode = false;
+ hContext = rtl_createTextToUnicodeContext( hConverter );
+ }
+ else if (pUseMe != &aEmpty) // UCS2 was requested explicitly (no autodetection ran): ask the stream for the byte order; after autodetection bSwapUnicode is already set
+ {
+ rInput.StartReadingUnicodeText( currentCharSet );
+ bSwapUnicode = rInput.IsEndianSwap();
+ }
+
+ String sWork; // receives the converted text of the current block
+ sal_uLong nArrOffset = 0; // number of unconverted bytes carried over to the next block
+
+ do {
+ if( pStt >= pEnd ) // current block is used up (also true on the first pass)
+ {
+ if( pLastStt != pStt )
+ InsertText( String( pLastStt ));
+
+ // read in a new block
+ sal_uLong lGCount;
+ if( SVSTREAM_OK != rInput.GetError() || 0 == (lGCount =
+ rInput.Read( pArr + nArrOffset,
+ ASC_BUFFLEN - nArrOffset )))
+ break; // leave the loop
+
+ /*
+ If there were some unconverted bytes on the last cycle then they
+ were put at the beginning of the array, so total bytes available
+ to convert this cycle includes them. If we found 0 following bytes
+ then we ignore the previous partial character.
+ */
+ lGCount+=nArrOffset;
+
+ if( hConverter )
+ {
+ sal_uInt32 nInfo;
+ sal_Size nNewLen = lGCount, nCntBytes;
+ sal_Unicode* pBuf = sWork.AllocBuffer( static_cast< xub_StrLen >(nNewLen) );
+
+ nNewLen = rtl_convertTextToUnicode( hConverter, hContext,
+ pArr, lGCount, pBuf, nNewLen,
+ (
+ RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
+ RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
+ RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT |
+ RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE
+ ),
+ &nInfo,
+ &nCntBytes );
+ if( 0 != ( nArrOffset = lGCount - nCntBytes ) ) // bytes the converter did not consume
+ memmove( pArr, pArr + nCntBytes, nArrOffset ); // keep them at the buffer start for the next block
+ sWork.ReleaseBufferAccess( static_cast< xub_StrLen >(nNewLen) );
+
+ pStt = pLastStt = sWork.GetBufferAccess();
+ pEnd = pStt + nNewLen;
+ }
+ else
+ {
+ pStt = pLastStt = (sal_Unicode*)pArr; // no conversion: the raw buffer is scanned as sal_Unicode
+ pEnd = (sal_Unicode*)(pArr + lGCount);
+
+ if( bSwapUnicode )
+ {
+ sal_Char* pF = pArr, *pN = pArr + 1;
+ for( sal_uLong n = 0; n < lGCount; n += 2, pF += 2, pN += 2 ) // exchange the two bytes of every character
+ {
+ sal_Char c = *pF;
+ *pF = *pN;
+ *pN = c;
+ }
+ }
+ }
+
+ *pEnd = 0; // terminate the block so that a String built from pLastStt ends here
+ nReadCnt += lGCount; // NOTE(review): lGCount includes the nArrOffset bytes carried over, which were already counted in the previous round - confirm
+
+ ::SetProgressState( nReadCnt, pDoc->GetDocShell() );
+ // the previous block ended with a CR whose following character could not be checked yet
+ if( cLastCR )
+ {
+ if( 0x0a == *pStt && 0x0d == cLastCR )
+ pLastStt = ++pStt;
+ cLastCR = 0;
+ nLineLen = 0;
+ // no new paragraph for a line end at the very end of the input
+ if( !rInput.IsEof() || !(pEnd == pStt ||
+ ( !*pEnd && pEnd == pStt+1 ) ) )
+ pDoc->SplitNode( *pPam->GetPoint(), false );
+ }
+ }
+
+ bool bIns = true, bSplitNode = false; // bIns: keep the character as text; bSplitNode: insert the pending text and start a new paragraph
+ switch( *pStt )
+ {
+ // LF ends a paragraph only in LINEEND_LF mode
+ case 0x0a: if( LINEEND_LF == pUseMe->GetParaFlags() )
+ {
+ bIns = false;
+ *pStt = 0;
+ ++pStt;
+
+ // no new paragraph for a line end at the very end of the input
+ if( !rInput.IsEof() || pEnd != pStt )
+ bSplitNode = true;
+ }
+ break;
+ // CR is handled in every mode except LINEEND_LF; in CRLF mode a directly following LF is skipped as well
+ case 0x0d: if( LINEEND_LF != pUseMe->GetParaFlags() )
+ {
+ bIns = false;
+ *pStt = 0;
+ ++pStt;
+
+ bool bChkSplit = false;
+ if( LINEEND_CRLF == pUseMe->GetParaFlags() )
+ {
+ if( pStt == pEnd )
+ cLastCR = 0x0d; // CR is the last character of the block: decide when the next block is there
+ else if( 0x0a == *pStt )
+ {
+ ++pStt;
+ bChkSplit = true;
+ }
+ // NOTE(review): a CR not followed by LF is only overwritten with 0 here; the text after it up to the next split looks like it is cut off by that terminator - confirm
+ }
+ else
+ bChkSplit = true;
+
+ // no new paragraph for a line end at the very end of the input
+ if( bChkSplit && ( !rInput.IsEof() || pEnd != pStt ))
+ bSplitNode = true;
+ }
+ break;
+
+ case 0x0c:
+ {
+ // form feed: insert a hard page break
+ *pStt++ = 0;
+ if( nLineLen )
+ {
+ InsertText( String( pLastStt ));
+ }
+ pDoc->SplitNode( *pPam->GetPoint(), false );
+ pDoc->InsertPoolItem( *pPam, SvxFmtBreakItem(
+ SVX_BREAK_PAGE_BEFORE, RES_BREAK ), 0);
+ pLastStt = pStt;
+ nLineLen = 0;
+ bIns = false;
+ }
+ break;
+
+ case 0x1a:
+ if( nReadCnt == nFileSize && pStt+1 == pEnd ) // 0x1a as the very last character of the file is dropped
+ *pStt = 0;
+ else
+ *pStt = '#'; // replacement character
+ break;
+
+ case '\t': break;
+
+ default:
+ if( ' ' > *pStt )
+ // control character found: replace it with '#'
+ *pStt = '#';
+ break;
+ }
+
+ if( bIns )
+ {
+ if( ( nLineLen >= MAX_ASCII_PARA - 100 ) &&
+ ( ( *pStt == ' ' ) || ( nLineLen >= MAX_ASCII_PARA - 1 ) ) )
+ { // the paragraph is getting too long: split at a blank, or unconditionally at the limit
+ sal_Unicode c = *pStt;
+ *pStt = 0;
+ InsertText( String( pLastStt ));
+ pDoc->SplitNode( *pPam->GetPoint(), false );
+ pLastStt = pStt;
+ nLineLen = 0;
+ *pStt = c;
+ }
+ ++pStt;
+ ++nLineLen;
+ }
+ else if( bSplitNode )
+ {
+ // a line end was recognized, so store the text
+ // and start a new paragraph
+ InsertText( String( pLastStt ));
+ pDoc->SplitNode( *pPam->GetPoint(), false );
+ pLastStt = pStt;
+ nLineLen = 0;
+ }
+ } while(true);
+
+ if( hConverter )
+ {
+ rtl_destroyTextToUnicodeContext( hConverter, hContext );
+ rtl_destroyTextToUnicodeConverter( hConverter );
+ }
+ return 0;
+}
+
+// Inserts rStr at the current insert position and adds the script types
+// occurring in it to nScript. The script scan is skipped when there is no
+// item set or no break iterator, or when all three script types have
+// already been seen.
+void SwASCIIParser::InsertText( const String& rStr )
+{
+    pDoc->InsertString( *pPam, rStr );
+
+    const int nAllScripts = SCRIPTTYPE_LATIN | SCRIPTTYPE_ASIAN | SCRIPTTYPE_COMPLEX;
+    if( !pItemSet || !pBreakIt || nAllScripts == nScript )
+        return;
+
+    nScript |= pBreakIt->GetAllScriptsOfText( rStr );
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */