diff options
author | Caolán McNamara <cmc@openoffice.org> | 2002-08-13 13:52:35 +0000 |
---|---|---|
committer | Caolán McNamara <cmc@openoffice.org> | 2002-08-13 13:52:35 +0000 |
commit | 5ea8591b52ec0cb6afdc0b2118ef258d91d4715d (patch) | |
tree | c04d7ec255008f75cbe96f616f3bbeb8a48c601e /sw | |
parent | 1732f7f21f113f6341cf115f949a224dfe5de394 (diff) |
#102209# AutoDetect ucs2/utf8 byte marks
Diffstat (limited to 'sw')
-rw-r--r-- | sw/inc/shellio.hxx | 9 | ||||
-rw-r--r-- | sw/source/filter/ascii/parasc.cxx | 47 | ||||
-rw-r--r-- | sw/source/filter/ascii/wrtasc.cxx | 27 |
3 files changed, 64 insertions, 19 deletions
diff --git a/sw/inc/shellio.hxx b/sw/inc/shellio.hxx index 72e5ec0ff4e1..a47a36be544a 100644 --- a/sw/inc/shellio.hxx +++ b/sw/inc/shellio.hxx @@ -2,9 +2,9 @@ * * $RCSfile: shellio.hxx,v $ * - * $Revision: 1.15 $ + * $Revision: 1.16 $ * - * last change: $Author: mib $ $Date: 2002-06-24 12:51:56 $ + * last change: $Author: cmc $ $Date: 2002-08-13 14:52:35 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -668,7 +668,10 @@ public: static FASTBOOL IsValidStgFilter( SvStorage& , const SfxFilter& ); - static const SfxFilter* GetTextFilter( const sal_Char* pBuf, ULONG nLen ); + static bool IsDetectableText(const sal_Char* pBuf, ULONG &rLen, + CharSet *pCharSet=0, bool *pSwap=0, LineEnd *pLineEnd=0); + + static const SfxFilter* GetTextFilter(const sal_Char* pBuf, ULONG nLen); // gebe einen bestimmten Reader zurueck static Reader* GetReader( const String& rFltName ); diff --git a/sw/source/filter/ascii/parasc.cxx b/sw/source/filter/ascii/parasc.cxx index ee3cfca0a8e2..3c967545eee6 100644 --- a/sw/source/filter/ascii/parasc.cxx +++ b/sw/source/filter/ascii/parasc.cxx @@ -2,9 +2,9 @@ * * $RCSfile: parasc.cxx,v $ * - * $Revision: 1.11 $ + * $Revision: 1.12 $ * - * last change: $Author: mib $ $Date: 2002-05-24 12:40:01 $ + * last change: $Author: cmc $ $Date: 2002-08-13 14:51:42 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -749,20 +749,45 @@ ULONG SwASCIIParser::ReadChars() sal_Unicode *pStt = 0, *pEnd = 0, *pLastStt = 0; long nReadCnt = 0, nLineLen = 0; sal_Unicode cLastCR = 0; + bool bSwapUnicode; + + const SwAsciiOptions *pUseMe=&rOpt; + SwAsciiOptions aEmpty; + if (nFileSize >= 2 && + aEmpty.GetFontName() == rOpt.GetFontName() && + aEmpty.GetCharSet() == rOpt.GetCharSet() && + aEmpty.GetLanguage() == rOpt.GetLanguage() && + aEmpty.GetParaFlags() == rOpt.GetParaFlags()) + { + ULONG nLen, nOrig; + nOrig = nLen = rInput.Read(pArr, ASC_BUFFLEN); + CharSet eCharSet; + bool bRet = SwIoSystem::IsDetectableText(pArr, nLen, &eCharSet, + &bSwapUnicode); + ASSERT(bRet, "Autodetect of text import without nag dialog must " + "have failed"); + if (bRet && eCharSet != RTL_TEXTENCODING_DONTKNOW) + { + aEmpty.SetCharSet(eCharSet); + rInput.SeekRel(-(long(nLen))); + } + else + rInput.SeekRel(-(long(nOrig))); + pUseMe=&aEmpty; + } - BOOL bSwapUnicode; rtl_TextToUnicodeConverter hConverter; rtl_TextToUnicodeContext hContext; - if( RTL_TEXTENCODING_UCS2 != rOpt.GetCharSet() ) + if (RTL_TEXTENCODING_UCS2 != pUseMe->GetCharSet()) { - hConverter = rtl_createTextToUnicodeConverter( rOpt.GetCharSet() ); + hConverter = rtl_createTextToUnicodeConverter( pUseMe->GetCharSet() ); ASSERT( hConverter, "no string convert avaiable" ); - if( !hConverter ) + if (!hConverter) return ERR_W4W_DLL_ERROR | ERROR_SW_READ_BASE; - bSwapUnicode = FALSE; + bSwapUnicode = false; hContext = rtl_createTextToUnicodeContext( hConverter ); } - else + else if (pUseMe != &aEmpty) //Already successfully figured out type { hConverter = 0; rInput.StartReadingUnicodeText(); @@ -861,7 +886,7 @@ ULONG SwASCIIParser::ReadChars() // bIns = FALSE; // break; - case 0x0a: if( LINEEND_LF == rOpt.GetParaFlags() ) + case 0x0a: if( LINEEND_LF == pUseMe->GetParaFlags() ) { bIns = FALSE; *pStt = 0; @@ -873,14 +898,14 @@ ULONG SwASCIIParser::ReadChars() } break; - case 0x0d: if( LINEEND_LF != rOpt.GetParaFlags() ) + case 0x0d: if( LINEEND_LF != pUseMe->GetParaFlags() ) { bIns = FALSE; *pStt = 0; ++pStt; BOOL bChkSplit = FALSE; - if( LINEEND_CRLF == rOpt.GetParaFlags() ) + if( LINEEND_CRLF == pUseMe->GetParaFlags() ) { if( pStt == pEnd ) cLastCR = 0x0d; diff --git a/sw/source/filter/ascii/wrtasc.cxx b/sw/source/filter/ascii/wrtasc.cxx index 77f23aedc2a9..603b6eb2af67 100644 --- a/sw/source/filter/ascii/wrtasc.cxx +++ b/sw/source/filter/ascii/wrtasc.cxx @@ -2,9 +2,9 @@ * * $RCSfile: wrtasc.cxx,v $ * - * $Revision: 1.3 $ + * $Revision: 1.4 $ * - * last change: $Author: os $ $Date: 2001-09-28 08:00:34 $ + * last change: $Author: cmc $ $Date: 2002-08-13 14:51:42 $ * * The Contents of this file are made available subject to the terms of * either of the following licenses @@ -187,7 +187,8 @@ ULONG SwASCWriter::WriteStream() SwPaM* pPam = pOrigPam; BOOL bWriteSttTag = bUCS2_WithStartChar && - RTL_TEXTENCODING_UCS2 == GetAsciiOptions().GetCharSet(); + (RTL_TEXTENCODING_UCS2 == GetAsciiOptions().GetCharSet() || + RTL_TEXTENCODING_UTF8 == GetAsciiOptions().GetCharSet()); rtl_TextEncoding eOld = Strm().GetStreamCharSet(); Strm().SetStreamCharSet( GetAsciiOptions().GetCharSet() ); @@ -232,9 +233,25 @@ ULONG SwASCWriter::WriteStream() } else { - if( bWriteSttTag ) + if (bWriteSttTag) { - Strm().StartWritingUnicodeText(); + switch(GetAsciiOptions().GetCharSet()) + { + case RTL_TEXTENCODING_UTF8: + Strm() << BYTE(0xEF) << BYTE(0xBB) << + BYTE(0xBF); + break; + case RTL_TEXTENCODING_UCS2: + //Strm().StartWritingUnicodeText(); + Strm().SetEndianSwap(FALSE); +#ifdef __LITTLEENDIAN + Strm() << BYTE(0xFF) << BYTE(0xFE); +#else + Strm() << BYTE(0xFE) << BYTE(0xFF); +#endif + break; + + } bWriteSttTag = FALSE; } Out( aASCNodeFnTab, *pNd, *this ); |