summary | refs | log | tree | commit | diff
path: root/sw
diff options
context:
space:
mode:
author: Caolán McNamara <cmc@openoffice.org> 2002-08-13 13:52:35 +0000
committer: Caolán McNamara <cmc@openoffice.org> 2002-08-13 13:52:35 +0000
commit: 5ea8591b52ec0cb6afdc0b2118ef258d91d4715d (patch)
tree: c04d7ec255008f75cbe96f616f3bbeb8a48c601e /sw
parent: 1732f7f21f113f6341cf115f949a224dfe5de394 (diff)
#102209# AutoDetect ucs2/utf8 byte marks
Diffstat (limited to 'sw')
-rw-r--r-- sw/inc/shellio.hxx | 9
-rw-r--r-- sw/source/filter/ascii/parasc.cxx | 47
-rw-r--r-- sw/source/filter/ascii/wrtasc.cxx | 27
3 files changed, 64 insertions, 19 deletions
diff --git a/sw/inc/shellio.hxx b/sw/inc/shellio.hxx
index 72e5ec0ff4e1..a47a36be544a 100644
--- a/sw/inc/shellio.hxx
+++ b/sw/inc/shellio.hxx
@@ -2,9 +2,9 @@
*
* $RCSfile: shellio.hxx,v $
*
- * $Revision: 1.15 $
+ * $Revision: 1.16 $
*
- * last change: $Author: mib $ $Date: 2002-06-24 12:51:56 $
+ * last change: $Author: cmc $ $Date: 2002-08-13 14:52:35 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -668,7 +668,10 @@ public:
static FASTBOOL IsValidStgFilter( SvStorage& , const SfxFilter& );
- static const SfxFilter* GetTextFilter( const sal_Char* pBuf, ULONG nLen );
+ static bool IsDetectableText(const sal_Char* pBuf, ULONG &rLen,
+ CharSet *pCharSet=0, bool *pSwap=0, LineEnd *pLineEnd=0);
+
+ static const SfxFilter* GetTextFilter(const sal_Char* pBuf, ULONG nLen);
// gebe einen bestimmten Reader zurueck
static Reader* GetReader( const String& rFltName );
diff --git a/sw/source/filter/ascii/parasc.cxx b/sw/source/filter/ascii/parasc.cxx
index ee3cfca0a8e2..3c967545eee6 100644
--- a/sw/source/filter/ascii/parasc.cxx
+++ b/sw/source/filter/ascii/parasc.cxx
@@ -2,9 +2,9 @@
*
* $RCSfile: parasc.cxx,v $
*
- * $Revision: 1.11 $
+ * $Revision: 1.12 $
*
- * last change: $Author: mib $ $Date: 2002-05-24 12:40:01 $
+ * last change: $Author: cmc $ $Date: 2002-08-13 14:51:42 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -749,20 +749,45 @@ ULONG SwASCIIParser::ReadChars()
sal_Unicode *pStt = 0, *pEnd = 0, *pLastStt = 0;
long nReadCnt = 0, nLineLen = 0;
sal_Unicode cLastCR = 0;
+ bool bSwapUnicode;
+
+ const SwAsciiOptions *pUseMe=&rOpt;
+ SwAsciiOptions aEmpty;
+ if (nFileSize >= 2 &&
+ aEmpty.GetFontName() == rOpt.GetFontName() &&
+ aEmpty.GetCharSet() == rOpt.GetCharSet() &&
+ aEmpty.GetLanguage() == rOpt.GetLanguage() &&
+ aEmpty.GetParaFlags() == rOpt.GetParaFlags())
+ {
+ ULONG nLen, nOrig;
+ nOrig = nLen = rInput.Read(pArr, ASC_BUFFLEN);
+ CharSet eCharSet;
+ bool bRet = SwIoSystem::IsDetectableText(pArr, nLen, &eCharSet,
+ &bSwapUnicode);
+ ASSERT(bRet, "Autodetect of text import without nag dialog must "
+ "have failed");
+ if (bRet && eCharSet != RTL_TEXTENCODING_DONTKNOW)
+ {
+ aEmpty.SetCharSet(eCharSet);
+ rInput.SeekRel(-(long(nLen)));
+ }
+ else
+ rInput.SeekRel(-(long(nOrig)));
+ pUseMe=&aEmpty;
+ }
- BOOL bSwapUnicode;
rtl_TextToUnicodeConverter hConverter;
rtl_TextToUnicodeContext hContext;
- if( RTL_TEXTENCODING_UCS2 != rOpt.GetCharSet() )
+ if (RTL_TEXTENCODING_UCS2 != pUseMe->GetCharSet())
{
- hConverter = rtl_createTextToUnicodeConverter( rOpt.GetCharSet() );
+ hConverter = rtl_createTextToUnicodeConverter( pUseMe->GetCharSet() );
ASSERT( hConverter, "no string convert avaiable" );
- if( !hConverter )
+ if (!hConverter)
return ERR_W4W_DLL_ERROR | ERROR_SW_READ_BASE;
- bSwapUnicode = FALSE;
+ bSwapUnicode = false;
hContext = rtl_createTextToUnicodeContext( hConverter );
}
- else
+ else if (pUseMe != &aEmpty) //Already successfully figured out type
{
hConverter = 0;
rInput.StartReadingUnicodeText();
@@ -861,7 +886,7 @@ ULONG SwASCIIParser::ReadChars()
// bIns = FALSE;
// break;
- case 0x0a: if( LINEEND_LF == rOpt.GetParaFlags() )
+ case 0x0a: if( LINEEND_LF == pUseMe->GetParaFlags() )
{
bIns = FALSE;
*pStt = 0;
@@ -873,14 +898,14 @@ ULONG SwASCIIParser::ReadChars()
}
break;
- case 0x0d: if( LINEEND_LF != rOpt.GetParaFlags() )
+ case 0x0d: if( LINEEND_LF != pUseMe->GetParaFlags() )
{
bIns = FALSE;
*pStt = 0;
++pStt;
BOOL bChkSplit = FALSE;
- if( LINEEND_CRLF == rOpt.GetParaFlags() )
+ if( LINEEND_CRLF == pUseMe->GetParaFlags() )
{
if( pStt == pEnd )
cLastCR = 0x0d;
diff --git a/sw/source/filter/ascii/wrtasc.cxx b/sw/source/filter/ascii/wrtasc.cxx
index 77f23aedc2a9..603b6eb2af67 100644
--- a/sw/source/filter/ascii/wrtasc.cxx
+++ b/sw/source/filter/ascii/wrtasc.cxx
@@ -2,9 +2,9 @@
*
* $RCSfile: wrtasc.cxx,v $
*
- * $Revision: 1.3 $
+ * $Revision: 1.4 $
*
- * last change: $Author: os $ $Date: 2001-09-28 08:00:34 $
+ * last change: $Author: cmc $ $Date: 2002-08-13 14:51:42 $
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
@@ -187,7 +187,8 @@ ULONG SwASCWriter::WriteStream()
SwPaM* pPam = pOrigPam;
BOOL bWriteSttTag = bUCS2_WithStartChar &&
- RTL_TEXTENCODING_UCS2 == GetAsciiOptions().GetCharSet();
+ (RTL_TEXTENCODING_UCS2 == GetAsciiOptions().GetCharSet() ||
+ RTL_TEXTENCODING_UTF8 == GetAsciiOptions().GetCharSet());
rtl_TextEncoding eOld = Strm().GetStreamCharSet();
Strm().SetStreamCharSet( GetAsciiOptions().GetCharSet() );
@@ -232,9 +233,25 @@ ULONG SwASCWriter::WriteStream()
}
else
{
- if( bWriteSttTag )
+ if (bWriteSttTag)
{
- Strm().StartWritingUnicodeText();
+ switch(GetAsciiOptions().GetCharSet())
+ {
+ case RTL_TEXTENCODING_UTF8:
+ Strm() << BYTE(0xEF) << BYTE(0xBB) <<
+ BYTE(0xBF);
+ break;
+ case RTL_TEXTENCODING_UCS2:
+ //Strm().StartWritingUnicodeText();
+ Strm().SetEndianSwap(FALSE);
+#ifdef __LITTLEENDIAN
+ Strm() << BYTE(0xFF) << BYTE(0xFE);
+#else
+ Strm() << BYTE(0xFE) << BYTE(0xFF);
+#endif
+ break;
+
+ }
bWriteSttTag = FALSE;
}
Out( aASCNodeFnTab, *pNd, *this );