summaryrefslogtreecommitdiff
path: root/sax
diff options
context:
space:
mode:
authorMichael Stahl <mst@openoffice.org>2010-01-18 16:29:56 +0100
committerMichael Stahl <mst@openoffice.org>2010-01-18 16:29:56 +0100
commit8786083eb9dabb0d7b328a217ba99a1d71493ad7 (patch)
tree14205b8915401520b73c75a100eb7b7d6ad908cc /sax
parent8fda468433d62653864fc7fe4f6a7e306cdcb28c (diff)
sw33bf02: #i107437#: sax::Converter: support XMLSchema-2 "dateTime" better:
Reimplement convertDateOrDateTime() to parse all "dateTime" features. Also fix the serialization function to write leading zeros if year < 1000.
Diffstat (limited to 'sax')
-rw-r--r--sax/source/tools/converter.cxx437
1 files changed, 356 insertions, 81 deletions
diff --git a/sax/source/tools/converter.cxx b/sax/source/tools/converter.cxx
index e13df493628a..6eb66db984dd 100644
--- a/sax/source/tools/converter.cxx
+++ b/sax/source/tools/converter.cxx
@@ -962,8 +962,9 @@ readUnsignedNumber(const ::rtl::OUString & rString,
{
bool bOverflow(false);
sal_Int32 nTemp(0);
+ sal_Int32 nPos(io_rnPos);
- for (sal_Int32 nPos = io_rnPos; (nPos < rString.getLength()); ++nPos)
+ while (nPos < rString.getLength())
{
const sal_Unicode c = rString[nPos];
if ((sal_Unicode('0') <= c) && (c <= sal_Unicode('9')))
@@ -977,25 +978,20 @@ readUnsignedNumber(const ::rtl::OUString & rString,
}
else
{
- if (io_rnPos != nPos) // read something?
- {
- io_rnPos = nPos;
- if (bOverflow)
- {
- return R_OVERFLOW;
- }
- else
- {
- o_rNumber = nTemp;
- return R_SUCCESS;
- }
- }
- else break;
+ break;
}
+ ++nPos;
+ }
+
+ if (io_rnPos == nPos) // read something?
+ {
+ o_rNumber = -1;
+ return R_NOTHING;
}
- o_rNumber = -1;
- return R_NOTHING;
+ io_rnPos = nPos;
+ o_rNumber = nTemp;
+ return (bOverflow) ? R_OVERFLOW : R_SUCCESS;
}
static bool
@@ -1288,6 +1284,15 @@ void Converter::convertDateTime(
const sal_Unicode zero('0');
const sal_Unicode tee ('T');
+ if (i_rDateTime.Year < 1000) {
+ i_rBuffer.append(zero);
+ }
+ if (i_rDateTime.Year < 100) {
+ i_rBuffer.append(zero);
+ }
+ if (i_rDateTime.Year < 10) {
+ i_rBuffer.append(zero);
+ }
i_rBuffer.append( static_cast<sal_Int32>(i_rDateTime.Year) ).append(dash);
if( i_rDateTime.Month < 10 ) {
i_rBuffer.append(zero);
@@ -1355,6 +1360,46 @@ bool Converter::convertDateTime( util::DateTime& rDateTime,
}
}
+/** Read one unsigned numeric field of a date/time string.
+
+    @param rString      string being parsed
+    @param io_rnPos     position handed to readUnsignedNumber; on return it
+                        holds whatever readUnsignedNumber left there
+    @param o_rnTarget   receives the number; written only when true is returned
+    @param nMinLength   minimum number of characters the field must span
+    @param bExactLength if true, the field must span exactly nMinLength
+                        characters
+    @return true iff readUnsignedNumber reported R_SUCCESS and the length
+            constraint is met
+ */
+static bool
+readDateTimeComponent(const ::rtl::OUString & rString,
+    sal_Int32 & io_rnPos, sal_Int32 & o_rnTarget,
+    const sal_Int32 nMinLength, const bool bExactLength)
+{
+    const sal_Int32 nStart = io_rnPos;
+    sal_Int32 nValue = 0;
+    if (R_SUCCESS == readUnsignedNumber(rString, io_rnPos, nValue))
+    {
+        // how many characters did the number occupy?
+        const sal_Int32 nSpan = io_rnPos - nStart;
+        const bool bLongEnough = (nSpan >= nMinLength);
+        const bool bNotTooLong = !bExactLength || (nSpan <= nMinLength);
+        if (bLongEnough && bNotTooLong)
+        {
+            o_rnTarget = nValue;
+            return true;
+        }
+        // otherwise: bad length, fall through to failure
+    }
+    return false;
+}
+
+/** Tell whether nYear is a leap year according to the Gregorian rule.
+
+    A year is a leap year if it is divisible by 4, except for century years
+    (divisible by 100), which are leap years only if they are also divisible
+    by 400. So 1900 and 2100 are not leap years, while 2000 and 2400 are.
+
+    @param nYear the year number to test
+    @return true iff nYear is a leap year
+ */
+static bool lcl_isLeapYear(const sal_uInt32 nYear)
+{
+    return ((nYear % 4) == 0)
+        && (((nYear % 100) != 0) || ((nYear % 400) == 0));
+}
+
+/** Return the number of days in the given month of the given year.
+
+    @param nMonth month number; asserted to be in 1..12 (the assertion is the
+                  only range check, so callers must validate beforehand)
+    @param nYear  year, consulted via lcl_isLeapYear only when nMonth is 2
+    @return 29 for month 2 of a year lcl_isLeapYear accepts, otherwise the
+            table entry for the month
+ */
+static sal_uInt16
+lcl_MaxDaysPerMonth(const sal_Int32 nMonth, const sal_Int32 nYear)
+{
+    OSL_ASSERT(0 < nMonth && nMonth <= 12);
+    const bool bLeapFebruary = (nMonth == 2) && lcl_isLeapYear(nYear);
+    if (!bLeapFebruary)
+    {
+        // day counts for January..December of a non-leap year
+        static const sal_uInt16 s_aDaysInMonth[12] =
+            { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
+        return s_aDaysInMonth[nMonth - 1];
+    }
+    return 29;
+}
+
/** convert ISO "date" or "dateTime" string to util::DateTime or util::Date */
bool Converter::convertDateOrDateTime(
util::Date & rDate, util::DateTime & rDateTime,
@@ -1362,97 +1407,233 @@ bool Converter::convertDateOrDateTime(
{
bool bSuccess = true;
- rtl::OUString aDateStr, aTimeStr, sDoubleStr;
- sal_Int32 nPos = rString.indexOf( (sal_Unicode) 'T' );
- sal_Int32 nPos2 = rString.indexOf( (sal_Unicode) ',' );
- if (nPos2 < 0)
- nPos2 = rString.indexOf( (sal_Unicode) '.' );
- if ( nPos >= 0 )
+ const ::rtl::OUString string = rString.trim().toAsciiUpperCase();
+ sal_Int32 nPos(0);
+ bool bNegative(false);
+ if ((string.getLength() > nPos) && (sal_Unicode('-') == string[nPos]))
{
- aDateStr = rString.copy( 0, nPos );
- if ( nPos2 >= 0 )
- {
- aTimeStr = rString.copy( nPos + 1, nPos2 - nPos - 1 );
- sDoubleStr = OUString(RTL_CONSTASCII_USTRINGPARAM("0."));
- sDoubleStr += rString.copy( nPos2 + 1 );
- }
- else
- {
- aTimeStr = rString.copy(nPos + 1);
- sDoubleStr = OUString(RTL_CONSTASCII_USTRINGPARAM("0.0"));
- }
+ ++nPos;
+ bNegative = true;
}
- else
- aDateStr = rString; // no separator: only date part
- sal_Int32 nYear = 1899;
- sal_Int32 nMonth = 12;
- sal_Int32 nDay = 30;
- sal_Int32 nHour = 0;
- sal_Int32 nMin = 0;
- sal_Int32 nSec = 0;
+ sal_Int32 nYear(0);
+ {
+ bSuccess = readDateTimeComponent(string, nPos, nYear, 4, false);
+ bSuccess &= (0 < nYear);
+ bSuccess &= (nPos < string.getLength()); // not last token
+ }
+ if (bSuccess && (sal_Unicode('-') != string[nPos])) // separator
+ {
+ bSuccess = false;
+ }
+ if (bSuccess)
+ {
+ ++nPos;
+ }
- const sal_Unicode* pStr = aDateStr.getStr();
- sal_Int32 nDateTokens = 1;
- while ( *pStr )
+ sal_Int32 nMonth(0);
+ if (bSuccess)
{
- if ( *pStr == '-' )
- nDateTokens++;
- pStr++;
+ bSuccess = readDateTimeComponent(string, nPos, nMonth, 2, true);
+ bSuccess &= (0 < nMonth) && (nMonth <= 12);
+ bSuccess &= (nPos < string.getLength()); // not last token
}
- if ( nDateTokens > 3 || aDateStr.getLength() == 0 )
+ if (bSuccess && (sal_Unicode('-') != string[nPos])) // separator
+ {
bSuccess = false;
- else
+ }
+ if (bSuccess)
{
- sal_Int32 n = 0;
- if ( !convertNumber( nYear, aDateStr.getToken( 0, '-', n ), 0, 9999 ) )
+ ++nPos;
+ }
+
+ sal_Int32 nDay(0);
+ if (bSuccess)
+ {
+ bSuccess = readDateTimeComponent(string, nPos, nDay, 2, true);
+ bSuccess &= (0 < nDay) && (nDay <= lcl_MaxDaysPerMonth(nMonth, nYear));
+ }
+
+ bool bHaveTime(false);
+ if (bSuccess && (nPos < string.getLength()))
+ {
+ if (sal_Unicode('T') == string[nPos]) // time separator
+ {
+ bHaveTime = true;
+ ++nPos;
+ }
+ }
+
+ sal_Int32 nHours(0);
+ sal_Int32 nMinutes(0);
+ sal_Int32 nSeconds(0);
+ sal_Int32 nMilliSeconds(0);
+ if (bSuccess && bHaveTime)
+ {
+ {
+ bSuccess = readDateTimeComponent(string, nPos, nHours, 2, true);
+ bSuccess &= (0 <= nHours) && (nHours <= 24);
+ bSuccess &= (nPos < string.getLength()); // not last token
+ }
+ if (bSuccess && (sal_Unicode(':') != string[nPos])) // separator
+ {
bSuccess = false;
- if ( nDateTokens >= 2 )
- if ( !convertNumber( nMonth, aDateStr.getToken( 0, '-', n ), 0, 12 ) )
- bSuccess = false;
- if ( nDateTokens >= 3 )
- if ( !convertNumber( nDay, aDateStr.getToken( 0, '-', n ), 0, 31 ) )
+ }
+ if (bSuccess)
+ {
+ ++nPos;
+ }
+
+ if (bSuccess)
+ {
+ bSuccess = readDateTimeComponent(string, nPos, nMinutes, 2, true);
+ bSuccess &= (0 <= nMinutes) && (nMinutes < 60);
+ bSuccess &= (nPos < string.getLength()); // not last token
+ }
+ if (bSuccess && (sal_Unicode(':') != string[nPos])) // separator
+ {
+ bSuccess = false;
+ }
+ if (bSuccess)
+ {
+ ++nPos;
+ }
+
+ if (bSuccess)
+ {
+ bSuccess = readDateTimeComponent(string, nPos, nSeconds, 2, true);
+ bSuccess &= (0 <= nSeconds) && (nSeconds < 60);
+ }
+ if (bSuccess && (nPos < string.getLength()) &&
+ (sal_Unicode('.') == string[nPos])) // fraction separator
+ {
+ ++nPos;
+ const sal_Int32 nStart(nPos);
+ sal_Int32 nTemp(0);
+ if (R_NOTHING == readUnsignedNumber(string, nPos, nTemp))
+ {
bSuccess = false;
+ }
+ if (bSuccess)
+ {
+ // cannot use nTemp because of possible leading zeros
+ // and possible overflow => read digits directly
+ const sal_Int32 nDigits(nPos - nStart);
+ OSL_ENSURE(nDigits > 0, "bad code monkey");
+ const sal_Unicode cZero('0');
+ nMilliSeconds = 100 * (string[nStart] - cZero);
+ if (nDigits >= 2)
+ {
+ nMilliSeconds += 10 * (string[nStart+1] - cZero);
+ if (nDigits >= 3)
+ {
+ nMilliSeconds += (string[nStart+2] - cZero);
+ }
+ }
+ }
+ }
+
+ if (bSuccess && (nHours == 24))
+ {
+ if (!((0 == nMinutes) && (0 == nSeconds) && (0 == nMilliSeconds)))
+ {
+ bSuccess = false; // only 24:00:00 is valid
+ }
+#if 0
+ else
+ {
+ nHours = 0; // normalize 24:00:00 to 00:00:00 of next day
+ lcl_addDay(bNegative, nYear, nMonth, nDay, 1);
+ }
+#endif
+ }
}
- if ( aTimeStr.getLength() > 0 ) // time is optional
+ bool bHaveTimezone(false);
+ bool bHaveTimezonePlus(false);
+ bool bHaveTimezoneMinus(false);
+ if (bSuccess && (nPos < string.getLength()))
{
- pStr = aTimeStr.getStr();
- sal_Int32 nTimeTokens = 1;
- while ( *pStr )
+ const sal_Unicode c(string[nPos]);
+ if (sal_Unicode('+') == c)
{
- if ( *pStr == ':' )
- nTimeTokens++;
- pStr++;
+ bHaveTimezone = true;
+ bHaveTimezonePlus = true;
+ ++nPos;
+ }
+ else if (sal_Unicode('-') == c)
+ {
+ bHaveTimezone = true;
+ bHaveTimezoneMinus = true;
+ ++nPos;
+ }
+ else if (sal_Unicode('Z') == c)
+ {
+ bHaveTimezone = true;
+ ++nPos;
}
- if ( nTimeTokens > 3 )
- bSuccess = false;
else
{
- sal_Int32 n = 0;
- if ( !convertNumber( nHour, aTimeStr.getToken( 0, ':', n ), 0, 23 ) )
- bSuccess = false;
- if ( nTimeTokens >= 2 )
- if ( !convertNumber( nMin, aTimeStr.getToken( 0, ':', n ), 0, 59 ) )
- bSuccess = false;
- if ( nTimeTokens >= 3 )
- if ( !convertNumber( nSec, aTimeStr.getToken( 0, ':', n ), 0, 59 ) )
- bSuccess = false;
+ bSuccess = false;
}
}
+ sal_Int32 nTimezoneHours(0);
+ sal_Int32 nTimezoneMinutes(0);
+ if (bSuccess && (bHaveTimezonePlus || bHaveTimezoneMinus))
+ {
+ bSuccess = readDateTimeComponent(
+ string, nPos, nTimezoneHours, 2, true);
+ bSuccess &= (0 <= nTimezoneHours) && (nTimezoneHours <= 14);
+ bSuccess &= (nPos < string.getLength()); // not last token
+ if (bSuccess && (sal_Unicode(':') != string[nPos])) // separator
+ {
+ bSuccess = false;
+ }
+ if (bSuccess)
+ {
+ ++nPos;
+ }
+ if (bSuccess)
+ {
+ bSuccess = readDateTimeComponent(
+ string, nPos, nTimezoneMinutes, 2, true);
+ bSuccess &= (0 <= nTimezoneMinutes) && (nTimezoneMinutes < 60);
+ }
+ if (bSuccess && (nTimezoneHours == 14))
+ {
+ if (0 != nTimezoneMinutes)
+ {
+ bSuccess = false; // only +-14:00 is valid
+ }
+ }
+ }
+
+ bSuccess &= (nPos == string.getLength()); // trailing junk?
+
+ if (bSuccess && bHaveTimezone)
+ {
+ // util::DateTime does not support timezones!
+#if 0
+ // do not add timezone, just strip it (as suggested by er)
+ lcl_addTimezone(bNegative, nYear, nMonth, nDay, nHours, nMinutes,
+ !bHaveTimezoneMinus, nTimezoneHours, nTimezoneMinutes);
+#endif
+ }
if (bSuccess)
{
- if ( aTimeStr.getLength() > 0 ) // time is optional
+ if (bHaveTime) // time is optional
{
+ // util::DateTime does not support negative years!
rDateTime.Year = static_cast<sal_uInt16>(nYear);
rDateTime.Month = static_cast<sal_uInt16>(nMonth);
rDateTime.Day = static_cast<sal_uInt16>(nDay);
- rDateTime.Hours = static_cast<sal_uInt16>(nHour);
- rDateTime.Minutes = static_cast<sal_uInt16>(nMin);
- rDateTime.Seconds = static_cast<sal_uInt16>(nSec);
+ rDateTime.Hours = static_cast<sal_uInt16>(nHours);
+ rDateTime.Minutes = static_cast<sal_uInt16>(nMinutes);
+ rDateTime.Seconds = static_cast<sal_uInt16>(nSeconds);
+ // util::DateTime does not support 3 decimal digits of precision!
rDateTime.HundredthSeconds =
- static_cast<sal_uInt16>((sDoubleStr).toDouble() * 100);
+ static_cast<sal_uInt16>(nMilliSeconds / 10);
rbDateTime = true;
}
else
@@ -1466,6 +1647,100 @@ bool Converter::convertDateOrDateTime(
return bSuccess;
}
+#if 0
+// Disabled self-test for the "dateTime" parsing and serialization code.
+// When enabled, the static Test instance at the end runs every check from
+// its constructor; failures are reported through OSL_ASSERT.
+struct Test {
+ // compare two util::DateTime values member by member
+ static bool eqDateTime(util::DateTime a, util::DateTime b) {
+ return a.Year == b.Year && a.Month == b.Month && a.Day == b.Day
+ && a.Hours == b.Hours && a.Minutes == b.Minutes
+ && a.Seconds == b.Seconds
+ && a.HundredthSeconds == b.HundredthSeconds;
+ }
+ // positive test: parsing pis must succeed and yield rdt; serializing the
+ // parsed value again must yield i_pos (or pis itself if i_pos is 0)
+ static void doTest(util::DateTime const & rdt, char const*const pis,
+ char const*const i_pos = 0)
+ {
+ char const*const pos((i_pos) ? i_pos : pis);
+ ::rtl::OUString is(::rtl::OUString::createFromAscii(pis));
+ util::DateTime odt;
+ bool bSuccess( Converter::convertDateTime(odt, is) );
+ OSL_TRACE("Y:%d M:%d D:%d H:%d M:%d S:%d H:%d",
+ odt.Year, odt.Month, odt.Day,
+ odt.Hours, odt.Minutes, odt.Seconds, odt.HundredthSeconds);
+ OSL_ASSERT(bSuccess);
+ OSL_ASSERT(eqDateTime(rdt, odt));
+ ::rtl::OUStringBuffer buf;
+ Converter::convertDateTime(buf, odt, true);
+ OSL_TRACE(
+ ::rtl::OUStringToOString(buf.getStr(), RTL_TEXTENCODING_UTF8));
+ OSL_ASSERT(buf.makeStringAndClear().equalsAscii(pos));
+ }
+ // negative test: parsing pis must fail
+ static void doTestF(const char * pis)
+ {
+ util::DateTime odt;
+ bool bSuccess = Converter::convertDateTime(odt,
+ ::rtl::OUString::createFromAscii(pis));
+ OSL_TRACE("Y:%d M:%d D:%d H:%dH M:%d S:%d H:%d",
+ odt.Year, odt.Month, odt.Day,
+ odt.Hours, odt.Minutes, odt.Seconds, odt.HundredthSeconds);
+ OSL_ASSERT(!bSuccess);
+ }
+ // NOTE(review): judging by the expected strings, the util::DateTime
+ // constructor arguments appear to be ordered (1/100 s, s, min, h, day,
+ // month, year) -- confirm against the util::DateTime declaration
+ Test() {
+ OSL_TRACE("\nSAX CONVERTER TEST BEGIN\n");
+ doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1), "0001-01-01T00:00:00" );
+ doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1),
+ "0001-01-01T00:00:00Z", "0001-01-01T00:00:00" );
+// doTest( util::DateTime(0, 0, 0, 0, 1, 1, -1), "-0001-01-01T00:00:00" );
+// doTest( util::DateTime(0, 0, 0, 0, 1, 1, -1), "-0001-01-01T00:00:00Z" );
+ doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1),
+ "0001-01-01T00:00:00-00:00", "0001-01-01T00:00:00" );
+ doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1),
+ "0001-01-01T00:00:00+00:00", "0001-01-01T00:00:00" );
+ doTest( util::DateTime(0, 0, 0, 0, 2, 1, 1)/*(0, 0, 12, 0, 2, 1, 1)*/,
+ "0001-01-02T00:00:00-12:00", "0001-01-02T00:00:00" );
+// "0001-02-01T12:00:00" );
+ doTest( util::DateTime(0, 0, 0, 0, 2, 1, 1)/*(0, 0, 12, 0, 1, 1, 1)*/,
+ "0001-01-02T00:00:00+12:00", "0001-01-02T00:00:00" );
+// "0001-01-01T12:00:00" );
+ doTest( util::DateTime(99, 59, 59, 23, 31, 12, 9999),
+ "9999-12-31T23:59:59.99" );
+ doTest( util::DateTime(99, 59, 59, 23, 31, 12, 9999),
+ "9999-12-31T23:59:59.99Z", "9999-12-31T23:59:59.99" );
+ doTest( util::DateTime(99, 59, 59, 23, 31, 12, 9999),
+ "9999-12-31T23:59:59.9999999999999999999999999999999999999",
+ "9999-12-31T23:59:59.99" );
+ doTest( util::DateTime(99, 59, 59, 23, 31, 12, 9999),
+ "9999-12-31T23:59:59.9999999999999999999999999999999999999Z",
+ "9999-12-31T23:59:59.99" );
+ doTest( util::DateTime(0, 0, 0, 24, 1, 1, 333)
+ /*(0, 0, 0, 0, 2, 1, 333)*/,
+ "0333-01-01T24:00:00"/*, "0333-01-02T00:00:00"*/ );
+ doTestF( "+0001-01-01T00:00:00" ); // invalid: ^+
+ doTestF( "1-01-01T00:00:00" ); // invalid: < 4 Y
+ doTestF( "0001-1-01T00:00:00" ); // invalid: < 2 M
+ doTestF( "0001-01-1T00:00:00" ); // invalid: < 2 D
+ doTestF( "0001-01-01T0:00:00" ); // invalid: < 2 H
+ doTestF( "0001-01-01T00:0:00" ); // invalid: < 2 M
+ doTestF( "0001-01-01T00:00:0" ); // invalid: < 2 S
+ doTestF( "0001-01-01T00:00:00." ); // invalid: .$
+ doTestF( "0001-01-01T00:00:00+1:00" ); // invalid: < 2 TZ H
+ doTestF( "0001-01-01T00:00:00+00:1" ); // invalid: < 2 TZ M
+ doTestF( "0001-13-01T00:00:00" ); // invalid: M > 12
+ doTestF( "0001-01-32T00:00:00" ); // invalid: D > 31
+ doTestF( "0001-01-01T25:00:00" ); // invalid: H > 24
+ doTestF( "0001-01-01T00:60:00" ); // invalid: M > 59
+ doTestF( "0001-01-01T00:00:60" ); // invalid: S > 59
+ doTestF( "0001-01-01T24:01:00" ); // invalid: H=24, but M != 0
+ doTestF( "0001-01-01T24:00:01" ); // invalid: H=24, but S != 0
+ doTestF( "0001-01-01T24:00:00.1" ); // invalid: H=24, but fraction != 0
+ doTestF( "0001-01-02T00:00:00+15:00" ); // invalid: TZ > +14:00
+ doTestF( "0001-01-02T00:00:00+14:01" ); // invalid: TZ > +14:00
+ doTestF( "0001-01-02T00:00:00-15:00" ); // invalid: TZ < -14:00
+ doTestF( "0001-01-02T00:00:00-14:01" ); // invalid: TZ < -14:00
+ OSL_TRACE("\nSAX CONVERTER TEST END\n");
+ }
+};
+// constructing this object at static-initialization time runs the tests
+static Test test;
+#endif
+
/** gets the position of the first comma after npos in the string
rStr. Commas inside '"' pairs are not matched */
sal_Int32 Converter::indexOfComma( const OUString& rStr,