diff options
author | Michael Stahl <mst@openoffice.org> | 2010-01-18 16:29:56 +0100 |
---|---|---|
committer | Michael Stahl <mst@openoffice.org> | 2010-01-18 16:29:56 +0100 |
commit | 8786083eb9dabb0d7b328a217ba99a1d71493ad7 (patch) | |
tree | 14205b8915401520b73c75a100eb7b7d6ad908cc /sax/source/tools | |
parent | 8fda468433d62653864fc7fe4f6a7e306cdcb28c (diff) |
sw33bf02: #i107437#: sax::Converter: support XMLSchema-2 "dateTime" better:
reimplement convertDateOrDateTime() to parse all "dateTime" features.
also fix serialization function to write leading zeros if year < 1000.
Diffstat (limited to 'sax/source/tools')
-rw-r--r-- | sax/source/tools/converter.cxx | 437 |
1 files changed, 356 insertions, 81 deletions
diff --git a/sax/source/tools/converter.cxx b/sax/source/tools/converter.cxx index e13df493628a..6eb66db984dd 100644 --- a/sax/source/tools/converter.cxx +++ b/sax/source/tools/converter.cxx @@ -962,8 +962,9 @@ readUnsignedNumber(const ::rtl::OUString & rString, { bool bOverflow(false); sal_Int32 nTemp(0); + sal_Int32 nPos(io_rnPos); - for (sal_Int32 nPos = io_rnPos; (nPos < rString.getLength()); ++nPos) + while (nPos < rString.getLength()) { const sal_Unicode c = rString[nPos]; if ((sal_Unicode('0') <= c) && (c <= sal_Unicode('9'))) @@ -977,25 +978,20 @@ readUnsignedNumber(const ::rtl::OUString & rString, } else { - if (io_rnPos != nPos) // read something? - { - io_rnPos = nPos; - if (bOverflow) - { - return R_OVERFLOW; - } - else - { - o_rNumber = nTemp; - return R_SUCCESS; - } - } - else break; + break; } + ++nPos; + } + + if (io_rnPos == nPos) // read something? + { + o_rNumber = -1; + return R_NOTHING; } - o_rNumber = -1; - return R_NOTHING; + io_rnPos = nPos; + o_rNumber = nTemp; + return (bOverflow) ? R_OVERFLOW : R_SUCCESS; } static bool @@ -1288,6 +1284,15 @@ void Converter::convertDateTime( const sal_Unicode zero('0'); const sal_Unicode tee ('T'); + if (i_rDateTime.Year < 1000) { + i_rBuffer.append(zero); + } + if (i_rDateTime.Year < 100) { + i_rBuffer.append(zero); + } + if (i_rDateTime.Year < 10) { + i_rBuffer.append(zero); + } i_rBuffer.append( static_cast<sal_Int32>(i_rDateTime.Year) ).append(dash); if( i_rDateTime.Month < 10 ) { i_rBuffer.append(zero); @@ -1355,6 +1360,46 @@ bool Converter::convertDateTime( util::DateTime& rDateTime, } } +static bool +readDateTimeComponent(const ::rtl::OUString & rString, + sal_Int32 & io_rnPos, sal_Int32 & o_rnTarget, + const sal_Int32 nMinLength, const bool bExactLength) +{ + const sal_Int32 nOldPos(io_rnPos); + sal_Int32 nTemp(0); + if (R_SUCCESS != readUnsignedNumber(rString, io_rnPos, nTemp)) + { + return false; + } + const sal_Int32 nTokenLength(io_rnPos - nOldPos); + if ((nTokenLength < nMinLength) || + (bExactLength && (nTokenLength > nMinLength))) + { + return false; // bad length + } + o_rnTarget = nTemp; + return true; +} + +static bool lcl_isLeapYear(const sal_uInt32 nYear) +{ + return ((nYear % 4) == 0) + && !(((nYear % 100) == 0) || ((nYear % 400) == 0)); +} + +static sal_uInt16 +lcl_MaxDaysPerMonth(const sal_Int32 nMonth, const sal_Int32 nYear) +{ + static sal_uInt16 s_MaxDaysPerMonth[12] = + { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; + OSL_ASSERT(0 < nMonth && nMonth <= 12); + if ((2 == nMonth) && lcl_isLeapYear(nYear)) + { + return 29; + } + return s_MaxDaysPerMonth[nMonth - 1]; +} + /** convert ISO "date" or "dateTime" string to util::DateTime or util::Date */ bool Converter::convertDateOrDateTime( util::Date & rDate, util::DateTime & rDateTime, @@ -1362,97 +1407,233 @@ bool Converter::convertDateOrDateTime( { bool bSuccess = true; - rtl::OUString aDateStr, aTimeStr, sDoubleStr; - sal_Int32 nPos = rString.indexOf( (sal_Unicode) 'T' ); - sal_Int32 nPos2 = rString.indexOf( (sal_Unicode) ',' ); - if (nPos2 < 0) - nPos2 = rString.indexOf( (sal_Unicode) '.' ); - if ( nPos >= 0 ) + const ::rtl::OUString string = rString.trim().toAsciiUpperCase(); + sal_Int32 nPos(0); + bool bNegative(false); + if ((string.getLength() > nPos) && (sal_Unicode('-') == string[nPos])) { - aDateStr = rString.copy( 0, nPos ); - if ( nPos2 >= 0 ) - { - aTimeStr = rString.copy( nPos + 1, nPos2 - nPos - 1 ); - sDoubleStr = OUString(RTL_CONSTASCII_USTRINGPARAM("0.")); - sDoubleStr += rString.copy( nPos2 + 1 ); - } - else - { - aTimeStr = rString.copy(nPos + 1); - sDoubleStr = OUString(RTL_CONSTASCII_USTRINGPARAM("0.0")); - } + ++nPos; + bNegative = true; } - else - aDateStr = rString; // no separator: only date part - sal_Int32 nYear = 1899; - sal_Int32 nMonth = 12; - sal_Int32 nDay = 30; - sal_Int32 nHour = 0; - sal_Int32 nMin = 0; - sal_Int32 nSec = 0; + sal_Int32 nYear(0); + { + bSuccess = readDateTimeComponent(string, nPos, nYear, 4, false); + bSuccess &= (0 < nYear); + bSuccess &= (nPos < string.getLength()); // not last token + } + if (bSuccess && (sal_Unicode('-') != string[nPos])) // separator + { + bSuccess = false; + } + if (bSuccess) + { + ++nPos; + } - const sal_Unicode* pStr = aDateStr.getStr(); - sal_Int32 nDateTokens = 1; - while ( *pStr ) + sal_Int32 nMonth(0); + if (bSuccess) { - if ( *pStr == '-' ) - nDateTokens++; - pStr++; + bSuccess = readDateTimeComponent(string, nPos, nMonth, 2, true); + bSuccess &= (0 < nMonth) && (nMonth <= 12); + bSuccess &= (nPos < string.getLength()); // not last token } - if ( nDateTokens > 3 || aDateStr.getLength() == 0 ) + if (bSuccess && (sal_Unicode('-') != string[nPos])) // separator + { bSuccess = false; - else + } + if (bSuccess) { - sal_Int32 n = 0; - if ( !convertNumber( nYear, aDateStr.getToken( 0, '-', n ), 0, 9999 ) ) + ++nPos; + } + + sal_Int32 nDay(0); + if (bSuccess) + { + bSuccess = readDateTimeComponent(string, nPos, nDay, 2, true); + bSuccess &= (0 < nDay) && (nDay <= lcl_MaxDaysPerMonth(nMonth, nYear)); + } + + bool bHaveTime(false); + if (bSuccess && (nPos < string.getLength())) + { + if (sal_Unicode('T') == string[nPos]) // time separator + { + bHaveTime = true; + ++nPos; + } + } + + sal_Int32 nHours(0); + sal_Int32 nMinutes(0); + sal_Int32 nSeconds(0); + sal_Int32 nMilliSeconds(0); + if (bSuccess && bHaveTime) + { + { + bSuccess = readDateTimeComponent(string, nPos, nHours, 2, true); + bSuccess &= (0 <= nHours) && (nHours <= 24); + bSuccess &= (nPos < string.getLength()); // not last token + } + if (bSuccess && (sal_Unicode(':') != string[nPos])) // separator + { bSuccess = false; - if ( nDateTokens >= 2 ) - if ( !convertNumber( nMonth, aDateStr.getToken( 0, '-', n ), 0, 12 ) ) - bSuccess = false; - if ( nDateTokens >= 3 ) - if ( !convertNumber( nDay, aDateStr.getToken( 0, '-', n ), 0, 31 ) ) + } + if (bSuccess) + { + ++nPos; + } + + if (bSuccess) + { + bSuccess = readDateTimeComponent(string, nPos, nMinutes, 2, true); + bSuccess &= (0 <= nMinutes) && (nMinutes < 60); + bSuccess &= (nPos < string.getLength()); // not last token + } + if (bSuccess && (sal_Unicode(':') != string[nPos])) // separator + { + bSuccess = false; + } + if (bSuccess) + { + ++nPos; + } + + if (bSuccess) + { + bSuccess = readDateTimeComponent(string, nPos, nSeconds, 2, true); + bSuccess &= (0 <= nSeconds) && (nSeconds < 60); + } + if (bSuccess && (nPos < string.getLength()) && + (sal_Unicode('.') == string[nPos])) // fraction separator + { + ++nPos; + const sal_Int32 nStart(nPos); + sal_Int32 nTemp(0); + if (R_NOTHING == readUnsignedNumber(string, nPos, nTemp)) + { bSuccess = false; + } + if (bSuccess) + { + // cannot use nTemp because of possible leading zeros + // and possible overflow => read digits directly + const sal_Int32 nDigits(nPos - nStart); + OSL_ENSURE(nDigits > 0, "bad code monkey"); + const sal_Unicode cZero('0'); + nMilliSeconds = 100 * (string[nStart] - cZero); + if (nDigits >= 2) + { + nMilliSeconds += 10 * (string[nStart+1] - cZero); + if (nDigits >= 3) + { + nMilliSeconds += (string[nStart+2] - cZero); + } + } + } + } + + if (bSuccess && (nHours == 24)) + { + if (!((0 == nMinutes) && (0 == nSeconds) && (0 == nMilliSeconds))) + { + bSuccess = false; // only 24:00:00 is valid + } +#if 0 + else + { + nHours = 0; // normalize 24:00:00 to 00:00:00 of next day + lcl_addDay(bNegative, nYear, nMonth, nDay, 1); + } +#endif + } } - if ( aTimeStr.getLength() > 0 ) // time is optional + bool bHaveTimezone(false); + bool bHaveTimezonePlus(false); + bool bHaveTimezoneMinus(false); + if (bSuccess && (nPos < string.getLength())) { - pStr = aTimeStr.getStr(); - sal_Int32 nTimeTokens = 1; - while ( *pStr ) + const sal_Unicode c(string[nPos]); + if (sal_Unicode('+') == c) { - if ( *pStr == ':' ) - nTimeTokens++; - pStr++; + bHaveTimezone = true; + bHaveTimezonePlus = true; + ++nPos; + } + else if (sal_Unicode('-') == c) + { + bHaveTimezone = true; + bHaveTimezoneMinus = true; + ++nPos; + } + else if (sal_Unicode('Z') == c) + { + bHaveTimezone = true; + ++nPos; } - if ( nTimeTokens > 3 ) - bSuccess = false; else { - sal_Int32 n = 0; - if ( !convertNumber( nHour, aTimeStr.getToken( 0, ':', n ), 0, 23 ) ) - bSuccess = false; - if ( nTimeTokens >= 2 ) - if ( !convertNumber( nMin, aTimeStr.getToken( 0, ':', n ), 0, 59 ) ) - bSuccess = false; - if ( nTimeTokens >= 3 ) - if ( !convertNumber( nSec, aTimeStr.getToken( 0, ':', n ), 0, 59 ) ) - bSuccess = false; + bSuccess = false; } } + sal_Int32 nTimezoneHours(0); + sal_Int32 nTimezoneMinutes(0); + if (bSuccess && (bHaveTimezonePlus || bHaveTimezoneMinus)) + { + bSuccess = readDateTimeComponent( + string, nPos, nTimezoneHours, 2, true); + bSuccess &= (0 <= nTimezoneHours) && (nTimezoneHours <= 14); + bSuccess &= (nPos < string.getLength()); // not last token + if (bSuccess && (sal_Unicode(':') != string[nPos])) // separator + { + bSuccess = false; + } + if (bSuccess) + { + ++nPos; + } + if (bSuccess) + { + bSuccess = readDateTimeComponent( + string, nPos, nTimezoneMinutes, 2, true); + bSuccess &= (0 <= nTimezoneMinutes) && (nTimezoneMinutes < 60); + } + if (bSuccess && (nTimezoneHours == 14)) + { + if (0 != nTimezoneMinutes) + { + bSuccess = false; // only +-14:00 is valid + } + } + } + + bSuccess &= (nPos == string.getLength()); // trailing junk? + + if (bSuccess && bHaveTimezone) + { + // util::DateTime does not support timezones! +#if 0 + // do not add timezone, just strip it (as suggested by er) + lcl_addTimezone(bNegative, nYear, nMonth, nDay, nHours, nMinutes, + !bHaveTimezoneMinus, nTimezoneHours, nTimezoneMinutes); +#endif + } if (bSuccess) { - if ( aTimeStr.getLength() > 0 ) // time is optional + if (bHaveTime) // time is optional { + // util::DateTime does not support negative years! rDateTime.Year = static_cast<sal_uInt16>(nYear); rDateTime.Month = static_cast<sal_uInt16>(nMonth); rDateTime.Day = static_cast<sal_uInt16>(nDay); - rDateTime.Hours = static_cast<sal_uInt16>(nHour); - rDateTime.Minutes = static_cast<sal_uInt16>(nMin); - rDateTime.Seconds = static_cast<sal_uInt16>(nSec); + rDateTime.Hours = static_cast<sal_uInt16>(nHours); + rDateTime.Minutes = static_cast<sal_uInt16>(nMinutes); + rDateTime.Seconds = static_cast<sal_uInt16>(nSeconds); + // util::DateTime does not support 3 decimal digits of precision! rDateTime.HundredthSeconds = - static_cast<sal_uInt16>((sDoubleStr).toDouble() * 100); + static_cast<sal_uInt16>(nMilliSeconds / 10); rbDateTime = true; } else @@ -1466,6 +1647,100 @@ bool Converter::convertDateOrDateTime( return bSuccess; } +#if 0 +struct Test { + static bool eqDateTime(util::DateTime a, util::DateTime b) { + return a.Year == b.Year && a.Month == b.Month && a.Day == b.Day + && a.Hours == b.Hours && a.Minutes == b.Minutes + && a.Seconds == b.Seconds + && a.HundredthSeconds == b.HundredthSeconds; + } + static void doTest(util::DateTime const & rdt, char const*const pis, + char const*const i_pos = 0) + { + char const*const pos((i_pos) ? i_pos : pis); + ::rtl::OUString is(::rtl::OUString::createFromAscii(pis)); + util::DateTime odt; + bool bSuccess( Converter::convertDateTime(odt, is) ); + OSL_TRACE("Y:%d M:%d D:%d H:%d M:%d S:%d H:%d", + odt.Year, odt.Month, odt.Day, + odt.Hours, odt.Minutes, odt.Seconds, odt.HundredthSeconds); + OSL_ASSERT(bSuccess); + OSL_ASSERT(eqDateTime(rdt, odt)); + ::rtl::OUStringBuffer buf; + Converter::convertDateTime(buf, odt, true); + OSL_TRACE( + ::rtl::OUStringToOString(buf.getStr(), RTL_TEXTENCODING_UTF8)); + OSL_ASSERT(buf.makeStringAndClear().equalsAscii(pos)); + } + static void doTestF(const char * pis) + { + util::DateTime odt; + bool bSuccess = Converter::convertDateTime(odt, + ::rtl::OUString::createFromAscii(pis)); + OSL_TRACE("Y:%d M:%d D:%d H:%dH M:%d S:%d H:%d", + odt.Year, odt.Month, odt.Day, + odt.Hours, odt.Minutes, odt.Seconds, odt.HundredthSeconds); + OSL_ASSERT(!bSuccess); + } + Test() { + OSL_TRACE("\nSAX CONVERTER TEST BEGIN\n"); + doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1), "0001-01-01T00:00:00" ); + doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1), + "0001-01-01T00:00:00Z", "0001-01-01T00:00:00" ); +// doTest( util::DateTime(0, 0, 0, 0, 1, 1, -1), "-0001-01-01T00:00:00" ); +// doTest( util::DateTime(0, 0, 0, 0, 1, 1, -1), "-0001-01-01T00:00:00Z" ); + doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1), + "0001-01-01T00:00:00-00:00", "0001-01-01T00:00:00" ); + doTest( util::DateTime(0, 0, 0, 0, 1, 1, 1), + "0001-01-01T00:00:00+00:00", "0001-01-01T00:00:00" ); + doTest( util::DateTime(0, 0, 0, 0, 2, 1, 1)/*(0, 0, 12, 0, 2, 1, 1)*/, + "0001-01-02T00:00:00-12:00", "0001-01-02T00:00:00" ); +// "0001-02-01T12:00:00" ); + doTest( util::DateTime(0, 0, 0, 0, 2, 1, 1)/*(0, 0, 12, 0, 1, 1, 1)*/, + "0001-01-02T00:00:00+12:00", "0001-01-02T00:00:00" ); +// "0001-01-01T12:00:00" ); + doTest( util::DateTime(99, 59, 59, 23, 31, 12, 9999), + "9999-12-31T23:59:59.99" ); + doTest( util::DateTime(99, 59, 59, 23, 31, 12, 9999), + "9999-12-31T23:59:59.99Z", "9999-12-31T23:59:59.99" ); + doTest( util::DateTime(99, 59, 59, 23, 31, 12, 9999), + "9999-12-31T23:59:59.9999999999999999999999999999999999999", + "9999-12-31T23:59:59.99" ); + doTest( util::DateTime(99, 59, 59, 23, 31, 12, 9999), + "9999-12-31T23:59:59.9999999999999999999999999999999999999Z", + "9999-12-31T23:59:59.99" ); + doTest( util::DateTime(0, 0, 0, 24, 1, 1, 333) + /*(0, 0, 0, 0, 2, 1, 333)*/, + "0333-01-01T24:00:00"/*, "0333-01-02T00:00:00"*/ ); + doTestF( "+0001-01-01T00:00:00" ); // invalid: ^+ + doTestF( "1-01-01T00:00:00" ); // invalid: < 4 Y + doTestF( "0001-1-01T00:00:00" ); // invalid: < 2 M + doTestF( "0001-01-1T00:00:00" ); // invalid: < 2 D + doTestF( "0001-01-01T0:00:00" ); // invalid: < 2 H + doTestF( "0001-01-01T00:0:00" ); // invalid: < 2 M + doTestF( "0001-01-01T00:00:0" ); // invalid: < 2 S + doTestF( "0001-01-01T00:00:00." ); // invalid: .$ + doTestF( "0001-01-01T00:00:00+1:00" ); // invalid: < 2 TZ H + doTestF( "0001-01-01T00:00:00+00:1" ); // invalid: < 2 TZ M + doTestF( "0001-13-01T00:00:00" ); // invalid: M > 12 + doTestF( "0001-01-32T00:00:00" ); // invalid: D > 31 + doTestF( "0001-01-01T25:00:00" ); // invalid: H > 24 + doTestF( "0001-01-01T00:60:00" ); // invalid: H > 59 + doTestF( "0001-01-01T00:00:60" ); // invalid: S > 59 + doTestF( "0001-01-01T24:01:00" ); // invalid: H=24, but M != 0 + doTestF( "0001-01-01T24:00:01" ); // invalid: H=24, but S != 0 + doTestF( "0001-01-01T24:00:00.1" ); // invalid: H=24, but H != 0 + doTestF( "0001-01-02T00:00:00+15:00" ); // invalid: TZ > +14:00 + doTestF( "0001-01-02T00:00:00+14:01" ); // invalid: TZ > +14:00 + doTestF( "0001-01-02T00:00:00-15:00" ); // invalid: TZ < -14:00 + doTestF( "0001-01-02T00:00:00-14:01" ); // invalid: TZ < -14:00 + OSL_TRACE("\nSAX CONVERTER TEST END\n"); + } +}; +static Test test; +#endif + /** gets the position of the first comma after npos in the string rStr. Commas inside '"' pairs are not matched */ sal_Int32 Converter::indexOfComma( const OUString& rStr, |