From 1636bce46df5f762eca594f5ebb05db7187287a0 Mon Sep 17 00:00:00 2001 From: Michael Stahl Date: Tue, 16 Jul 2024 12:12:09 +0200 Subject: package: add additional consistency checks for local file header Check it contains same as central directory header, also check data descriptor if available. Also check there are no gaps or overlaps. This causes 3 fuzzer generated test documents to fail to load; adapt tests. Change-Id: If5813652f3bd03e90fdf95eb97e1e1523455b2b8 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/170571 Tested-by: Jenkins Reviewed-by: Michael Stahl (cherry picked from commit efae4fc42d5fe3c0a69757226f38efc10d101194) Reviewed-on: https://gerrit.libreoffice.org/c/core/+/170553 Reviewed-by: Miklos Vajna --- package/inc/ByteGrabber.hxx | 1 + package/inc/ZipFile.hxx | 7 +- package/source/zipapi/ByteGrabber.cxx | 18 +++ package/source/zipapi/ZipFile.cxx | 231 +++++++++++++++++++++++++++++++--- 4 files changed, 235 insertions(+), 22 deletions(-) (limited to 'package') diff --git a/package/inc/ByteGrabber.hxx b/package/inc/ByteGrabber.hxx index ba1512cf5162..4a93398a7704 100644 --- a/package/inc/ByteGrabber.hxx +++ b/package/inc/ByteGrabber.hxx @@ -61,6 +61,7 @@ public: sal_uInt16 ReadUInt16(); sal_uInt32 ReadUInt32(); + sal_uInt64 ReadUInt64(); sal_Int16 ReadInt16() { return static_cast(ReadUInt16()); diff --git a/package/inc/ZipFile.hxx b/package/inc/ZipFile.hxx index 8828d929273f..ed9847a30013 100644 --- a/package/inc/ZipFile.hxx +++ b/package/inc/ZipFile.hxx @@ -29,6 +29,7 @@ #include "HashMaps.hxx" #include "EncryptionData.hxx" +#include #include #include @@ -86,15 +87,15 @@ class ZipFile void getSizeAndCRC( sal_Int64 nOffset, sal_Int64 nCompressedSize, sal_Int64 *nSize, sal_Int32 *nCRC ); - void readLOC( ZipEntry &rEntry ); + sal_uInt64 readLOC(ZipEntry &rEntry); sal_Int32 readCEN(); sal_Int32 findEND(); void HandlePK34(std::span data, sal_Int64 dataOffset, sal_Int64 totalSize); void HandlePK78(std::span data, sal_Int64 dataOffset); void recover(); - static void readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 nExtraLen, + static bool readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 nExtraLen, sal_uInt64& nSize, sal_uInt64& nCompressedSize, - sal_uInt64* nOffset, + ::std::optional & roOffset, OUString const* pCENFilenameToCheck); public: diff --git a/package/source/zipapi/ByteGrabber.cxx b/package/source/zipapi/ByteGrabber.cxx index 5a491de6509b..0e701a60007f 100644 --- a/package/source/zipapi/ByteGrabber.cxx +++ b/package/source/zipapi/ByteGrabber.cxx @@ -119,4 +119,22 @@ sal_uInt32 ByteGrabber::ReadUInt32() | ( pSequence[3] & 0xFF ) << 24 ); } +sal_uInt64 ByteGrabber::ReadUInt64() +{ + std::scoped_lock aGuard(m_aMutex); + + if (xStream->readBytes(aSequence, 8) != 8) + return 0; + + pSequence = aSequence.getConstArray(); + return static_cast(pSequence[0] & 0xFF) + | static_cast(pSequence[1] & 0xFF) << 8 + | static_cast(pSequence[2] & 0xFF) << 16 + | static_cast(pSequence[3] & 0xFF) << 24 + | static_cast(pSequence[4] & 0xFF) << 32 + | static_cast(pSequence[5] & 0xFF) << 40 + | static_cast(pSequence[6] & 0xFF) << 48 + | static_cast(pSequence[7] & 0xFF) << 56; +} + /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ diff --git a/package/source/zipapi/ZipFile.cxx b/package/source/zipapi/ZipFile.cxx index f7713b8665c1..5fa21941b8b6 100644 --- a/package/source/zipapi/ZipFile.cxx +++ b/package/source/zipapi/ZipFile.cxx @@ -927,7 +927,7 @@ uno::Reference< XInputStream > ZipFile::getWrappedRawStream( return createStreamForZipEntry ( aMutexHolder, rEntry, rData, UNBUFF_STREAM_WRAPPEDRAW, true, true, aMediaType ); } -void ZipFile::readLOC( ZipEntry &rEntry ) +sal_uInt64 ZipFile::readLOC(ZipEntry &rEntry) { ::osl::MutexGuard aGuard( m_aMutexHolder->GetMutex() ); @@ -943,13 +943,16 @@ void ZipFile::readLOC( ZipEntry &rEntry ) // Just verify the path and calculate the data offset and otherwise // rely on the central directory info. - aGrabber.ReadInt16(); //version - aGrabber.ReadInt16(); //flag - aGrabber.ReadInt16(); //how + aGrabber.ReadInt16(); // version - ignore any mismatch (Maven created JARs) + sal_uInt16 const nLocFlag = aGrabber.ReadUInt16(); // general purpose bit flag + sal_uInt16 const nLocMethod = aGrabber.ReadUInt16(); // compression method + // Do *not* compare timestamps, since MSO 2010 can produce documents + // with timestamp difference in the central directory entry and local + // file header. aGrabber.ReadInt32(); //time - aGrabber.ReadInt32(); //crc - aGrabber.ReadInt32(); //compressed size - aGrabber.ReadInt32(); //size + sal_uInt32 nLocCrc = aGrabber.ReadUInt32(); //crc + sal_uInt64 nLocCompressedSize = aGrabber.ReadUInt32(); //compressed size + sal_uInt64 nLocSize = aGrabber.ReadUInt32(); //size sal_Int16 nPathLen = aGrabber.ReadInt16(); sal_Int16 nExtraLen = aGrabber.ReadInt16(); @@ -961,6 +964,7 @@ void ZipFile::readLOC( ZipEntry &rEntry ) rEntry.nOffset = aGrabber.getPosition() + nPathLen + nExtraLen; + sal_Int64 nEnd = {}; // avoid -Werror=maybe-uninitialized bool bBroken = false; try @@ -982,8 +986,133 @@ void ZipFile::readLOC( ZipEntry &rEntry ) rEntry.sPath = sLOCPath; } - bBroken = rEntry.nPathLen != nPathLen - || rEntry.sPath != sLOCPath; + if (rEntry.nPathLen != nPathLen || rEntry.sPath != sLOCPath) + { + SAL_INFO("package", "LOC inconsistent name: \"" << rEntry.sPath << "\""); + bBroken = true; + } + + bool isZip64{false}; + ::std::optional oOffset64; + if (nExtraLen != 0) + { + Sequence aExtraBuffer; + aGrabber.readBytes(aExtraBuffer, nExtraLen); + MemoryByteGrabber extraMemGrabber(aExtraBuffer); + + isZip64 = readExtraFields(extraMemGrabber, nExtraLen, + nLocSize, nLocCompressedSize, oOffset64, &sLOCPath); + } + + // Just plain ignore bits 1 & 2 of the flag field - they are either + // purely informative, or even fully undefined (depending on method). + // Also ignore bit 11 ("Language encoding flag"): tdf125300.docx is + // example with mismatch - and the actual file names are compared in + // any case and required to be UTF-8. + if ((rEntry.nFlag & ~0x806U) != (nLocFlag & ~0x806U)) + { + SAL_INFO("package", "LOC inconsistent flag: \"" << rEntry.sPath << "\""); + bBroken = true; + } + + // TODO: "older versions with encrypted streams write mismatching DEFLATE/STORE" ??? + if (rEntry.nMethod != nLocMethod) + { + SAL_INFO("package", "LOC inconsistent method: \"" << rEntry.sPath << "\""); + bBroken = true; + } + + if (o3tl::checked_add(rEntry.nOffset, rEntry.nCompressedSize, nEnd)) + { + throw ZipException(u"Integer-overflow"_ustr); + } + + // read "data descriptor" - this can be 12, 16, 20, or 24 bytes in size + if ((rEntry.nFlag & 0x08) != 0) + { +#if 0 + if (nLocMethod == STORED) // example: fdo68983.odt has this :( + { + SAL_INFO("package", "LOC STORED with data descriptor: \"" << rEntry.sPath << "\""); + bBroken = true; + } + else +#endif + { + decltype(nLocCrc) nDDCrc; + decltype(nLocCompressedSize) nDDCompressedSize; + decltype(nLocSize) nDDSize; + aGrabber.seek(aGrabber.getPosition() + rEntry.nCompressedSize); + sal_uInt32 nTemp = aGrabber.ReadUInt32(); + if (nTemp == 0x08074b50) // APPNOTE says PK78 is optional??? + { + nDDCrc = aGrabber.ReadUInt32(); + } + else + { + nDDCrc = nTemp; + } + if (isZip64) + { + nDDCompressedSize = aGrabber.ReadUInt64(); + nDDSize = aGrabber.ReadUInt64(); + } + else + { + nDDCompressedSize = aGrabber.ReadUInt32(); + nDDSize = aGrabber.ReadUInt32(); + } + if (nEnd < aGrabber.getPosition()) + { + nEnd = aGrabber.getPosition(); + } + else + { + SAL_INFO("package", "LOC invalid size: \"" << rEntry.sPath << "\""); + bBroken = true; + } + // tdf91429.docx has same values in LOC and in (superfluous) DD + if ((nLocCrc == 0 || nLocCrc == nDDCrc) + && (nLocCompressedSize == 0 || nLocCompressedSize == sal_uInt64(-1) || nLocCompressedSize == nDDCompressedSize) + && (nLocSize == 0 || nLocSize == sal_uInt64(-1) || nLocSize == nDDSize)) + + { + nLocCrc = nDDCrc; + nLocCompressedSize = nDDCompressedSize; + nLocSize = nDDSize; + } + else + { + SAL_INFO("package", "LOC non-0 with data descriptor: \"" << rEntry.sPath << "\""); + bBroken = true; + } + } + } + + // unit test file export64.zip has nLocCrc/nLocCS/nLocSize = 0 on mimetype + if (nLocCrc != 0 && static_cast(rEntry.nCrc) != nLocCrc) + { + SAL_INFO("package", "LOC inconsistent CRC: \"" << rEntry.sPath << "\""); + bBroken = true; + } + + if (nLocCompressedSize != 0 && static_cast(rEntry.nCompressedSize) != nLocCompressedSize) + { + SAL_INFO("package", "LOC inconsistent compressed size: \"" << rEntry.sPath << "\""); + bBroken = true; + } + + if (nLocSize != 0 && static_cast(rEntry.nSize) != nLocSize) + { + SAL_INFO("package", "LOC inconsistent size: \"" << rEntry.sPath << "\""); + bBroken = true; + } + + if (oOffset64 && o3tl::make_unsigned(nPos) != *oOffset64) + { + SAL_INFO("package", "LOC inconsistent offset: \"" << rEntry.sPath << "\""); + bBroken = true; + } } catch(...) { @@ -992,6 +1121,8 @@ void ZipFile::readLOC( ZipEntry &rEntry ) if ( bBroken && !bRecoveryMode ) throw ZipIOException(u"The stream seems to be broken!"_ustr ); + + return nEnd; } sal_Int32 ZipFile::findEND() @@ -1079,7 +1210,7 @@ sal_Int32 ZipFile::readCEN() ZipEntry aEntry; sal_Int16 nCommentLen; - sal_Int64 nMinOffset{nEndPos}; + ::std::vector> unallocated = { { 0, nCenPos } }; aEntries.reserve(nTotal); for (nCount = 0 ; nCount < nTotal; nCount++) @@ -1136,7 +1267,12 @@ sal_Int32 ZipFile::readCEN() if (aEntry.nExtraLen>0) { - readExtraFields(aMemGrabber, aEntry.nExtraLen, nSize, nCompressedSize, &nOffset, &aEntry.sPath); + ::std::optional oOffset64; + readExtraFields(aMemGrabber, aEntry.nExtraLen, nSize, nCompressedSize, oOffset64, &aEntry.sPath); + if (oOffset64) + { + nOffset = *oOffset64; + } } aEntry.nCompressedSize = nCompressedSize; aEntry.nSize = nSize; @@ -1144,12 +1280,64 @@ sal_Int32 ZipFile::readCEN() if (o3tl::checked_add(aEntry.nOffset, nLocPos, aEntry.nOffset)) throw ZipException(u"Integer-overflow"_ustr); - nMinOffset = std::min(nMinOffset, aEntry.nOffset); if (o3tl::checked_multiply(aEntry.nOffset, -1, aEntry.nOffset)) throw ZipException(u"Integer-overflow"_ustr); aMemGrabber.skipBytes(nCommentLen); + // unfortunately readLOC is required now to check the consistency + assert(aEntry.nOffset <= 0); + sal_uInt64 const nStart{ o3tl::make_unsigned(-aEntry.nOffset) }; + sal_uInt64 const nEnd = readLOC(aEntry); + assert(nStart < nEnd); + for (auto it = unallocated.begin(); ; ++it) + { + if (it == unallocated.end()) + { + throw ZipException(u"overlapping entries"_ustr); + } + if (nStart < it->first) + { + throw ZipException(u"overlapping entries"_ustr); + } + else if (it->first == nStart) + { + if (it->second == nEnd) + { + unallocated.erase(it); + break; + } + else if (nEnd < it->second) + { + it->first = nEnd; + break; + } + else + { + throw ZipException(u"overlapping entries"_ustr); + } + } + else if (nStart < it->second) + { + if (nEnd < it->second) + { + auto const temp{it->first}; + it->first = nEnd; + unallocated.insert(it, { temp, nStart }); + break; + } + else if (nEnd == it->second) + { + it->second = nStart; + break; + } + else + { + throw ZipException(u"overlapping entries"_ustr); + } + } + } + // Is this a FAT-compatible empty entry? if (aEntry.nSize == 0 && (versionMadeBy & 0xff00) == 0) { @@ -1175,9 +1363,9 @@ sal_Int32 ZipFile::readCEN() if (nCount != nTotal) throw ZipException(u"Count != Total"_ustr ); - if (nMinOffset != 0) + if (!unallocated.empty()) { - throw ZipException(u"Extra bytes at beginning of zip file"_ustr); + throw ZipException(u"Zip file has holes! It will leak!"_ustr); } } catch ( IllegalArgumentException & ) @@ -1188,10 +1376,12 @@ sal_Int32 ZipFile::readCEN() return nCenPos; } -void ZipFile::readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 nExtraLen, +bool ZipFile::readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 nExtraLen, sal_uInt64& nSize, sal_uInt64& nCompressedSize, - sal_uInt64* nOffset, OUString const*const pCENFilenameToCheck) + std::optional & roOffset, + OUString const*const pCENFilenameToCheck) { + bool isZip64{false}; while (nExtraLen > 0) // Extensible data fields { sal_Int16 nheaderID = aMemGrabber.ReadInt16(); @@ -1205,15 +1395,16 @@ void ZipFile::readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 nExtraLe { nCompressedSize = aMemGrabber.ReadUInt64(); nReadSize = 16; - if (dataSize >= 24 && nOffset) + if (dataSize >= 24 && roOffset) { - *nOffset = aMemGrabber.ReadUInt64(); + roOffset.emplace(aMemGrabber.ReadUInt64()); nReadSize = 24; // 4 byte should be "Disk Start Number" but we not need it } } if (dataSize > nReadSize) aMemGrabber.skipBytes(dataSize - nReadSize); + isZip64 = true; } // Info-ZIP Unicode Path Extra Field - pointless as we expect UTF-8 in CEN already else if (nheaderID == 0x7075 && pCENFilenameToCheck) // ignore in recovery mode @@ -1250,6 +1441,7 @@ void ZipFile::readExtraFields(MemoryByteGrabber& aMemGrabber, sal_Int16 nExtraLe } nExtraLen -= dataSize + 4; } + return isZip64; } // PK34: Local file header @@ -1311,7 +1503,8 @@ void ZipFile::HandlePK34(std::span data, sal_Int64 dataOffset, s MemoryByteGrabber aMemGrabberExtra(aExtraBuffer); if (aEntry.nExtraLen > 0) { - readExtraFields(aMemGrabberExtra, aEntry.nExtraLen, nSize, nCompressedSize, nullptr, nullptr); + ::std::optional oOffset64; + readExtraFields(aMemGrabberExtra, aEntry.nExtraLen, nSize, nCompressedSize, oOffset64, nullptr); } } -- cgit