summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMiklos Vajna <vmiklos@collabora.com>2020-10-28 14:54:52 +0100
committerMiklos Vajna <vmiklos@collabora.com>2020-10-28 19:34:45 +0100
commitada07f303e7cd1e39c73abe0741aefe7d9d73a57 (patch)
treefd8c81e5404da2909394a250fce7b8ecdc77b28e
parentdb6c7a486395304f38e9ea52951f576f34749cbc (diff)
tdf#123476 filter: try to detect 0-byte files based on extension
A 0-byte ("empty") pptx file is obviously junk input, so it's not surprising if the catch-all generic_Text filter is chosen to open it in Writer at the end. But we can do better: if we really get an empty file URL with an extension we can recognize, then we can fake the filter type / filter name, so the empty "presentation" opens in Impress, and a re-save also works as expected. This builds on top of commit 8a201be240b6d408d15166be7ffc576b9e123634 (fdo#68903 Import .tsv and .xls plain text files in Calc by default, 2013-10-27), except that the new approach works for all supported file extensions and also with filters which would not handle empty input (e.g. pptx refuses the import if the ZIP storage is broken). Change-Id: Ie01650a5eb6ca42c35e090133965467b621bb526 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/104939 Reviewed-by: Miklos Vajna <vmiklos@collabora.com> Tested-by: Jenkins
-rw-r--r--filter/CppunitTest_filter_textfilterdetect.mk7
-rw-r--r--filter/Library_textfd.mk1
-rw-r--r--filter/qa/unit/data/empty.pptx0
-rw-r--r--filter/qa/unit/textfilterdetect.cxx46
-rw-r--r--filter/source/textfilterdetect/filterdetect.cxx32
-rw-r--r--include/tools/stream.hxx6
-rw-r--r--sfx2/source/doc/objstor.cxx6
-rw-r--r--tools/source/stream/stream.cxx20
8 files changed, 108 insertions, 10 deletions
diff --git a/filter/CppunitTest_filter_textfilterdetect.mk b/filter/CppunitTest_filter_textfilterdetect.mk
index a5a6263abdb8..4066c26591a3 100644
--- a/filter/CppunitTest_filter_textfilterdetect.mk
+++ b/filter/CppunitTest_filter_textfilterdetect.mk
@@ -36,12 +36,7 @@ $(eval $(call gb_CppunitTest_use_ure,filter_textfilterdetect))
$(eval $(call gb_CppunitTest_use_vcl,filter_textfilterdetect))
-$(eval $(call gb_CppunitTest_use_components,filter_textfilterdetect,\
- configmgr/source/configmgr \
- filter/source/textfilterdetect/textfd \
- ucb/source/core/ucb1 \
- ucb/source/ucp/file/ucpfile1 \
-))
+$(eval $(call gb_CppunitTest_use_rdb,filter_textfilterdetect,services))
$(eval $(call gb_CppunitTest_use_configuration,filter_textfilterdetect))
diff --git a/filter/Library_textfd.mk b/filter/Library_textfd.mk
index e6d3889410af..c6155f1e9876 100644
--- a/filter/Library_textfd.mk
+++ b/filter/Library_textfd.mk
@@ -23,6 +23,7 @@ $(eval $(call gb_Library_use_libraries,textfd,\
cppuhelper \
cppu \
sal \
+ sfx \
tl \
utl \
svt \
diff --git a/filter/qa/unit/data/empty.pptx b/filter/qa/unit/data/empty.pptx
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/filter/qa/unit/data/empty.pptx
diff --git a/filter/qa/unit/textfilterdetect.cxx b/filter/qa/unit/textfilterdetect.cxx
index 0995147c3b78..fb1ff1d2ac7e 100644
--- a/filter/qa/unit/textfilterdetect.cxx
+++ b/filter/qa/unit/textfilterdetect.cxx
@@ -7,10 +7,14 @@
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+#include <test/bootstrapfixture.hxx>
+#include <unotest/macros_test.hxx>
+
#include <com/sun/star/document/XExtendedFilterDetection.hpp>
+#include <com/sun/star/frame/Desktop.hpp>
+#include <com/sun/star/lang/XServiceInfo.hpp>
#include <comphelper/propertyvalue.hxx>
-#include <test/bootstrapfixture.hxx>
#include <unotools/mediadescriptor.hxx>
#include <unotools/streamwrap.hxx>
#include <tools/stream.hxx>
@@ -25,10 +29,31 @@ using namespace com::sun::star;
namespace
{
/// Test class for PlainTextFilterDetect.
-class TextFilterDetectTest : public test::BootstrapFixture
+class TextFilterDetectTest : public test::BootstrapFixture, public unotest::MacrosTest
{
+ uno::Reference<lang::XComponent> mxComponent; ///< Document loaded by a test; disposed in tearDown().
+
+public:
+ void setUp() override;
+ void tearDown() override;
+ uno::Reference<lang::XComponent>& getComponent() { return mxComponent; } ///< Mutable access, so a test can store the document it loaded.
};
+void TextFilterDetectTest::setUp()
+{
+ test::BootstrapFixture::setUp(); // base fixture setup runs first
+
+ mxDesktop.set(frame::Desktop::create(mxComponentContext)); // presumably the desktop loadFromDesktop() relies on -- confirm in unotest::MacrosTest
+}
+
+void TextFilterDetectTest::tearDown()
+{
+ if (mxComponent.is()) // only tests that stored a loaded document have something to dispose
+ mxComponent->dispose();
+
+ test::BootstrapFixture::tearDown(); // base fixture teardown runs last
+}
+
char const DATA_DIRECTORY[] = "/filter/qa/unit/data/";
CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testTdf114428)
@@ -50,6 +75,23 @@ CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testTdf114428)
// This was empty, XML declaration caused HTML detect to not handle XHTML.
CPPUNIT_ASSERT_EQUAL(OUString("HTML (StarWriter)"), aFilterName);
}
+
+CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testEmptyFile)
+{
+ // Given an empty (0-byte) file with a .pptx extension:
+ OUString aURL = m_directories.getURLFromSrc(DATA_DIRECTORY) + "empty.pptx";
+
+ // When loading the file through the desktop:
+ getComponent() = loadFromDesktop(aURL);
+
+ // Then make sure it is opened in Impress, i.e. the component is a presentation document.
+ uno::Reference<lang::XServiceInfo> xServiceInfo(getComponent(), uno::UNO_QUERY);
+ CPPUNIT_ASSERT(xServiceInfo.is());
+
+ // Without the accompanying fix in place, this test would have failed, as it was opened in
+ // Writer instead.
+ CPPUNIT_ASSERT(xServiceInfo->supportsService("com.sun.star.presentation.PresentationDocument"));
+}
}
CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/filter/source/textfilterdetect/filterdetect.cxx b/filter/source/textfilterdetect/filterdetect.cxx
index ec751ea13661..09efe5e595e0 100644
--- a/filter/source/textfilterdetect/filterdetect.cxx
+++ b/filter/source/textfilterdetect/filterdetect.cxx
@@ -20,6 +20,8 @@
#include <com/sun/star/io/XInputStream.hpp>
#include <cppuhelper/supportsservice.hxx>
#include <memory>
+#include <sfx2/fcontnr.hxx>
+#include <sfx2/docfilt.hxx>
#define WRITER_TEXT_FILTER "Text"
#define CALC_TEXT_FILTER "Text - txt - csv (StarCalc)"
@@ -128,6 +130,34 @@ bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
return GetHTMLToken( OStringToOUString( aToken.toAsciiLowerCase(), RTL_TEXTENCODING_ASCII_US ) ) != HtmlTokenId::NONE;
}
+/**
+ * Given an (empty) file URL in rMediaDesc and rExt, looks up the best filter type for it and
+ * writes the type name to rType, the filter name to rMediaDesc.
+ *
+ * @param rMediaDesc media descriptor: its URL property is read, and its filter name property is
+ * written on success.
+ * @param rExt file extension used to look up the filter; an empty extension never matches.
+ * @param rType receives the type name of the filter that was found; only written on success.
+ * @return true if the URL is an empty file URL and a filter is found for rExt; false otherwise,
+ * in which case this function writes nothing to rMediaDesc or rType.
+ */
+bool HandleEmptyFileUrlByExtension(MediaDescriptor& rMediaDesc, const OUString& rExt,
+ OUString& rType)
+{
+ OUString aURL = rMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_URL(), OUString());
+ if (!tools::isEmptyFileUrl(aURL))
+ {
+ return false;
+ }
+
+ if (rExt.isEmpty())
+ {
+ return false;
+ }
+
+ std::shared_ptr<const SfxFilter> pFilter(SfxFilterMatcher().GetFilter4Extension(rExt));
+ if (!pFilter)
+ {
+ return false;
+ }
+
+ rMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= pFilter->GetFilterName();
+ rType = pFilter->GetTypeName();
+ return true;
+}
}
PlainTextFilterDetect::PlainTextFilterDetect() {}
@@ -193,7 +223,7 @@ OUString SAL_CALL PlainTextFilterDetect::detect(uno::Sequence<beans::PropertyVal
aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(WRITER_TEXT_FILTER);
else if (aExt == "csv" || aExt == "tsv" || aExt == "tab" || aExt == "xls" || aName.endsWith(".csv.gz"))
aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(CALC_TEXT_FILTER);
- else
+ else if (!HandleEmptyFileUrlByExtension(aMediaDesc, aExt, aType))
aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(WRITER_TEXT_FILTER);
}
diff --git a/include/tools/stream.hxx b/include/tools/stream.hxx
index 0d88c014da65..42084e6566fc 100644
--- a/include/tools/stream.hxx
+++ b/include/tools/stream.hxx
@@ -568,6 +568,12 @@ inline std::size_t write_uInt16_lenPrefixed_uInt8s_FromOUString(SvStream& rStrm,
[[nodiscard]] TOOLS_DLLPUBLIC bool checkSeek(SvStream &rSt, sal_uInt64 nOffset);
+namespace tools
+{
+/// Is rUrl a file:// URL that can be opened for reading and has no contents (0 bytes)?
+TOOLS_DLLPUBLIC bool isEmptyFileUrl(const OUString& rUrl);
+}
+
// FileStream
class TOOLS_DLLPUBLIC SvFileStream final : public SvStream
diff --git a/sfx2/source/doc/objstor.cxx b/sfx2/source/doc/objstor.cxx
index 479682be31db..2f8a2c5f7779 100644
--- a/sfx2/source/doc/objstor.cxx
+++ b/sfx2/source/doc/objstor.cxx
@@ -2248,7 +2248,11 @@ bool SfxObjectShell::ImportFrom(SfxMedium& rMedium,
// #i119492# During loading, some OLE objects like chart will be set
// modified flag, so needs to reset the flag to false after loading
- bool bRtn = xLoader->filter( aArgs );
+ bool bRtn = true;
+ if (!tools::isEmptyFileUrl(rMedium.GetName()))
+ {
+ bRtn = xLoader->filter(aArgs);
+ }
const uno::Sequence < OUString > aNames = GetEmbeddedObjectContainer().GetObjectNames();
for ( const auto& rName : aNames )
{
diff --git a/tools/source/stream/stream.cxx b/tools/source/stream/stream.cxx
index fe2470766be3..84dcef93bcd5 100644
--- a/tools/source/stream/stream.cxx
+++ b/tools/source/stream/stream.cxx
@@ -37,6 +37,7 @@
#include <tools/long.hxx>
#include <comphelper/fileformat.h>
+#include <comphelper/fileurl.hxx>
static void swapNibbles(unsigned char &c)
{
@@ -1414,6 +1415,25 @@ bool checkSeek(SvStream &rSt, sal_uInt64 nOffset)
return (nOffset <= nMaxSeek && rSt.Seek(nOffset) == nOffset);
}
+namespace tools
+{
+bool isEmptyFileUrl(const OUString& rUrl)
+{
+ if (!comphelper::isFileUrl(rUrl))
+ {
+ return false; // not a file URL: never reported as empty
+ }
+
+ SvFileStream aStream(rUrl, StreamMode::READ);
+ if (!aStream.IsOpen())
+ {
+ return false; // could not be opened for reading: not reported as empty either
+ }
+
+ return aStream.remainingSize() == 0; // empty means no bytes left to read on the freshly opened stream
+}
+}
+
//STREAM_SEEK_TO_END in some of the Seek backends is special cased to be
//efficient, in others e.g. SotStorageStream it's really horribly slow, and in
//those this should be overridden