tdf#125662: do parallel-zip in batches

In this approach the input stream is read one batch (of constant size) at a time and each batch is compressed by ThreadedDeflater. After we are done with a batch, the deflated buffer is processed straightaway (directed to file backed storage). Change-Id: I2d42f86cf5898e4d746836d94bf6009a8d3b0230 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/86596 Tested-by: Jenkins Reviewed-by: Luboš Luňák <l.lunak@collabora.com>
author: Dennis Francis <dennis.francis@collabora.com> 2020-01-11 11:51:34 +0530
committer: Dennis Francis <dennis.francis@collabora.com> 2020-01-13 12:11:44 +0100
commit: 353d4528b8ad8abca9a13f3016632e42bab7afde (patch)
tree: a83950338ad79e77594638d9dd60bd073fdd2115 /package/inc
parent: 9dce33e6943dec5ff111802ec3e7c338abf56592 (diff)
1 files changed, 20 insertions, 9 deletions
diff --git a/package/inc/ThreadedDeflater.hxx b/package/inc/ThreadedDeflater.hxx
index 3bd7e4bc966a..f22a40a0c941 100644
--- a/package/inc/ThreadedDeflater.hxx
+++ b/package/inc/ThreadedDeflater.hxx
@@ -21,37 +21,48 @@
 #define INCLUDED_PACKAGE_THREADEDDEFLATER_HXX
 
 #include <com/sun/star/uno/Sequence.hxx>
+#include <com/sun/star/io/XInputStream.hpp>
+#include <com/sun/star/uno/Reference.hxx>
 #include <package/packagedllapi.hxx>
 #include <comphelper/threadpool.hxx>
 #include <atomic>
 #include <memory>
+#include <vector>
+#include <functional>
 
 namespace ZipUtils
 {
 /// Parallel compression a stream using the libz deflate algorithm.
 ///
-/// Almost a replacement for the Deflater class. Call startDeflate() with the data,
-/// check with finished() or waitForTasks() and retrieve result with getOutput().
-/// The class will internally split into multiple threads.
+/// Call deflateWrite() with the input stream and input/output processing functions.
+/// This will use multiple threads for compression on each batch of data from the stream.
 class ThreadedDeflater final
 {
     class Task;
     // Note: All this should be lock-less. Each task writes only to its part
-    // of the data, flags are atomic.
+    // of the data.
     std::vector<std::vector<sal_Int8>> outBuffers;
     std::shared_ptr<comphelper::ThreadTaskTag> threadTaskTag;
     css::uno::Sequence<sal_Int8> inBuffer;
+    css::uno::Sequence<sal_Int8> prevDataBlock;
+    std::function<void(const css::uno::Sequence<sal_Int8>&, sal_Int32)> maProcessOutputFunc;
+    sal_Int64 totalIn;
+    sal_Int64 totalOut;
     int zlibLevel;
-    std::atomic<int> pendingTasksCount;
 
 public:
     // Unlike with Deflater class, bNoWrap is always true.
     ThreadedDeflater(sal_Int32 nSetLevel);
     ~ThreadedDeflater() COVERITY_NOEXCEPT_FALSE;
-    void startDeflate(const css::uno::Sequence<sal_Int8>& rBuffer);
-    void waitForTasks();
-    bool finished() const;
-    css::uno::Sequence<sal_Int8> getOutput() const;
+    void deflateWrite(
+        const css::uno::Reference<css::io::XInputStream>& xInStream,
+        std::function<void(const css::uno::Sequence<sal_Int8>&, sal_Int32)> aProcessInputFunc,
+        std::function<void(const css::uno::Sequence<sal_Int8>&, sal_Int32)> aProcessOutputFunc);
+    sal_Int64 getTotalIn() const { return totalIn; }
+    sal_Int64 getTotalOut() const { return totalOut; }
+
+private:
+    void processDeflatedBuffers();
     void clear();
 };
author	Dennis Francis <dennis.francis@collabora.com>	2020-01-11 11:51:34 +0530
committer	Dennis Francis <dennis.francis@collabora.com>	2020-01-13 12:11:44 +0100
commit	353d4528b8ad8abca9a13f3016632e42bab7afde (patch)
tree	a83950338ad79e77594638d9dd60bd073fdd2115 /package/inc
parent	9dce33e6943dec5ff111802ec3e7c338abf56592 (diff)