diff options
author | Kohei Yoshida <kohei.yoshida@gmail.com> | 2012-05-25 16:01:52 -0400 |
---|---|---|
committer | Kohei Yoshida <kohei.yoshida@gmail.com> | 2012-05-25 16:01:52 -0400 |
commit | 0c782558aee08bfc24e03c715a504a234ab30307 (patch) | |
tree | c60d512a7230a4fe932bcd13ea8b86dcddff78d7 /filter | |
parent | 58652054727a29701795f2849c87f320de05c4dd (diff) |
Rank format types in order of complexity.
This way we can run the detection services in the correct order, from
more complex to less complex, since a more complex structure is
much easier to test and reject.
Change-Id: Iee2d3e2e4f2834f95e6e89975f646e3928114b11
Diffstat (limited to 'filter')
-rw-r--r-- | filter/source/config/cache/typedetection.cxx | 118 |
1 files changed, 115 insertions, 3 deletions
diff --git a/filter/source/config/cache/typedetection.cxx b/filter/source/config/cache/typedetection.cxx index 93e80151f538..672a09ff604e 100644 --- a/filter/source/config/cache/typedetection.cxx +++ b/filter/source/config/cache/typedetection.cxx @@ -109,8 +109,101 @@ TypeDetection::~TypeDetection() namespace { /** - * Types with matching pattern first, then extension, then types that are - * supported by the document service come next. + * Rank format types in order of complexity. More complex formats are + * ranked higher so that they get tested sooner over simpler formats. + * + * Guidelines to determine how complex a format is (subject to change): + * + * 1) compressed text (XML, HTML, etc) + * 2) binary + * 3) non-compressed text + * 3.1) structured text + * 3.1.1) dialect of a structured text (e.g. docbook XML) + * 3.1.2) generic structured text (e.g. generic XML) + * 3.2) non-structured text + * + * In each category, rank them from strictly-structured to + * loosely-structured. + */ +int getFlatTypeRank(const rtl::OUString& rType) +{ + // List formats from more complex to less complex. + // TODO: Add more. 
+ static const char* ranks[] = { + // Compressed XML + "writer8_template", + "writer8", + "calc8_template", + "calc8", + "writer_OOXML_Text_Template", + "writer_OOXML", + "writer_MS_Word_2007_Template", + "writer_MS_Word_2007", + "Office Open XML Spreadsheet Template", + "Office Open XML Spreadsheet", + "MS Excel 2007 XML Template", + "MS Excel 2007 XML", + + // Compressed text + "pdf_Portable_Document_Format", + + // Binary + "writer_T602_Document", + "writer_WordPerfect_Document", + "writer_MS_Works_Document", + "writer_MS_Word_97_Vorlage", + "writer_MS_Word_97", + "writer_MS_Word_95_Vorlage", + "writer_MS_Word_95", + "writer_MS_WinWord_60", + "writer_MS_WinWord_5", + "MS Excel 2007 Binary", + "calc_MS_Excel_97_VorlageTemplate", + "calc_MS_Excel_97", + "calc_MS_Excel_95_VorlageTemplate", + "calc_MS_Excel_95", + "calc_MS_Excel_5095_VorlageTemplate", + "calc_MS_Excel_5095", + "calc_MS_Excel_40_VorlageTemplate", + "calc_MS_Excel_40", + "calc_Pocket_Excel_File", + "calc_Lotus", + "calc_QPro", + "calc_SYLK", + "calc_DIF", + "calc_dBase", + + + // Non-compressed XML + "writer_ODT_FlatXML", + "calc_ODS_FlatXML", + "calc_MS_Excel_2003_XML", + "writer_MS_Word_2003_XML", + "writer_DocBook_File", + "XHTML_File", + + // Non-compressed text + "writer_Rich_Text_Format", + "generic_HTML", + "generic_Text" + }; + + size_t n = SAL_N_ELEMENTS(ranks); + + for (size_t i = 0; i < n; ++i) + { + if (rType.equalsAscii(ranks[i])) + return n - i - 1; + } + + // Not ranked. Treat them equally. + return -1; +} + +/** + * Types with matching pattern first, then extension, then custom ranks by + * types, then types that are supported by the document service come next. + * Lastly, sort them alphabetically. 
*/ struct SortByPriority : public std::binary_function<FlatDetectionInfo, FlatDetectionInfo, bool> { @@ -122,7 +215,25 @@ struct SortByPriority : public std::binary_function<FlatDetectionInfo, FlatDetec if (r1.bMatchByExtension != r2.bMatchByExtension) return r1.bMatchByExtension; - return r1.bPreselectedByDocumentService; + int rank1 = getFlatTypeRank(r1.sType); + int rank2 = getFlatTypeRank(r2.sType); + + if (rank1 != rank2) + return rank1 > rank2; + + if (r1.bPreselectedByDocumentService != r2.bPreselectedByDocumentService) + return r1.bPreselectedByDocumentService; + + // All things being equal, sort them alphabetically. + return r1.sType > r2.sType; + } +}; + +struct EqualByName : public std::binary_function<FlatDetectionInfo, FlatDetectionInfo, bool> +{ + bool operator() (const FlatDetectionInfo& r1, const FlatDetectionInfo& r2) const + { + return r1.sType == r2.sType; } }; @@ -177,6 +288,7 @@ struct SortByPriority : public std::binary_function<FlatDetectionInfo, FlatDetec // Properly prioritize all candidate types. lFlatTypes.sort(SortByPriority()); + lFlatTypes.unique(EqualByName()); ::rtl::OUString sType ; ::rtl::OUString sLastChance; |