summaryrefslogtreecommitdiff
path: root/vcl/source/fontsubset
diff options
context:
space:
mode:
authorJan-Marek Glogowski <glogow@fbihome.de>2020-09-11 22:07:12 +0200
committerJan-Marek Glogowski <glogow@fbihome.de>2020-09-15 06:42:33 +0200
commitc7482bc2904401e7d975b5721ec861b8589253f9 (patch)
treedbb52861749112e67b338f0c6348a541f7b83d2a /vcl/source/fontsubset
parenta88c9752c6251dceb77c809b4592f2516e10f3e6 (diff)
Replace FindCmap with ParseCMAP
This introduces a potential performance regression, because FindCmap works on the existing font tables and just sets up a lookup function, while ParseCMAP creates some optimized, in-memory lookup table, which needs a bit more work, but is faster in its usage, I think. At least the initial usage is faster the old way, as the CMAPs aren't decoded at all. As you can see, the old code is just used on Windows and MacOS / iOS. Deep in the bowels of the PrintFontManager, the CMAP is also decoded using ParseCMAP... So I'm not sure this potential regression really exists. Most fonts will already have a decoded CMAP, so my guess is this is actually faster in the end. No idea, how to measure. Change-Id: I52caac1264cd3ff11a2a3fa6e9c800f67f146a79 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/102685 Tested-by: Jenkins Reviewed-by: Jan-Marek Glogowski <glogow@fbihome.de>
Diffstat (limited to 'vcl/source/fontsubset')
-rw-r--r--vcl/source/fontsubset/sft.cxx385
1 files changed, 11 insertions, 374 deletions
diff --git a/vcl/source/fontsubset/sft.cxx b/vcl/source/fontsubset/sft.cxx
index 6eabe8480608..812c539d6700 100644
--- a/vcl/source/fontsubset/sft.cxx
+++ b/vcl/source/fontsubset/sft.cxx
@@ -35,6 +35,7 @@
#include <unistd.h>
#endif
#include <sft.hxx>
+#include <impfontcharmap.hxx>
#include "ttcr.hxx"
#include "xlat.hxx"
#include <rtl/crc.h>
@@ -165,18 +166,6 @@ static sal_uInt32 GetUInt32(const sal_uInt8 *ptr, size_t offset)
return t;
}
-#if defined(OSL_BIGENDIAN)
-#define Int16FromMOTA(a) (a)
-#define Int32FromMOTA(a) (a)
-#else
-static sal_uInt16 Int16FromMOTA(sal_uInt16 a) {
- return static_cast<sal_uInt16>(static_cast<sal_uInt8>(a >> 8) | (static_cast<sal_uInt8>(a) << 8));
-}
-static sal_uInt32 Int32FromMOTA(sal_uInt32 a) {
- return ((a>>24)&0xFF) | (((a>>8)&0xFF00) | ((a&0xFF00)<<8) | ((a&0xFF)<<24));
-}
-#endif
-
static F16Dot16 fixedMul(F16Dot16 a, F16Dot16 b)
{
unsigned int a1, b1;
@@ -1014,332 +1003,6 @@ static void GetNames(TrueTypeFont *t)
}
}
-namespace {
-
-enum cmapType {
- CMAP_NOT_USABLE = -1,
- CMAP_MS_Symbol = 10,
- CMAP_MS_Unicode = 11,
- CMAP_MS_ShiftJIS = 12,
- CMAP_MS_PRC = 13,
- CMAP_MS_Big5 = 14,
- CMAP_MS_Wansung = 15,
- CMAP_MS_Johab = 16
-};
-
-}
-
-#define MISSING_GLYPH_INDEX 0
-
-static sal_uInt32 getGlyph0(const sal_uInt8* cmap, sal_uInt32, sal_uInt32 c) {
- if (c <= 255) {
- return *(cmap + 6 + c);
- } else {
- return MISSING_GLYPH_INDEX;
- }
-}
-
-namespace {
-
-struct subHeader2 {
- sal_uInt16 firstCode;
- sal_uInt16 entryCount;
- sal_uInt16 idDelta;
- sal_uInt16 idRangeOffset;
-};
-
-}
-
-static sal_uInt32 getGlyph2(const sal_uInt8 *cmap, const sal_uInt32 nMaxCmapSize, sal_uInt32 c) {
- sal_uInt16 const *CMAP2 = reinterpret_cast<sal_uInt16 const *>(cmap);
- sal_uInt8 theHighByte;
-
- sal_uInt8 theLowByte;
- subHeader2 const * subHeader2s;
- sal_uInt16 const * subHeader2Keys;
- sal_uInt16 firstCode;
- int k = -1;
- sal_uInt32 ToReturn;
-
- theHighByte = static_cast<sal_uInt8>((c >> 8) & 0x00ff);
- theLowByte = static_cast<sal_uInt8>(c & 0x00ff);
- subHeader2Keys = CMAP2 + 3;
- subHeader2s = reinterpret_cast<subHeader2 const *>(subHeader2Keys + 256);
- if(reinterpret_cast<sal_uInt8 const *>(&subHeader2Keys[theHighByte]) - cmap < int(nMaxCmapSize - 2))
- {
- k = Int16FromMOTA(subHeader2Keys[theHighByte]) / 8;
- // check if the subheader record fits into available space
- if(reinterpret_cast<sal_uInt8 const *>(&subHeader2s[k]) - cmap >= int(nMaxCmapSize - sizeof(subHeader2)))
- k = -1;
- }
-
- if(k == 0) {
- firstCode = Int16FromMOTA(subHeader2s[0].firstCode);
- if(theLowByte >= firstCode && theLowByte < (firstCode + Int16FromMOTA(subHeader2s[k].entryCount))) {
- sal_uInt16 const * pGlyph = (&(subHeader2s[0].idRangeOffset))
- + (Int16FromMOTA(subHeader2s[0].idRangeOffset)/2) /* + offset */
- + theLowByte /* + to_look */
- - firstCode
- ;
- if (reinterpret_cast<sal_uInt8 const *>(pGlyph) - cmap < int(nMaxCmapSize) - 4)
- return *pGlyph;
- else
- return MISSING_GLYPH_INDEX;
- } else {
- return MISSING_GLYPH_INDEX;
- }
- } else if (k > 0) {
- firstCode = Int16FromMOTA(subHeader2s[k].firstCode);
- if(theLowByte >= firstCode && theLowByte < (firstCode + Int16FromMOTA(subHeader2s[k].entryCount))) {
- ToReturn = *((&(subHeader2s[k].idRangeOffset))
- + (Int16FromMOTA(subHeader2s[k].idRangeOffset)/2)
- + theLowByte - firstCode);
- if(ToReturn == 0) {
- return MISSING_GLYPH_INDEX;
- } else {
- ToReturn += Int16FromMOTA(subHeader2s[k].idDelta);
- return (ToReturn & 0xFFFF);
- }
- } else {
- return MISSING_GLYPH_INDEX;
- }
- } else {
- return MISSING_GLYPH_INDEX;
- }
-}
-
-static sal_uInt32 getGlyph6(const sal_uInt8 *cmap, sal_uInt32, sal_uInt32 c) {
- sal_uInt16 firstCode, lastCode, count;
- sal_uInt16 const *CMAP6 = reinterpret_cast<sal_uInt16 const *>(cmap);
-
- firstCode = Int16FromMOTA(*(CMAP6 + 3));
- count = Int16FromMOTA(*(CMAP6 + 4));
- lastCode = firstCode + count - 1;
- if (c < firstCode || c > lastCode) {
- return MISSING_GLYPH_INDEX;
- } else {
- return *((CMAP6 + 5)/*glyphIdArray*/ + (c - firstCode));
- }
-}
-
-static sal_uInt16 GEbinsearch(sal_uInt16 const *ar, sal_uInt16 length, sal_uInt16 toSearch) {
- signed int low, high, lastfound = 0xffff;
- sal_uInt16 res;
- if(length == sal_uInt16(0) || length == sal_uInt16(0xFFFF)) {
- return sal_uInt16(0xFFFF);
- }
- low = 0;
- high = length - 1;
- while(high >= low) {
- int mid = (high + low)/2;
- res = Int16FromMOTA(*(ar+mid));
- if(res >= toSearch) {
- lastfound = mid;
- high = --mid;
- } else {
- low = ++mid;
- }
- }
- return static_cast<sal_uInt16>(lastfound);
-}
-
-static sal_uInt32 getGlyph4(const sal_uInt8 *cmap, const sal_uInt32 nMaxCmapSize, sal_uInt32 c) {
- sal_uInt16 i;
- int ToReturn;
- sal_uInt16 segCount;
- sal_uInt16 const * startCode;
- sal_uInt16 const * endCode;
- sal_uInt16 const * idDelta;
- /* sal_uInt16 * glyphIdArray; */
- sal_uInt16 const * idRangeOffset;
- /*sal_uInt16 * glyphIndexArray;*/
- sal_uInt16 const *CMAP4 = reinterpret_cast<sal_uInt16 const *>(cmap);
- /* sal_uInt16 GEbinsearch(sal_uInt16 *ar, sal_uInt16 length, sal_uInt16 toSearch); */
-
- segCount = Int16FromMOTA(*(CMAP4 + 3))/2;
- endCode = CMAP4 + 7;
- i = GEbinsearch(endCode, segCount, static_cast<sal_uInt16>(c));
-
- if (i == sal_uInt16(0xFFFF)) {
- return MISSING_GLYPH_INDEX;
- }
- startCode = endCode + segCount + 1;
-
- if((reinterpret_cast<sal_uInt8 const *>(&startCode[i]) - cmap >= int(nMaxCmapSize - 2)) || Int16FromMOTA(startCode[i]) > c) {
- return MISSING_GLYPH_INDEX;
- }
- idDelta = startCode + segCount;
- idRangeOffset = idDelta + segCount;
- /*glyphIndexArray = idRangeOffset + segCount;*/
-
- if((reinterpret_cast<sal_uInt8 const *>(&idRangeOffset[i]) - cmap < int(nMaxCmapSize - 2)) && Int16FromMOTA(idRangeOffset[i]) != 0) {
- sal_uInt16 const * pGlyphOffset = &(idRangeOffset[i]) + (Int16FromMOTA(idRangeOffset[i])/2 + (c - Int16FromMOTA(startCode[i])));
- if(reinterpret_cast<sal_uInt8 const *>(pGlyphOffset) - cmap >= int(nMaxCmapSize - 2))
- return MISSING_GLYPH_INDEX;
- c = Int16FromMOTA(*pGlyphOffset);
- }
-
- ToReturn = (Int16FromMOTA(idDelta[i]) + c) & 0xFFFF;
- return ToReturn;
-}
-
-static sal_uInt32 getGlyph12(const sal_uInt8 *pCmap, sal_uInt32, sal_uInt32 cChar) {
- const sal_uInt32* pCMAP12 = reinterpret_cast<const sal_uInt32*>(pCmap);
- int nLength = Int32FromMOTA( pCMAP12[1] );
- int nGroups = Int32FromMOTA( pCMAP12[3] );
- int nLower = 0;
- int nUpper = nGroups;
-
- if( nUpper > (nLength-16)/12 )
- nUpper = (nLength-16)/12;
-
- /* binary search in "segmented coverage" subtable */
- while( nLower < nUpper ) {
- int nIndex = (nLower + nUpper) / 2;
- const sal_uInt32* pEntry = &pCMAP12[ 4 + 3*nIndex ];
- sal_uInt32 cStart = Int32FromMOTA( pEntry[0] );
- sal_uInt32 cLast = Int32FromMOTA( pEntry[1] );
- if( cChar < cStart )
- nUpper = nIndex;
- else if( cChar > cLast )
- nLower = nIndex + 1;
- else { /* found matching entry! */
- sal_uInt32 nGlyph = Int32FromMOTA( pEntry[2] );
- nGlyph += cChar - cStart;
- return nGlyph;
- }
- }
-
- return MISSING_GLYPH_INDEX;
-}
-
-static void FindCmap(TrueTypeFont *ttf)
-{
- sal_uInt32 table_size;
- const sal_uInt8* table = ttf->table(O_cmap, table_size);
- if (table_size < 4)
- {
- SAL_WARN("vcl.fonts", "Parsing error in " << OUString::createFromAscii(ttf->fileName()) <<
- "cmap table size too short");
- return;
- }
- sal_uInt16 ncmaps = GetUInt16(table, 2);
- sal_uInt32 AppleUni = 0; // Apple Unicode
- sal_uInt32 ThreeZero = 0; /* MS Symbol */
- sal_uInt32 ThreeOne = 0; /* MS UCS-2 */
- sal_uInt32 ThreeTwo = 0; /* MS ShiftJIS */
- sal_uInt32 ThreeThree = 0; /* MS PRC */
- sal_uInt32 ThreeFour = 0; /* MS Big5 */
- sal_uInt32 ThreeFive = 0; /* MS Wansung */
- sal_uInt32 ThreeSix = 0; /* MS Johab */
-
- const sal_uInt32 remaining_table_size = table_size-4;
- const sal_uInt32 nMinRecordSize = 8;
- const sal_uInt32 nMaxRecords = remaining_table_size / nMinRecordSize;
- if (ncmaps > nMaxRecords)
- {
- SAL_WARN("vcl.fonts", "Parsing error in " << OUString::createFromAscii(ttf->fileName()) <<
- ": " << nMaxRecords << " max possible entries, but " <<
- ncmaps << " claimed, truncating");
- ncmaps = nMaxRecords;
- }
-
- for (unsigned int i = 0; i < ncmaps; i++) {
- /* sanity check, cmap entry must lie within table */
- sal_uInt32 nLargestFixedOffsetPos = 8 + i * 8;
- sal_uInt32 nMinSize = nLargestFixedOffsetPos + sizeof(sal_uInt32);
- if (nMinSize > table_size)
- {
- SAL_WARN( "vcl.fonts", "Font " << OUString::createFromAscii(ttf->fileName()) << " claimed to have "
- << ncmaps << " cmaps, but only space for " << i);
- break;
- }
-
- sal_uInt16 pID = GetUInt16(table, 4 + i * 8);
- sal_uInt16 eID = GetUInt16(table, 6 + i * 8);
- sal_uInt32 offset = GetUInt32(table, nLargestFixedOffsetPos);
-
- /* sanity check, cmap must lie within file */
- if( (table - ttf->ptr) + offset > static_cast<sal_uInt32>(ttf->fsize) )
- continue;
-
- /* Unicode tables in Apple fonts */
- if (pID == 0) {
- AppleUni = offset;
- }
-
- if (pID == 3) {
- switch (eID) {
- case 0: ThreeZero = offset; break;
- case 10: // UCS-4
- case 1: ThreeOne = offset; break;
- case 2: ThreeTwo = offset; break;
- case 3: ThreeThree = offset; break;
- case 4: ThreeFour = offset; break;
- case 5: ThreeFive = offset; break;
- case 6: ThreeSix = offset; break;
- }
- }
- }
-
- // fall back to AppleUnicode if there are no ThreeOne/Threezero tables
- if( AppleUni && !ThreeZero && !ThreeOne)
- ThreeOne = AppleUni;
-
- if (ThreeOne) {
- ttf->cmapType = CMAP_MS_Unicode;
- ttf->cmap = table + ThreeOne;
- } else if (ThreeTwo) {
- ttf->cmapType = CMAP_MS_ShiftJIS;
- ttf->cmap = table + ThreeTwo;
- } else if (ThreeThree) {
- ttf->cmapType = CMAP_MS_PRC;
- ttf->cmap = table + ThreeThree;
- } else if (ThreeFour) {
- ttf->cmapType = CMAP_MS_Big5;
- ttf->cmap = table + ThreeFour;
- } else if (ThreeFive) {
- ttf->cmapType = CMAP_MS_Wansung;
- ttf->cmap = table + ThreeFive;
- } else if (ThreeSix) {
- ttf->cmapType = CMAP_MS_Johab;
- ttf->cmap = table + ThreeSix;
- } else if (ThreeZero) {
- ttf->cmapType = CMAP_MS_Symbol;
- ttf->cmap = table + ThreeZero;
- } else {
- ttf->cmapType = CMAP_NOT_USABLE;
- ttf->cmap = nullptr;
- }
-
- if (ttf->cmapType != CMAP_NOT_USABLE) {
- if( (ttf->cmap - ttf->ptr + 2U) > static_cast<sal_uInt32>(ttf->fsize) ) {
- ttf->cmapType = CMAP_NOT_USABLE;
- ttf->cmap = nullptr;
- }
- }
-
- if (ttf->cmapType == CMAP_NOT_USABLE) return;
-
- switch (GetUInt16(ttf->cmap, 0)) {
- case 0: ttf->mapper = getGlyph0; break;
- case 2: ttf->mapper = getGlyph2; break;
- case 4: ttf->mapper = getGlyph4; break;
- case 6: ttf->mapper = getGlyph6; break;
- case 12: ttf->mapper= getGlyph12; break;
- default:
-#if OSL_DEBUG_LEVEL > 1
- /*- if the cmap table is really broken */
- SAL_WARN("vcl.fonts", ttf->fileName() << ": "
- << GetUInt16(ttf->cmap, 0)
- << " is not a recognized cmap format..");
-#endif
- ttf->cmapType = CMAP_NOT_USABLE;
- ttf->cmap = nullptr;
- ttf->mapper = nullptr;
- }
-}
-
/*- Public functions */
int CountTTCFonts(const char* fname)
@@ -1476,9 +1139,6 @@ TrueTypeFont::TrueTypeFont(const char* pFileName)
, subfamily(nullptr)
, usubfamily(nullptr)
, ntables(0)
- , cmap(nullptr)
- , cmapType(0)
- , mapper(nullptr)
{
}
@@ -1554,6 +1214,15 @@ SFErrCodes AbstractTrueTypeFont::indexGlyphData()
table = this->table(O_vhea, table_size);
m_nVertMetrics = (table && table_size >= 36) ? GetUInt16(table, 34) : 0;
+ if (!m_xCharMap.is())
+ {
+ CmapResult aCmapResult;
+ table = this->table(O_cmap, table_size);
+ if (!ParseCMAP(table, table_size, aCmapResult))
+ return SFErrCodes::TtFormat;
+ m_xCharMap = new FontCharMap(aCmapResult);
+ }
+
return SFErrCodes::Ok;
}
@@ -1695,16 +1364,11 @@ SFErrCodes TrueTypeFont::open(sal_uInt32 facenum)
/* At this point TrueTypeFont is constructed, now need to verify the font format
and read the basic font properties */
- /* The following tables are absolutely required:
- * maxp, head, name, cmap
- */
-
SFErrCodes ret = indexGlyphData();
if (ret != SFErrCodes::Ok)
return ret;
GetNames(this);
- FindCmap(this);
return SFErrCodes::Ok;
}
@@ -2268,33 +1932,6 @@ SFErrCodes CreateT42FromTTGlyphs(TrueTypeFont *ttf,
return SFErrCodes::Ok;
}
-#if defined(_WIN32) || defined(MACOSX) || defined(IOS)
-sal_uInt16 MapChar(TrueTypeFont const *ttf, sal_uInt16 ch)
-{
- switch (ttf->cmapType) {
- case CMAP_MS_Symbol:
- {
- const sal_uInt32 nMaxCmapSize = ttf->ptr + ttf->fsize - ttf->cmap;
- if( ttf->mapper == getGlyph0 && ( ch & 0xf000 ) == 0xf000 )
- ch &= 0x00ff;
- return static_cast<sal_uInt16>(ttf->mapper(ttf->cmap, nMaxCmapSize, ch ));
- }
-
- case CMAP_MS_Unicode: break;
- case CMAP_MS_ShiftJIS: ch = TranslateChar12(ch); break;
- case CMAP_MS_PRC: ch = TranslateChar13(ch); break;
- case CMAP_MS_Big5: ch = TranslateChar14(ch); break;
- case CMAP_MS_Wansung: ch = TranslateChar15(ch); break;
- case CMAP_MS_Johab: ch = TranslateChar16(ch); break;
- default: return 0;
- }
- const sal_uInt32 nMaxCmapSize = ttf->ptr + ttf->fsize - ttf->cmap;
- ch = static_cast<sal_uInt16>(ttf->mapper(ttf->cmap, nMaxCmapSize, ch));
- return ch;
-}
-#endif
-
-
std::unique_ptr<sal_uInt16[]> GetTTSimpleGlyphMetrics(AbstractTrueTypeFont const *ttf, const sal_uInt16 *glyphArray, int nGlyphs, bool vertical)
{
const sal_uInt8* pTable;
@@ -2391,7 +2028,7 @@ void GetTTGlobalFontInfo(TrueTypeFont *ttf, TTGlobalFontInfo *info)
info->subfamily = ttf->subfamily;
info->usubfamily = ttf->usubfamily;
info->psname = ttf->psname;
- info->symbolEncoded = (ttf->cmapType == CMAP_MS_Symbol);
+ info->symbolEncoded = ttf->GetCharMap()->isSymbolic();
sal_uInt32 table_size;
const sal_uInt8* table = ttf->table(O_OS2, table_size);