summaryrefslogtreecommitdiff
path: root/svl/source/misc/sharedstringpool.cxx
blob: d2d890004fbd1c03d2bb3c86ba8a8940ad444e8c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

#include <svl/sharedstringpool.hxx>
#include <svl/sharedstring.hxx>
#include <unotools/charclass.hxx>
#include <osl/mutex.hxx>

#include <unordered_map>
#include <unordered_set>

namespace svl {

namespace {

sal_Int32 getRefCount( const rtl_uString* p )
{
    return (p->refCount & 0x3FFFFFFF);
}

}

struct SharedStringPool::Impl
{
    mutable osl::Mutex maMutex;
    // set of upper-case, so we can share these as the value in the maStrMap
    std::unordered_set<OUString> maStrPoolUpper;
    // map with rtl_uString* as value so we can avoid some ref-counting
    std::unordered_map<OUString,rtl_uString*> maStrMap;
    const CharClass& mrCharClass;

    explicit Impl( const CharClass& rCharClass ) : mrCharClass(rCharClass) {}
};

SharedStringPool::SharedStringPool( const CharClass& rCharClass ) :
    mpImpl(new Impl(rCharClass)) {}

SharedStringPool::~SharedStringPool()
{
}

SharedString SharedStringPool::intern( const OUString& rStr )
{
    osl::MutexGuard aGuard(&mpImpl->maMutex);

    auto [mapIt,bInserted] = mpImpl->maStrMap.emplace(rStr, rStr.pData);
    if (bInserted)
    {
        // This is a new string insertion. Establish mapping to upper-case variant.
        OUString aUpper = mpImpl->mrCharClass.uppercase(rStr);
        if (aUpper == rStr)
        {
            auto insertResult = mpImpl->maStrPoolUpper.insert(rStr);
            // need to use the same underlying rtl_uString object so the
            // upper->upper detection in purge() works
            auto pData = insertResult.first->pData;
            // This is dodgy, but necessary. I don't want to do a delete/insert because
            // this class is very performance sensitive. This does not violate the internals
            // the map because the new key points to something with the same hash and equality
            // as the old key.
            const_cast<OUString&>(mapIt->first) = *insertResult.first;
            mapIt->second = pData;
        }
        else
        {
            auto insertResult = mpImpl->maStrPoolUpper.insert(aUpper);
            mapIt->second = insertResult.first->pData;
        }
    }
    return SharedString(mapIt->first.pData, mapIt->second);
}

void SharedStringPool::purge()
{
    osl::MutexGuard aGuard(&mpImpl->maMutex);

    // Because we can have an uppercase entry mapped to itself,
    // and then a bunch of lowercase entries mapped to that same
    // upper-case entry, we need to scan the map twice - the first
    // time to remove lowercase entries, and then only can we
    // check for unused uppercase entries.

    auto it = mpImpl->maStrMap.begin();
    auto itEnd = mpImpl->maStrMap.end();
    while (it != itEnd)
    {
        rtl_uString* p1 = it->first.pData;
        rtl_uString* p2 = it->second;
        if (p1 != p2)
        {
            // normal case - lowercase mapped to uppercase, which
            // means that the lowercase entry has one ref-counted
            // entry as the key in the map
            if (getRefCount(p1) == 1)
            {
                it = mpImpl->maStrMap.erase(it);
                // except that the uppercase entry may be mapped to
                // by other lower-case entries
                if (getRefCount(p2) == 1)
                    mpImpl->maStrPoolUpper.erase(OUString::unacquired(&p2));
                continue;
            }
        }
        ++it;
    }

    it = mpImpl->maStrMap.begin();
    itEnd = mpImpl->maStrMap.end();
    while (it != itEnd)
    {
        rtl_uString* p1 = it->first.pData;
        rtl_uString* p2 = it->second;
        if (p1 == p2)
        {
            // uppercase which is mapped to itself, which means
            // one ref-counted entry as the key in the map, and
            // one ref-counted entry in the set
            if (getRefCount(p1) == 2)
            {
                it = mpImpl->maStrMap.erase(it);
                mpImpl->maStrPoolUpper.erase(OUString::unacquired(&p1));
                continue;
            }
        }
        ++it;
    }
}

size_t SharedStringPool::getCount() const
{
    osl::MutexGuard aGuard(&mpImpl->maMutex);
    return mpImpl->maStrMap.size();
}

size_t SharedStringPool::getCountIgnoreCase() const
{
    osl::MutexGuard aGuard(&mpImpl->maMutex);
    return mpImpl->maStrPoolUpper.size();
}

}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */