# compilerplugins/clang/virtualdead.py

#!/usr/bin/python3

import re
import io

# Accumulators filled in while the plugin log is scanned.
# (nameAndParams, sourceLocation) tuple -> set of return values recorded for it
callDict = {}
# nameAndParams -> sourceLocation taken from the "virtual:" records
definitionToSourceLocationMap = {}
# (name, bitfield) tuples taken from the "param:" records
paramSet = set()

# Clang is not consistent about the numbers it puts into the names of the
# type-parameter variables it generates, so they are blanked out here to make
# otherwise identical signatures compare equal.
normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")
def normalizeTypeParams( line ):
    """Return line with every "type-parameter-N-M" replaced by "type-parameter-?-?"."""
    return re.sub(normalizeTypeParamsRegex, "type-parameter-?-?", line)

# The log is opened in text mode and decoded as ASCII (bytes that do not decode
# are dropped), using a 1 MiB buffer.  Every line is tab-separated and starts
# with a record tag:
#   virtual:  nameAndParams, sourceLocation, returnValue
#   param:    name, bitfield
with io.open("workdir/loplugin.virtualdead.log", "r", encoding="ascii", errors="ignore", buffering=1024*1024) as txt:
    for line in txt:
        tokens = line.strip().split("\t")
        try:
            recordTag = tokens[0]
            if recordTag == "virtual:":
                nameAndParams = normalizeTypeParams(tokens[1])
                sourceLocation = tokens[2]
                returnValue = tokens[3]
                callInfo = (nameAndParams, sourceLocation)
                # gather every distinct return value seen for this name/location pair
                callDict.setdefault(callInfo, set()).add(returnValue)
                definitionToSourceLocationMap[nameAndParams] = sourceLocation
            elif recordTag == "param:":
                name = normalizeTypeParams(tokens[1])
                # a param record without a bitfield column is silently skipped
                if len(tokens) > 2:
                    paramSet.add((name, tokens[2]))
            else:
                print( "unknown line: " + line)
        except IndexError:
            # a record with too few columns: report the offending line, then abort
            print("problem with line " + line.strip())
            raise

# Build tmp1list: one (sourceLocation, nameAndParams, returnValue) tuple for
# every callDict entry that has exactly one recorded return value and is not
# filtered out below.
tmp1list = []
for (functionSig, srcloc), callValues in callDict.items():
    # entries with several distinct return values are not interesting
    if len(callValues) != 1:
        continue
    callValue = next(iter(callValues))
    # skip values carrying one of these markers
    # (presumably "could not be analysed" / "pure virtual" -- confirm against the plugin)
    if any(marker in callValue
           for marker in ("unknown-stmt", "unknown2", "unknown3", "unknown4", "pure")):
        continue
    if srcloc.startswith((
            # ignore external stuff
            "workdir/",
            # ignore Qt stuff
            "Gui/", "Widgets/", "Core/", "/Qt", "Qt", "64-")):
        continue
    tmp1list.append((srcloc, functionSig, callValue))

def merge_bitfield(a, b):
    """Return the position-wise OR of two bitfield strings.

    A position of the result is "1" when either input has "1" there and "0"
    otherwise.  An empty a means nothing has been accumulated yet, so b is
    returned unchanged.  When the lengths differ, the shorter input is treated
    as if it were padded on the right with "0", so the result always has the
    length of the longer input.
    """
    if not a:
        return b
    # pad to a common width so that neither input is indexed past its end and
    # no trailing positions of the longer input are lost
    width = max(len(a), len(b))
    return "".join(
        "1" if x == "1" or y == "1" else "0"
        for x, y in zip(a.ljust(width, "0"), b.ljust(width, "0"))
    )
# Combine all bitfields recorded for the same name into one entry of tmp2dict,
# leaving out names that match one of the excluded namespace patterns.
tmp2dict = {}
tmp2list = []
for name, bitfield in paramSet:
    if any(re.match(pattern, name)
           for pattern in (r"\w+ com::", r"\w+ ooo::vba::", r"\w+ orcus::", r"\w+ std::")):
        continue
    if name in tmp2dict:
        tmp2dict[name] = merge_bitfield(tmp2dict[name], bitfield)
    else:
        tmp2dict[name] = bitfield
# Build tmp2list: one (sourceLocation, name, bitfield) tuple for every merged
# entry that survives the location filter and still has a "0" in its bitfield
# (presumably a parameter that is never used -- confirm against the plugin).
for name, bitfield in tmp2dict.items():
    # raises KeyError when no "virtual:" record was read for this name
    srcloc = definitionToSourceLocationMap[name]
    ignored = srcloc.startswith((
        # ignore Qt stuff
        "Gui/", "Widgets/", "Core/", "/Qt", "Qt", "64-",
        # ignore external stuff
        "workdir/",
        # referenced by generated code in workdir/
        "sw/source/writerfilter/ooxml/OOXMLFactory.hxx",
    ))
    if not ignored and "0" in bitfield:
        tmp2list.append((srcloc, name, bitfield))

# sort results by filename:lineno
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    """Build a sort key for s in which runs of digits compare numerically.

    s is split on runs of ASCII digits; digit chunks are converted to int and
    every other chunk is lower-cased, e.g. "Foo10.cxx" -> ["foo", 10, ".cxx"].
    """
    key = []
    for chunk in re.split(_nsre, s):
        if chunk.isdigit():
            key.append(int(chunk))
        else:
            key.append(chunk.lower())
    return key
# sort by both the source-line and the datatype, so the output file ordering is stable
# when we have multiple items on the same source line
def v_sort_key(v):
    """Return the sort key for a result tuple v.

    The key pairs the natural-sort key of v[0] with v[1], so entries are
    ordered by v[0] first and by v[1] only when their v[0] keys are equal.
    The two parts are kept as separate elements rather than joined into one
    flat list: in a flat list v[1] (a str) could end up in the same position
    as an int chunk of a longer v[0] key, and comparing the two raises
    TypeError.
    """
    return (natural_sort_key(v[0]), v[1])
# order both result lists in place using v_sort_key
tmp1list.sort(key=v_sort_key)
tmp2list.sort(key=v_sort_key)

# print out the results: every entry takes three lines, its first field on a
# line of its own followed by the two remaining fields indented by four spaces
for resultsPath, rows in (
        ("compilerplugins/clang/virtualdead.results", tmp1list),
        ("compilerplugins/clang/virtualdead.unusedparams.results", tmp2list)):
    with open(resultsPath, "wt") as f:
        for location, signature, detail in rows:
            f.write(location + "\n    " + signature + "\n    " + detail + "\n")