compilerplugins/clang/virtualdead.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

#!/usr/bin/python3

import sys
import re
import io

# Accumulators filled while the plugin log is parsed.
callDict = {}  # (nameAndParams, sourceLocation) -> set of return values seen
definitionToSourceLocationMap = {}  # nameAndParams -> sourceLocation
paramSet = set()  # (name, bitfield) pairs

# The numbers clang puts into its generated "type-parameter-N-M" names are not
# always the same, so they are replaced with a fixed placeholder to let
# otherwise-identical signatures match.
normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")
def normalizeTypeParams( line ):
    """Return line with every type-parameter-N-M token rewritten to type-parameter-?-?."""
    placeholder = "type-parameter-?-?"
    return re.sub(normalizeTypeParamsRegex, placeholder, line)

# The log is opened in text mode and decoded as ASCII (undecodable bytes are
# ignored) through a 1 MiB buffer. Each line is a tab-separated record whose
# first field names the record kind.
with io.open("workdir/loplugin.virtualdead.log", "r", encoding="ascii", errors="ignore", buffering=1024*1024) as logFile:
    for line in logFile:
        fields = line.strip().split("\t")
        recordKind = fields[0]
        try:
            if recordKind == "virtual:":
                # fields: name-and-params, source location, return value
                nameAndParams = normalizeTypeParams(fields[1])
                sourceLocation, returnValue = fields[2], fields[3]
                callDict.setdefault((nameAndParams, sourceLocation), set()).add(returnValue)
                definitionToSourceLocationMap[nameAndParams] = sourceLocation
            elif recordKind == "param:":
                name = normalizeTypeParams(fields[1])
                # a param: record without a bitfield column is skipped
                if len(fields) > 2:
                    paramSet.add((name, fields[2]))
            else:
                print("unknown line: " + line)
        except IndexError:
            # a record with too few columns: report it, then abort
            print("problem with line " + line.strip())
            raise

# Build the first report: one (srcloc, functionSig, callValue) entry for every
# callDict key whose set of recorded return values holds exactly one value.
tmp1list = []
for (functionSig, srcloc), callValues in callDict.items():
    if len(callValues) != 1:
        continue
    callValue = next(iter(callValues))
    # skip values that contain any of these marker substrings
    if any(marker in callValue
           for marker in ("unknown-stmt", "unknown2", "unknown3", "unknown4", "pure")):
        continue
    if srcloc.startswith("workdir/"):
        continue
    # ignore Qt stuff
    if srcloc.startswith(("Gui/", "Widgets/", "Core/", "/Qt", "Qt", "64-")):
        continue
    tmp1list.append((srcloc, functionSig, callValue))

def merge_bitfield(a, b):
    """Combine two bitfield strings with a per-position OR.

    a, b: strings made of "0" and "1" characters.

    Returns a string that has "1" at every position where either input has
    "1", and "0" elsewhere. An empty input yields the other input unchanged.
    When the lengths differ, the shorter input is treated as "0" in its
    missing trailing positions, so the result is as long as the longer input
    and no bit is dropped.
    """
    if not a:
        return b
    if not b:
        return a
    # pad to a common width so zip() below sees every position of both inputs
    width = max(len(a), len(b))
    left = a.ljust(width, "0")
    right = b.ljust(width, "0")
    return "".join("1" if "1" in (x, y) else "0" for x, y in zip(left, right))
# Build the second report. First fold all bitfields recorded for the same
# name into one merged bitfield per name.
tmp2dict = {}
tmp2list = []
for name, bitfield in paramSet:
    # skip names that start with "<word> " followed by one of these namespaces
    if any(re.match(pattern, name)
           for pattern in (r"\w+ com::", r"\w+ ooo::vba::", r"\w+ orcus::", r"\w+ std::")):
        continue
    if name in tmp2dict:
        tmp2dict[name] = merge_bitfield(tmp2dict[name], bitfield)
    else:
        tmp2dict[name] = bitfield
# Then keep the names whose merged bitfield still contains a "0".
# NOTE(review): presumably a "0" marks a parameter that is never used, going by
# the name of the results file this list is written to - confirm with the plugin.
for name, mergedBits in tmp2dict.items():
    # raises KeyError when no "virtual:" record was seen for this name
    srcloc = definitionToSourceLocationMap[name]
    # ignore Qt stuff
    if srcloc.startswith(("Gui/", "Widgets/", "Core/", "/Qt", "Qt", "64-")):
        continue
    # ignore external stuff
    if srcloc.startswith("workdir/"):
        continue
    # referenced by generated code in workdir/
    if srcloc.startswith("writerfilter/source/ooxml/OOXMLFactory.hxx"):
        continue
    if "0" in mergedBits:
        tmp2list.append((srcloc, name, mergedBits))

# sort results by filename:lineno
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    """Return a sort key that orders the digit runs in s numerically.

    s is split on runs of ASCII digits; each digit run becomes an int and
    every other piece is lower-cased, so "f:9" sorts before "f:10" and
    letter case is ignored.
    """
    key = []
    for piece in re.split(_nsre, s):
        if piece.isdigit():
            key.append(int(piece))
        else:
            key.append(piece.lower())
    return key
# sort by both the source-line and the datatype, so the output file ordering is stable
# when we have multiple items on the same source line
def v_sort_key(v):
    """Return the sort key for a result tuple v.

    v[0] is ordered through natural_sort_key and ties are broken by v[1].
    The natural key is kept as a nested first element rather than being
    concatenated with v[1]: in a flat list, v[1] (a str) could line up
    against an int chunk of a longer natural key and make the comparison
    raise TypeError. Wherever the flat key compared successfully, this key
    gives the same order.
    """
    return [natural_sort_key(v[0]), v[1]]
# order both result lists in place with the shared key function
for resultList in (tmp1list, tmp2list):
    resultList.sort(key=v_sort_key)

# Write both reports. Every entry is three lines: the first tuple element,
# then the second and third elements each indented by four spaces.
for resultsPath, entries in (
        ("compilerplugins/clang/virtualdead.results", tmp1list),
        ("compilerplugins/clang/virtualdead.unusedparams.results", tmp2list)):
    with open(resultsPath, "wt") as out:
        for location, signature, detail in entries:
            out.write(location + "\n")
            out.write("    " + signature + "\n")
            out.write("    " + detail + "\n")