summaryrefslogtreecommitdiff
path: root/compilerplugins/clang/singlevalfields.py
blob: f4eb3993030236a4cc3f7beeec09838b1020f260 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/python

import sys
import re
import io

# maps tuple(parentClass, fieldName) -> sourceLocation of the field's definition
definitionToSourceLocationMap = {}
# maps tuple(parentClass, fieldName) -> set of the values seen assigned to that field
fieldAssignDict = {}

# clang does not always use exactly the same numbers in the type-parameter vars it generates
# so I need to substitute them to ensure we can match correctly.
normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")
def normalizeTypeParams( line ):
    """Return line with each "type-parameter-<digits>-<digits>" run rewritten as "type-parameter-?-?".

    Text that does not match the pattern is returned unchanged.
    """
    normalized = re.sub(normalizeTypeParamsRegex, "type-parameter-?-?", line)
    return normalized

# Populate definitionToSourceLocationMap and fieldAssignDict from the log.
# Relevant lines look like "defn:\t<parentClass>\t<fieldName>\t<sourceLocation>" or
# "asgn:\t<parentClass>\t<fieldName>\t<assignValue>"; every other line is ignored.
# Reading as binary (since we know it is pure ascii) is much faster than reading as unicode.
with io.open("singlevalfields.log", "rb", buffering=1024*1024) as txt:
    for rawLine in txt:
        # Binary mode yields str on Python 2 but bytes on Python 3, and bytes cannot be
        # matched against the str literals below, so decode only in the latter case.
        line = rawLine if isinstance(rawLine, str) else rawLine.decode("ascii")
        isDefn = line.startswith("defn:\t")
        if not isDefn and not line.startswith("asgn:\t"):
            continue
        # maxsplit=3 keeps any further tabs inside the trailing payload
        tokens = line.split("\t", 3)
        if len(tokens) < 4:
            # truncated line: slicing it by tab position would store garbage, so skip it
            continue
        fieldInfo = (normalizeTypeParams(tokens[1]), normalizeTypeParams(tokens[2]))
        payload = tokens[3].strip()
        if isDefn:
            # a later definition line for the same field overwrites the earlier one
            definitionToSourceLocationMap[fieldInfo] = payload
        else:
            fieldAssignDict.setdefault(fieldInfo, set()).add(payload)

# Build the list of fields to report, as tuples of
# ("parentClass fieldName", the single assigned value, source location or "").
tmp1list = list()
# the only values that make a single-valued field worth reporting
reportableValues = frozenset(["0", "1", "-1", "nullptr"])
# .items() rather than .iteritems(): the latter does not exist on Python 3 dicts
for fieldInfo, assignValues in fieldAssignDict.items():
    # only fields that are assigned exactly one distinct value are of interest
    if len(assignValues) != 1:
        continue
    # if it contains anything other than the reportable set, ignore it
    # (this also rejects a lone "?" value)
    if not assignValues <= reportableValues:
        continue
    # ignore things which are locally declared but are actually redeclarations of things from 3rd party code
    parentClass = fieldInfo[0]
    if parentClass == "_mwmhints":
        continue
    # ignore things which are representations of on-disk structures
    if parentClass in ["SEPr", "WW8Dop", ]:
        continue
    v0 = fieldInfo[0] + " " + fieldInfo[1]
    v1 = (",".join(assignValues))
    # fields with assignments but no recorded definition get an empty location
    v2 = definitionToSourceLocationMap.get(fieldInfo, "")
    tmp1list.append((v0,v1,v2))

# sort results by filename:lineno
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
    """Return a sort key for s in which runs of digits compare by numeric value.

    s is split on runs of ASCII digits (the runs themselves are kept); each
    all-digit piece becomes an int and every other piece is lower-cased.
    """
    key = []
    for piece in re.split(_nsre, s):
        if piece.isdigit():
            key.append(int(piece))
        else:
            key.append(piece.lower())
    return key
# order the entries by their source location (third tuple element), numbers compared numerically
tmp1list.sort(key=lambda entry: natural_sort_key(entry[2]))

# print out the results: the location on its own line, then the field and its value indented
with open("loplugin.singlevalfields", "wt") as f:
    for fieldDesc, assignedValue, location in tmp1list:
        for outLine in (location, "    " + fieldDesc, "    " + assignedValue):
            f.write(outLine + "\n")