-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Expand file tree
/
Copy pathadd-overlay-annotations.py
More file actions
283 lines (228 loc) · 9.25 KB
/
add-overlay-annotations.py
File metadata and controls
283 lines (228 loc) · 9.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
# This script is used to annotate .qll files without any existing overlay annotations
# with overlay[local?] and overlay[caller?] annotations. Maintenance of overlay annotations
# in annotated files will be handled by QL-for-QL queries.
# It will walk the directory tree and annotate most .qll files, skipping only
# some specific cases (e.g., empty files, files that configure dataflow for queries).
# The script takes a list of languages and processes the corresponding directories.
# If the optional --check argument is provided, the script checks for missing annotations,
# but does not modify any files.
# Usage: python3 add-overlay-annotations.py [--check] <language1> <language2> ...
# The script will modify the files in place and print the changes made.
# The script is designed to be run from the root of the repository.
#!/usr/bin/python3
import sys
import os
import re
from difflib import context_diff
OVERLAY_PATTERN = re.compile(r'overlay\[[a-zA-Z?_-]+\]')
def has_overlay_annotations(lines):
'''
Check whether the given lines contain any overlay[...] annotations.
'''
return any(OVERLAY_PATTERN.search(line) for line in lines)
def is_line_comment(line):
return line.startswith("//") or (line.startswith("/*") and line.endswith("*/"))
def find_file_level_module_declaration(lines):
'''
Returns the index of the existing file-level module declaration if one
exists. Returns None otherwise.
'''
comment = False
for i, line in enumerate(lines):
trimmed = line.strip()
if is_line_comment(trimmed):
continue
elif trimmed.startswith("/*"):
comment = True
elif comment and trimmed.endswith("*/"):
comment = False
elif not comment and trimmed.endswith("module;"):
return i
return None
def is_file_module_qldoc(i, lines):
'''
Assuming a qldoc ended on line i, determine if it belongs to the implicit
file-level module. If it is followed by another qldoc or imports, then it
does and if it is followed by any other non-empty, non-comment lines, then
we assume that is a declaration of some kind and the qldoc is attached to
that declaration.
'''
comment = False
for line in lines[i+1:]:
trimmed = line.strip()
if trimmed.startswith("import ") or trimmed.startswith("private import ") or trimmed.startswith("/**"):
return True
elif is_line_comment(trimmed) or not trimmed:
continue
elif trimmed.startswith("/*"):
comment = True
elif comment and trimmed.endswith("*/"):
comment = False
elif not comment and trimmed:
return False
return True
def find_file_module_qldoc_declaration(lines):
'''
Returns the index of last line of the implicit file module qldoc if one
exists. Returns None otherwise.
'''
qldoc = False
comment = False
for i, line in enumerate(lines):
trimmed = line.strip()
if trimmed.startswith("//"):
continue
elif (qldoc or trimmed.startswith("/**")) and trimmed.endswith("*/"):
# a qldoc just ended; determine if it belongs to the implicit file module
if is_file_module_qldoc(i, lines):
return i
else:
return None
elif trimmed.startswith("/**"):
qldoc = True
elif trimmed.startswith("/*"):
comment = True
elif comment and trimmed.endswith("*/"):
comment = False
elif (not qldoc and not comment) and trimmed:
return None
return None
def only_comments(lines):
'''
Returns true if the lines contain only comments and empty lines.
'''
comment = False
for line in lines:
trimmed = line.strip()
if not trimmed or is_line_comment(trimmed):
continue
elif trimmed.startswith("/*"):
comment = True
elif comment and trimmed.endswith("*/"):
comment = False
elif comment:
continue
elif trimmed:
return False
return True
def insert_toplevel_maybe_local_annotation(filename, lines):
'''
Find a suitable place to insert an overlay[local?] annotation at the top of the file.
Returns a pair consisting of description and the modified lines or None if no overlay
annotation is necessary (e.g., for files that only contain comments).
'''
if only_comments(lines):
return None
i = find_file_level_module_declaration(lines)
if not i == None:
out_lines = lines[:i]
out_lines.append("overlay[local?]\n")
out_lines.extend(lines[i:])
return (f"Annotating \"{filename}\" via existing file-level module statement", out_lines)
i = find_file_module_qldoc_declaration(lines)
if not i == None:
out_lines = lines[:i+1]
out_lines.append("overlay[local?]\n")
out_lines.append("module;\n")
out_lines.extend(lines[i+1:])
return (f"Annotating \"{filename}\" which has a file-level module qldoc", out_lines)
out_lines = ["overlay[local?]\n", "module;\n", "\n"] + lines
return (f"Annotating \"{filename}\" without file-level module qldoc", out_lines)
def insert_overlay_caller_annotations(lines):
'''
Mark pragma[inline] predicates as overlay[caller?] if they are not declared private.
'''
out_lines = []
for i, line in enumerate(lines):
trimmed = line.strip()
if trimmed == "pragma[inline]":
if i + 1 < len(lines) and not "private" in lines[i+1]:
whitespace = line[0: line.find(trimmed)]
out_lines.append(f"{whitespace}overlay[caller?]\n")
out_lines.append(line)
return out_lines
explicitly_global = set([
"java/ql/lib/semmle/code/java/dispatch/VirtualDispatch.qll",
"java/ql/lib/semmle/code/java/dispatch/DispatchFlow.qll",
"java/ql/lib/semmle/code/java/dispatch/ObjFlow.qll",
"java/ql/lib/semmle/code/java/dispatch/internal/Unification.qll",
])
def annotate_as_appropriate(filename, lines):
'''
Insert new overlay[...] annotations according to heuristics in files without existing
overlay annotations.
Returns None if no annotations are needed. Otherwise, returns a pair consisting of a
string describing the action taken and the modified content as a list of lines.
'''
if has_overlay_annotations(lines):
return None
# These simple heuristics filter out those .qll files that we no _not_ want to annotate
# as overlay[local?]. It is not clear that these heuristics are exactly what we want,
# but they seem to work well enough for now (as determined by speed and accuracy numbers).
if (filename.endswith("Test.qll") or
((filename.endswith("Query.qll") or filename.endswith("Config.qll")) and
any("implements DataFlow::ConfigSig" in line for line in lines))):
return None
elif filename in explicitly_global:
# These files are explicitly global and should not be annotated.
return None
elif not any(line for line in lines if line.strip()):
return None
lines = insert_overlay_caller_annotations(lines)
return insert_toplevel_maybe_local_annotation(filename, lines)
def process_single_file(write, filename):
'''
Process a single file, annotating it as appropriate.
If write is set, the changes are written back to the file.
Returns True if the file requires changes.
'''
with open(filename) as f:
old = [line for line in f]
annotate_result = annotate_as_appropriate(filename, old)
if annotate_result is None:
return False
if not write:
return True
new = annotate_result[1]
diff = context_diff(old, new, fromfile=filename, tofile=filename)
diff = [line for line in diff]
if diff:
print(annotate_result[0])
for line in diff:
print(line.rstrip())
with open(filename, "w") as out_file:
for line in new:
out_file.write(line)
return True
if len(sys.argv) > 1 and sys.argv[1] == "--check":
check = True
langs = sys.argv[2:]
else:
check = False
langs = sys.argv[1:]
dirs = []
for lang in langs:
if lang in ["cpp", "go", "csharp", "java", "javascript", "python", "ruby", "rust", "swift"]:
dirs.append(f"{lang}/ql/lib")
else:
raise Exception(f"Unknown language \"{lang}\".")
if dirs:
dirs.append("shared")
missingAnnotations = []
for roots in dirs:
for dirpath, dirnames, filenames in os.walk(roots):
for filename in filenames:
if filename.endswith(".qll") and not dirpath.endswith("tutorial"):
path = os.path.join(dirpath, filename)
res = process_single_file(not check, path)
if check and res:
missingAnnotations.append(path)
if len(missingAnnotations) > 0:
print("The following files have no overlay annotations:")
for path in missingAnnotations[:10]:
print("- " + path)
if len(missingAnnotations) > 10:
print("and " + str(len(missingAnnotations) - 10) + " additional files.")
print()
print("Please manually add overlay annotations or use the config/add-overlay-annotations.py script to automatically add sensible default overlay annotations.")
exit(1)