-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy path_parse_Python.py
More file actions
479 lines (420 loc) · 14.6 KB
/
_parse_Python.py
File metadata and controls
479 lines (420 loc) · 14.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
import ast
import os
import sys
import re
from typing import List, Dict, Set, Optional, Tuple, Any
from collections import defaultdict
class CompleteStructureCommenter:# beginclass
"""A more robust Python structure commenter that handles multi-block endings."""
def __init__(self):# beginmethod
self.source_lines = []
self.result_lines = []
self.begin_comments = {}# Line -> [comments]
self.end_comments = defaultdict(list)# Line -> [(comment, indentation, start_line)]
# endmethod
def add_comments(self, filename: str, output_filename: Optional[str] = None) -> str:# beginmethod
"""Add structural comments to a Python file."""
# Read the file
with open(filename, 'r', encoding='utf-8') as f:# beginwith
content = f.read()
# endwith
return self.add_comments_to_string(content, output_filename)
# endmethod
def add_comments_to_string(self, content: str, output_filename: Optional[str] = None) -> str:# beginmethod
"""Add structural comments to a Python string."""
# Parse the content
self.source_lines = content.splitlines()
# Parse the AST, first clean up content with special characters
try:# begintry
# Replace asterisks in identifiers for AST parsing only
clean_content = re.sub(r'\*([a-zA-Z0-9_]+)\*', r'\1', content)
tree = ast.parse(clean_content)
except SyntaxError as e:
print(f"Syntax error in input file: {e}")#// //
input("enter to continue") ;
return content
# endtry
# First pass: collect all the begin/end comments
self._collect_comments(tree)
# Second pass: apply the comments to the source lines
self._apply_comments()
# Create the modified content
modified_content = '\n'.join(self.result_lines)
# Write to output file if provided
if output_filename:# beginif
with open(output_filename, 'w', encoding='utf-8') as f:# beginwith
f.write(modified_content)
# endwith
# endif
return modified_content
# endmethod
def _get_indent(self, line_idx: int) -> str:# beginmethod
"""Get the indentation of a line."""
if line_idx < 0 or line_idx >= len(self.source_lines):# beginif
return ""
# endif
line = self.source_lines[line_idx]
return line[:len(line) - len(line.lstrip())]
# endmethod
def _collect_comments_for_node(self, node, node_type, begin_comment, end_comment):# beginmethod
"""Collect begin and end comments for a specific node."""
if not hasattr(node, 'lineno') or not hasattr(node, 'end_lineno'):# beginif
return
# endif
start_line = node.lineno - 1
end_line = node.end_lineno - 1
indent = self._get_indent(start_line)
# Add begin comment
if start_line not in self.begin_comments:# beginif
self.begin_comments[start_line] = []
# endif
self.begin_comments[start_line].append(begin_comment)
# Add end comment - store as (comment, indentation, start_line)
# The start_line is used for sorting end comments when multiple blocks end on the same line
self.end_comments[end_line].append((end_comment, indent, start_line))
# endmethod
def _collect_comments(self, tree):# beginmethod
"""First pass: collect all the begin/end comments."""
# Clear previous comments
self.begin_comments = {}
self.end_comments = defaultdict(list)
# Build a mapping of nodes to their parent
parent_map = {}
for parent in ast.walk(tree):# beginfor
for child in ast.iter_child_nodes(parent):# beginfor
parent_map[child] = parent
# endfor
# endfor
# Collect comments from all nodes in AST order
for node in ast.walk(tree):# beginfor
# Function definitions
if isinstance(node, ast.FunctionDef):# beginif
# Check if this is a method inside a class
parent = parent_map.get(node)
if parent and isinstance(parent, ast.ClassDef):# beginif
self._collect_comments_for_node(node, "method", "#beginmethod", "#endmethod")
else:
self._collect_comments_for_node(node, "function", "#beginfunc", "#endfunc")
# endif
# Class definitions
elif isinstance(node, ast.ClassDef):# beginelif
self._collect_comments_for_node(node, "class", "#beginclass", "#endclass")
# If statements
elif isinstance(node, ast.If):# beginelif
# Check if this is an elif by examining the source line
start_line = node.lineno - 1
if start_line < len(self.source_lines):# beginif
line = self.source_lines[start_line].strip()
if line.startswith("elif "):# beginif
self._collect_comments_for_node(node, "elif", "#beginelif", "#endlif")
else:
self._collect_comments_for_node(node, "if", "#beginif", "#endif")
# endif
else:
# Fallback if we can't determine
self._collect_comments_for_node(node, "if", "#beginif", "#endif")
# endif
# For loops
elif isinstance(node, ast.For):# beginelif
self._collect_comments_for_node(node, "for", "#beginfor", "#endfor")
# While loops
elif isinstance(node, ast.While):# beginelif
self._collect_comments_for_node(node, "while", "#beginwhile", "#endwhile")
# With statements
elif isinstance(node, ast.With):# beginelif
self._collect_comments_for_node(node, "with", "#beginwith", "#endwith")
# Try-except blocks
elif isinstance(node, ast.Try):# beginelif
self._collect_comments_for_node(node, "try", "#begintry", "#endtry")
# endlif
# endlif
# endlif
# endlif
# endlif
# endlif
# endif
# endfor
# endmethod
def _should_skip_comment(self, line, comment_tag):# beginmethod
"""
Check if we should skip adding a comment because it's already in the line,
but make sure we don't skip if it's only inside a string literal.
"""
if comment_tag not in line:# beginif
return False# Not in line at all, don't skip
# endif
# Identify positions of string literals in the line
str_positions = []
# Find double-quoted strings
for match in re.finditer(r'"[^"\\]*(?:\\.[^"\\]*)*"', line):# beginfor
str_positions.append((match.start(), match.end()))
# endfor
# Find single-quoted strings
for match in re.finditer(r"'[^'\\]*(?:\\.[^'\\]*)*'", line):# beginfor
str_positions.append((match.start(), match.end()))
# endfor
# Find all occurrences of the comment tag
for match in re.finditer(re.escape(comment_tag), line):# beginfor
tag_start = match.start()
tag_end = match.end()
# Check if this occurrence is inside any string literal
inside_string = False
for str_start, str_end in str_positions:# beginfor
if str_start <= tag_start and tag_end <= str_end:# beginif
inside_string = True
break
# endif
# endfor
if not inside_string:# beginif
return True# Found a real comment outside string literals, skip adding
# endif
# endfor
return False# All instances were inside string literals, don't skip
# endmethod
def _apply_comments(self):# beginmethod
"""Second pass: apply the comments to the source lines."""
self.result_lines = []
# Process each line
for i, line in enumerate(self.source_lines):# beginfor
# First add the original line (possibly with begin comments)
if i in self.begin_comments:# beginif
# Need to add begin comments
begin_comments = self.begin_comments[i]
begin_comment_str = " ".join(begin_comments)
# Check if line already has a comment
if '#' in line and not line.strip().startswith('#'):# beginif
# Check if any of our begin comments already exist in the line outside string literals
should_skip = any(self._should_skip_comment(line, comment) for comment in begin_comments)
if should_skip:# beginif
# If real comments exist, parse out existing comment
comment_pos = line.find('#')
code_part = line[:comment_pos].rstrip()
existing_comment = line[comment_pos:]
# Add our begin comments before the existing comment
#begin_comment_str = " ".join(begin_comments)
modified = f"{code_part} {begin_comment_str} {existing_comment}"
self.result_lines.append(modified)
else:
# No existing structure comments (or only in string literals)
self.result_lines.append(f"{line} {begin_comment_str}")
# endif
else:
# No existing comment, add to end
#begin_comment_str = " ".join(begin_comments)
self.result_lines.append(f"{line} {begin_comment_str}")
# endif
else:
# No begin comment to add
self.result_lines.append(line)
# endif
# Then check if we need to add end comments after this line
if i in self.end_comments:# beginif
# Sort end comments by their start line - this ensures proper nesting
# Blocks that started later (higher start_line) should be closed first
sorted_end_comments = sorted(#// //
self.end_comments[i],
key=lambda x: x[2],# Sort by start_line
reverse=True# Later blocks should be closed first
)#// //
# Add each end comment on its own line
for end_comment, indent, _ in sorted_end_comments:# beginfor
self.result_lines.append(f"{indent}{end_comment}")
# endfor
# endif
# endfor
# endmethod
# endclass
# Extended lists to include the new endif/endlif distinction
Ends = [
"endfunc",
"endmethod",
"endclass",
"endif",
#"endlif", # Added for elif statements# 0, # Added for elif statements
"endwith",
"endtry",
"endfor",
"endwhile",
]#// //
Begins = [#// //
"beginfunc",
"beginmethod",
"beginclass",
"beginif",
"beginelif",# Added for elif statements
"begintry",
"beginwith",
"beginwhile",
"beginfor",
]#// //
# Update the begin_type mapping to include beginelif
begin_type = {#//// ////
"beginfunc": "input",
"beginmethod": "input",
"beginclass": "event",
"beginif": "branch",
"beginelif": "branch",# Same type as beginif
"begintry": "branch",
"beginwith": "branch",
"beginwhile": "loop",
"beginfor": "loop",
}#// //
# Update the end_type mapping to include endlif
end_type = {#// //
"endfunc": "end",
"endmethod": "end",
"endclass": "end",
"endif": "bend",
#"endlif": "bend", # Same type as endif# 0: 0, # Same type as endif
"endwith": "bend",
"endtry": "bend",
"endfor": "lend",
"endwhile": "lend",
}#// //
path_type = [#// //
"elif",# for if
"else",# for if, try, loops
"except",# for try
"finally",# for try
]#// //
event_type = [#// //
"import",
"from",
]#// //
output_type = [#// //
"print",
".write",
]#// //
VFCSEPERATOR = ';//'
def is_path(line: str) -> bool:# beginfunc
"""
Return True if the first word of the given line is one of the path type.
"""
parts = line.strip().split(None, 1)
if not parts:# beginif
return False
# endif
if parts[0].lstrip().startswith(tuple(path_type)) :#if parts[0].strip(" :") in path_type:
return True
# endif
# endfunc
def replace_string_literals(input_string):# beginfunc
# Regular expression to match both single and double-quoted string literals
result = re.sub(r'(["\'])(.*?)(\1)', '0', input_string)
return result
# endfunc
def split_on_comment(input_string):# beginfunc
# Regular expression to match the comment outside of quotes
match = re.search(r'(?<!")#.*$', temp_str)
if match:# beginif
s1 = input_string.strip()# Everything before the comment
s2 = match.strip()# The comment itself
else:
s1, s2 = input_string.strip(), ""
# endif
return (s1, s2)
# endfunc
def split_string(input_string):# beginfunc
temp_str = replace_string_literals(input_string)
parts = temp_str.split("#", 1) # Split at the first occurrence of '#'# Split at the first occurrence of 0
# parts = split_on_comment(input_string)
s1 = input_string.strip()
if len(parts) > 1 :# beginif
s2 = parts[1]
s1 = s1.replace('#'+s2, "")
else:
s2 = ""# s2 holds the second part, or remains empty
# endif
return (s1, s2)
# endfunc
def get_marker( comment ):# beginfunc
parts = comment.strip().split(None, 1)
if not parts:# beginif
return "none"
# endif
marker = parts[0]
return marker
# endfunc
def get_VFC_type(code : str, line: str) -> Optional[str]:# beginfunc
"""
If the first word of `line` (without any leading '#') is in Begins or Ends,
returns its mapped type; otherwise returns None.
"""
token = code.strip().split(None, 1)[0] if len(code) > 1 else "none"
if token in event_type:# beginif
return "event"
# endif
if is_path(code):# beginif
return 'path'
# endif
parts = line.strip().split(None, 1)
if not parts:# beginif
return "set"
# endif
marker = parts[0]
if marker in Begins:# beginif
return begin_type[marker]
# endif
if marker in Ends:# beginif
return end_type[marker]
# endif
return "set"
# endfunc
def generate_VFC(input_string):# beginfunc
strings = input_string.split("\n")# Splits the input at each newline
VFC = ''
for string in strings:# beginfor
# Skip empty lines
if not string.strip():# beginif
continue
else:
string = ''.join(c if c.isascii() else ' ' for c in string)
# endif
code, comment = split_string(string)
code = code.strip()
type = get_VFC_type(code, comment)
marker = get_marker( comment )
## PRE FIX TOKENS# # PRE FIX TOKENS
if marker == "endclass" :# beginif
VFC += f"bend(){VFCSEPERATOR}\n"
# endif
VFC += f'{type}({code}){VFCSEPERATOR} { comment.replace( marker, "" , 1 ) }\n'#// //
## POST FIX TOKENS# # POST FIX TOKENS
if type == "branch":# beginif
VFC += f"path(){VFCSEPERATOR}\n"
# endif
if marker == "beginclass" :# beginif
VFC += f"branch(){VFCSEPERATOR}\n"
VFC += f"path(){VFCSEPERATOR}\n"
VFC += f"path(){VFCSEPERATOR}\n"
# endif
# endfor
return VFC
# endfunc
def main():# beginfunc
import argparse
parser = argparse.ArgumentParser(description='Add structure comments to Python code')
parser.add_argument('input_file', help='Input Python file')
parser.add_argument('-o', '--output', help='Output file (default: stdout)')
args = parser.parse_args()
commenter = CompleteStructureCommenter()
modified_code = commenter.add_comments(args.input_file, args.output)
VFC = generate_VFC(modified_code)
root_filename = os.path.splitext(os.path.basename( args.input_file ))[0] + '.py'# ////
with open(args.input_file+'.vfc', 'w') as VFC_output:# beginwith
VFC_output.write(VFC)
VFC_output.write(
";INSECTA EMBEDDED SESSION INFORMATION\n"+
"; 255 16777215 65280 16777088 16711680 13158600 16777088 0 255 255 65535 6946660 986895\n"+
f"; { root_filename } # '\n"+
"; notepad.exe\n"+
";INSECTA EMBEDDED ALTSESSION INFORMATION\n"+
"; 260 260 1130 1751 0 130 137 4294966452 python.key 0"
)
# endwith
return modified_code
# endfunc
if __name__ == '__main__':# beginif
t = main()
# endif
# Export Date: 08:13:13 PM - 30:Nov:2025.