-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathimplementation_analyzer.py
More file actions
860 lines (690 loc) · 31.9 KB
/
implementation_analyzer.py
File metadata and controls
860 lines (690 loc) · 31.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
#!/usr/bin/env python3
"""
Implementation Analyzer
This module extracts implementation features from code,
focusing on control flow, data operations, and algorithm patterns.
"""
import re
import logging
from typing import Dict, Any, List
# Configure logging
# NOTE: basicConfig() runs at import time and configures the root logger,
# so importing this module affects logging for the whole process.
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by every ImplementationAnalyzer method.
logger = logging.getLogger("implementation_analyzer")
class ImplementationAnalyzer:
    """
    Analyzes code implementation features to understand actual behavior

    The analysis is purely textual (regular expressions plus brace matching)
    and targets Java-style source code. All results are heuristics: nothing
    is parsed into a syntax tree, so unusual formatting can defeat a pattern.
    """

    # Statement keywords that the "<type> <name>" regexes would otherwise
    # mistake for a type name (e.g. "return total;" looks like a declaration).
    _NON_TYPE_KEYWORDS = frozenset({
        "return", "throw", "new", "else", "case", "goto", "break",
        "continue", "package", "import", "assert", "yield", "instanceof",
    })

    # Words that can precede "(...) {" but never name a method.
    _NON_METHOD_NAMES = frozenset({
        "if", "for", "while", "switch", "catch", "try", "synchronized",
    })

    def __init__(self):
        """Initialize the implementation analyzer"""
        # Algorithm patterns: category -> [(regex, label), ...]
        self.algorithm_patterns = {
            "sorting": [
                (r'Arrays\.sort', 'java_builtin_sort'),
                (r'Collections\.sort', 'java_builtin_sort'),
                # The optional "{" after the if lets braced swap bodies match too.
                (r'for\s*\(.*?;.*?;.*?\)\s*\{[^{}]*for\s*\(.*?;.*?;.*?\)\s*\{[^{}]*(?:if\s*\([^{}]*(?:<|>)[^{}]*\))\s*\{?[^{}]*(?:=|swap)[^{}]*\}', 'nested_loop_comparison_swap')
            ],
            "searching": [
                # The JDK methods are spelled binarySearch (capital S).
                (r'Arrays\.(?:binary)?[sS]earch', 'java_builtin_search'),
                (r'Collections\.(?:binary)?[sS]earch', 'java_builtin_search'),
                # The optional "{" after the loop header lets braced bodies match too.
                (r'while\s*\([^{}]*(?:start|begin|low|left)[^{}]*(?:<|<=)[^{}]*(?:end|high|right)[^{}]*\)\s*\{?[^{}]*(?:mid|middle)[^{}]*=', 'binary_search')
            ],
            "data_processing": [
                (r'\.stream\(\).*?\.(?:map|filter|reduce)', 'java_stream_processing'),
                (r'for\s*\(\s*(?:final\s+)?(?:\w+)(?:<[^>]*>)?\s+\w+\s*:\s*\w+\s*\)', 'for_each_loop')
            ]
        }
        # Data operation patterns: category -> [(regex, label), ...]
        # Group 1 of each regex is the receiver (or the instantiated type).
        self.data_operation_patterns = {
            "collection_modification": [
                (r'(\w+)\.add\(', 'collection_add'),
                (r'(\w+)\.remove\(', 'collection_remove'),
                (r'(\w+)\.addAll\(', 'collection_add_all'),
                (r'(\w+)\.clear\(', 'collection_clear')
            ],
            "string_manipulation": [
                (r'(\w+)\.concat\(', 'string_concat'),
                (r'(\w+)\.substring\(', 'string_substring'),
                (r'(\w+)\.replace\(', 'string_replace'),
                (r'(\w+)\.split\(', 'string_split'),
                (r'(\w+)\.toLowerCase\(', 'string_case_conversion'),
                (r'(\w+)\.toUpperCase\(', 'string_case_conversion')
            ],
            "object_creation": [
                (r'new\s+(\w+)\(', 'object_instantiation'),
                (r'(\w+)\.build\(', 'builder_pattern')
            ]
        }

    def extract_implementation_features(self, source_code: str, class_name: str, method_name: str) -> Dict[str, Any]:
        """
        Extract all implementation features from the code

        When method_name equals class_name the whole class is analyzed and the
        result maps every extracted method name to its features. Otherwise only
        the named method is analyzed.

        Parameters:
            source_code (str): Source code
            class_name (str): Class name
            method_name (str): Method name
        Returns:
            dict: Implementation features, or {} when the code could not be
                  located or the analysis failed
        """
        try:
            # If method_name matches class_name, analyze the entire class
            if method_name == class_name:
                logger.info("Analyzing entire class: %s", class_name)
                class_code = self._extract_class_code(source_code, class_name)
                if not class_code:
                    logger.warning("Could not extract class code for %s", class_name)
                    return {}
                # Analyze all methods in the class
                methods = self._extract_all_methods(class_code)
                return {
                    "class_name": class_name,
                    "methods": {
                        name: self._analyze_method_implementation(code)
                        for name, code in methods.items()
                    }
                }
            # Otherwise, extract the specific method; fall back to the more
            # flexible extraction when the strict signature match fails
            method_code = (self._extract_method_code(source_code, method_name)
                           or self._extract_method_code_flexible(source_code, method_name))
            if not method_code:
                logger.warning("Could not extract method code for %s", method_name)
                return {}
            return self._analyze_method_implementation(method_code)
        except Exception as e:
            # Best-effort API: callers get {} instead of an exception, but the
            # traceback is logged so failures are not silent.
            logger.error("Error extracting implementation features: %s", e, exc_info=True)
            return {}

    def _analyze_method_implementation(self, method_code: str) -> Dict[str, Any]:
        """
        Analyze a method's implementation features

        Parameters:
            method_code (str): Method code
        Returns:
            dict: Method implementation features
        """
        return {
            "control_flow": self._analyze_control_flow(method_code),
            "data_operations": self._analyze_data_operations(method_code),
            "boundary_conditions": self._analyze_boundary_conditions(method_code),
            "error_handling": self._analyze_error_handling(method_code),
            "algorithmic_patterns": self._identify_algorithm_patterns(method_code)
        }

    @staticmethod
    def _find_block_end(source_code: str, opening_brace_pos: int) -> int:
        """
        Find the end of a brace-delimited block

        Braces inside string literals, char literals and comments are ignored
        so that code such as ``return "}";`` does not end the block early.

        Parameters:
            source_code (str): Source code
            opening_brace_pos (int): Index of the block's opening '{'
        Returns:
            int: Index just past the matching '}', or -1 if braces are unbalanced
        """
        length = len(source_code)
        depth = 0
        pos = opening_brace_pos
        while pos < length:
            char = source_code[pos]
            pair = source_code[pos:pos + 2]
            if pair == '//':
                # Line comment: skip to the end of the line
                newline = source_code.find('\n', pos)
                if newline == -1:
                    return -1
                pos = newline + 1
                continue
            if pair == '/*':
                # Block comment: skip to the terminator
                terminator = source_code.find('*/', pos + 2)
                if terminator == -1:
                    return -1
                pos = terminator + 2
                continue
            if char in ('"', "'"):
                # String/char literal: skip to the closing quote. A newline also
                # ends the scan so an unterminated literal cannot swallow the file.
                end = pos + 1
                while end < length and source_code[end] != char and source_code[end] != '\n':
                    end += 2 if source_code[end] == '\\' else 1
                pos = end + 1
                continue
            if char == '{':
                depth += 1
            elif char == '}':
                depth -= 1
                if depth == 0:
                    return pos + 1
            pos += 1
        return -1  # Unbalanced braces

    def _extract_class_code(self, source_code: str, class_name: str) -> str:
        """
        Extract entire class code from source

        Parameters:
            source_code (str): Source code
            class_name (str): Class name
        Returns:
            str: Class declaration plus body, or "" if it cannot be found
        """
        # Look for the class declaration. The \b after the name prevents
        # "Stack" from matching the declaration of "StackTest".
        class_pattern = r'(?:public|private|protected)?\s+(?:abstract\s+)?(?:static\s+)?(?:final\s+)?class\s+' + \
            re.escape(class_name) + r'\b' + \
            r'(?:<[^>]+>)?(?:\s+extends\s+\w+(?:<[^>]+>)?)?(?:\s+implements\s+[^{]+)?'
        class_match = re.search(class_pattern, source_code, re.DOTALL)
        if not class_match:
            # Try alternative pattern with just the class name
            alt_pattern = r'\bclass\s+' + re.escape(class_name) + r'\b'
            class_match = re.search(alt_pattern, source_code, re.DOTALL)
        if not class_match:
            return ""
        # Find the opening brace after the class declaration
        opening_brace_pos = source_code.find('{', class_match.end())
        if opening_brace_pos == -1:
            return ""
        class_end = self._find_block_end(source_code, opening_brace_pos)
        if class_end == -1:
            return ""  # Unbalanced braces
        # Extract the full class including declaration and body
        return source_code[class_match.start():class_end]

    def _extract_all_methods(self, class_code: str) -> Dict[str, str]:
        """
        Extract all methods from a class

        Signatures are located with a regex and bodies with brace matching, so
        arbitrarily deep nesting is supported. Overloads share a name, so a
        later overload replaces an earlier one in the result.

        Parameters:
            class_code (str): Class code
        Returns:
            dict: Dictionary mapping method names to their code
        """
        methods = {}
        # Pattern to match method declarations up to and including the opening brace
        signature_re = re.compile(
            r'(?:public|private|protected)?\s+(?:static\s+)?(?:final\s+)?(?:synchronized\s+)?'
            r'(?:<[^>]+>\s+)?(?P<rtype>\w+)(?:<[^>]+>)?\s+(?P<name>\w+)\s*\([^)]*\)'
            r'(?:\s+throws\s+[^{]+)?\s*\{',
            re.DOTALL)
        pos = 0
        while True:
            match = signature_re.search(class_code, pos)
            if not match:
                break
            # "else if (x) {" or "new Foo() {" look like declarations but are not
            if (match.group('name') in self._NON_METHOD_NAMES
                    or match.group('rtype') in self._NON_TYPE_KEYWORDS):
                pos = match.end()
                continue
            method_end = self._find_block_end(class_code, match.end() - 1)
            if method_end == -1:
                pos = match.end()
                continue
            # Full method including signature
            methods[match.group('name')] = class_code[match.start():method_end]
            pos = method_end
        return methods

    def _extract_method_code(self, source_code: str, method_name: str) -> str:
        """
        Extract method code from source

        Only a signature that is directly followed by '{' is accepted, so call
        sites ("return foo(x);") and body-less declarations are skipped.

        Parameters:
            source_code (str): Source code
            method_name (str): Method name
        Returns:
            str: Method signature plus body, or "" if it cannot be found
        """
        signature_pattern = r'(?:public|private|protected)?\s+(?:static\s+)?(?:final\s+)?(?:synchronized\s+)?(?:<[^>]+>\s+)?(?:\w+(?:<[^>]+>)?)\s+' + \
            re.escape(method_name) + \
            r'\s*\([^)]*\)(?:\s+throws\s+[^{]+)?'
        source_length = len(source_code)
        for signature_match in re.finditer(signature_pattern, source_code, re.DOTALL):
            # The body must start right after the signature (whitespace aside)
            brace_pos = signature_match.end()
            while brace_pos < source_length and source_code[brace_pos].isspace():
                brace_pos += 1
            if brace_pos >= source_length or source_code[brace_pos] != '{':
                continue
            method_end = self._find_block_end(source_code, brace_pos)
            if method_end == -1:
                continue  # Unbalanced braces
            # Extract the full method including signature and body
            return source_code[signature_match.start():method_end]
        return ""

    def _extract_method_code_flexible(self, source_code: str, method_name: str) -> str:
        """
        Extract method code with a more flexible approach

        Used as a fallback for signatures the strict pattern cannot handle
        (for example array return types).

        Parameters:
            source_code (str): Source code
            method_name (str): Method name
        Returns:
            str: Method code, or "" if it cannot be found
        """
        # Find the method name with its parameter list. The name is escaped and
        # must not be the tail of a longer identifier ("sort" vs "mergeSort").
        name_re = re.compile(r'(?<![\w$])' + re.escape(method_name) + r'\s*\([^)]*\)')
        candidates = list(name_re.finditer(source_code))
        if not candidates:
            return ""
        # Prefer an occurrence that opens a body; otherwise keep the first one
        body_follows = re.compile(r'\s*(?:throws\s+[^{;]+)?\{')
        method_match = next(
            (m for m in candidates if body_follows.match(source_code, m.end())),
            candidates[0])
        # Find the method start by searching backwards
        method_start = method_match.start()
        # Look back a reasonable amount to find modifiers
        lookback_start = max(0, method_start - 100)
        lookback = source_code[lookback_start:method_start]
        # Only text after the last statement/block/comment end can belong to
        # this declaration; anything earlier is a previous member.
        declaration_offset = 0
        for boundary in re.finditer(r'[;{}]|\*/', lookback):
            declaration_offset = boundary.end()
        modifier_match = re.search(
            r'\b(?:public|private|protected|static|final|synchronized|abstract|native)\b',
            lookback[declaration_offset:])
        if modifier_match:
            method_start = lookback_start + declaration_offset + modifier_match.start()
        # Find the opening brace after the method name
        opening_brace_pos = source_code.find('{', method_match.end())
        if opening_brace_pos == -1:
            return ""
        method_end = self._find_block_end(source_code, opening_brace_pos)
        if method_end == -1:
            return ""  # Unbalanced braces
        # Extract the full method
        return source_code[method_start:method_end]

    def _analyze_control_flow(self, method_code: str) -> Dict[str, Any]:
        """
        Analyze control flow structures

        Parameters:
            method_code (str): Method code
        Returns:
            dict: Control flow analysis
        """
        def count(pattern: str) -> int:
            return len(re.findall(pattern, method_code))

        # Count control structures
        if_count = count(r'\bif\s*\(')
        else_count = count(r'\belse\b')
        for_count = count(r'\bfor\s*\(')
        do_while_count = count(r'\bdo\s*\{')
        # Each do-while ends with "while (...)": do not count that as a second loop
        while_count = max(0, count(r'\bwhile\s*\(') - do_while_count)
        switch_count = count(r'\bswitch\s*\(')
        return_count = count(r'\breturn\b')
        # Count nested control structures
        nested_if_count = count(r'\bif\s*\([^{]*\)\s*\{[^{}]*\bif\s*\(')
        nested_loop_count = count(r'\b(?:for|while)\s*\([^{]*\)\s*\{[^{}]*\b(?:for|while)\s*\(')
        # Calculate cyclomatic complexity (simplified: one per branch/loop construct)
        complexity = 1 + if_count + for_count + while_count + do_while_count + switch_count
        return {
            "if_count": if_count,
            "else_count": else_count,
            "for_count": for_count,
            "while_count": while_count,
            "do_while_count": do_while_count,
            "switch_count": switch_count,
            "return_count": return_count,
            "nested_if_count": nested_if_count,
            "nested_loop_count": nested_loop_count,
            "has_early_returns": self._has_early_returns(method_code),
            "cyclomatic_complexity": complexity,
            "conditions": self._extract_conditions(method_code)
        }

    def _analyze_data_operations(self, method_code: str) -> Dict[str, Any]:
        """
        Analyze data operations

        Parameters:
            method_code (str): Method code
        Returns:
            dict: Data operations analysis
        """
        # Extract variables and assignments
        variables = self._extract_variables(method_code)
        assignments = self._extract_assignments(method_code)
        # Detect data operation patterns
        data_ops = {}
        for category, patterns in self.data_operation_patterns.items():
            ops = []
            for pattern, op_type in patterns:
                for match in re.finditer(pattern, method_code):
                    op = {"type": op_type}
                    # Patterns without a capture group carry no variable name
                    if match.re.groups:
                        op["variable"] = match.group(1)
                    op["operation"] = match.group(0)
                    ops.append(op)
            if ops:
                data_ops[category] = ops
        # Detect collection, string and math operations
        detectors = (
            ("collection_operations", self._detect_collection_operations),
            ("string_operations", self._detect_string_operations),
            ("math_operations", self._detect_math_operations),
        )
        for key, detect in detectors:
            found = detect(method_code)
            if found:
                data_ops[key] = found
        return {
            "variables": variables,
            "assignments": assignments,
            "operations": data_ops
        }

    def _analyze_boundary_conditions(self, method_code: str) -> List[Dict[str, Any]]:
        """
        Analyze boundary conditions

        Parameters:
            method_code (str): Method code
        Returns:
            list: "boundary_check" entries followed by "array_access" entries
        """
        boundary_values = ("0", "1", "-1", "null", "true", "false")

        def is_length_expr(expr: str) -> bool:
            return ".length" in expr or ".size()" in expr

        boundary_checks = []
        # An operand is plain text optionally containing one level of call
        # parentheses, so "list.size()" and "s.charAt(i)" are accepted.
        operand = r'(?:[^{};()]|\([^(){};]*\))+?'
        if_pattern = r'\bif\s*\(\s*(' + operand + r')\s*([<>=!]+)\s*(' + operand + r')\s*\)'
        for match in re.finditer(if_pattern, method_code):
            left = match.group(1).strip()
            operator = match.group(2)
            right = match.group(3).strip()
            is_length_check = is_length_expr(left) or is_length_expr(right)
            # Comparisons against common sentinel values or a length/size count as boundary checks
            if left in boundary_values or right in boundary_values or is_length_check:
                boundary_checks.append({
                    "type": "boundary_check",
                    "left": left,
                    "operator": operator,
                    "right": right,
                    "is_zero_check": right == "0" or left == "0",
                    "is_null_check": right == "null" or left == "null",
                    "is_length_check": is_length_check
                })
        # Look for array access with boundary checks. Only the boundary checks
        # are consulted: array_access entries have no "left"/"operator" keys.
        array_accesses = []
        array_pattern = r'(\w+)\[([^]]+)]'
        for match in re.finditer(array_pattern, method_code):
            array_name = match.group(1)
            index_expr = match.group(2)
            has_check = any(
                check["left"] == index_expr
                and check["operator"] in ("<", "<=")
                and f"{array_name}.length" in check["right"]
                for check in boundary_checks)
            array_accesses.append({
                "type": "array_access",
                "array": array_name,
                "index": index_expr,
                "has_boundary_check": has_check
            })
        return boundary_checks + array_accesses

    def _analyze_error_handling(self, method_code: str) -> List[Dict[str, Any]]:
        """
        Analyze error handling

        Parameters:
            method_code (str): Method code
        Returns:
            list: Error handling mechanisms
        """
        error_handling = []
        # Extract method signature to check for throws declarations
        method_signature = self._extract_method_signature(method_code)
        if method_signature:
            for exception_type in self._extract_throws_declarations(method_signature):
                error_handling.append({
                    "type": "throws_declaration",
                    "exception_type": exception_type
                })
        # Check for try-catch blocks (only the first catch clause of each try is captured)
        try_pattern = r'try\s*\{((?:[^{}]|(?:\{[^{}]*\}))*)\}\s*catch\s*\(\s*(\w+(?:\.\w+)*)\s+\w+\s*\)\s*\{((?:[^{}]|(?:\{[^{}]*\}))*)\}'
        for match in re.finditer(try_pattern, method_code, re.DOTALL):
            catch_is_empty = not match.group(3).strip()
            error_handling.append({
                "type": "try_catch",
                "exception_type": match.group(2),
                "is_empty_catch": catch_is_empty,
                # Any non-empty catch body counts as handling the exception
                "handles_exception": not catch_is_empty
            })
        # Check for throw statements (\b keeps identifiers ending in "throw" out)
        throw_pattern = r'\bthrow\s+(?:new\s+)?(\w+(?:\.\w+)*)(?:\([^)]*\))?'
        for match in re.finditer(throw_pattern, method_code):
            error_handling.append({
                "type": "throw",
                "exception_type": match.group(1)
            })
        # Check for null checks before operations
        null_check_pattern = r'if\s*\(\s*(\w+)\s*(?:==|!=)\s*null\s*\)'
        for match in re.finditer(null_check_pattern, method_code):
            error_handling.append({
                "type": "null_check",
                "variable": match.group(1)
            })
        # Check for EmptyStackException, RuntimeException, and other specific exceptions
        specific_exceptions = [
            "EmptyStackException", "RuntimeException", "IllegalArgumentException",
            "NullPointerException", "IndexOutOfBoundsException"
        ]
        for exception in specific_exceptions:
            if exception in method_code:
                error_handling.append({
                    "type": "specific_exception",
                    "exception_type": exception
                })
        return error_handling

    def _extract_method_signature(self, method_code: str) -> str:
        """
        Extract method signature from method code

        Parameters:
            method_code (str): Method code
        Returns:
            str: Everything up to the first ')' plus an optional throws clause
        """
        signature_pattern = r'^.*?\)\s*(?:throws\s+[^{]+)?'
        signature_match = re.search(signature_pattern, method_code, re.DOTALL)
        return signature_match.group(0) if signature_match else ""

    def _extract_throws_declarations(self, method_signature: str) -> List[str]:
        """
        Extract exception types from throws declarations

        Parameters:
            method_signature (str): Method signature
        Returns:
            list: Exception types
        """
        throws_match = re.search(r'throws\s+([\w\s,\.]+)', method_signature)
        if not throws_match:
            return []
        # Split by comma, clean up whitespace and drop empty fragments
        names = (name.strip() for name in throws_match.group(1).split(','))
        return [name for name in names if name]

    def _identify_algorithm_patterns(self, method_code: str) -> Dict[str, Any]:
        """
        Identify algorithm patterns

        Parameters:
            method_code (str): Method code
        Returns:
            dict: Algorithm patterns
        """
        detected_patterns = {}
        for category, patterns in self.algorithm_patterns.items():
            for pattern, pattern_type in patterns:
                if re.search(pattern, method_code, re.DOTALL):
                    detected_patterns.setdefault(category, []).append(pattern_type)
        # Special case for sorting directions
        if "sorting" in detected_patterns or "Arrays.sort" in method_code or "Collections.sort" in method_code:
            sorting_direction = self._detect_sorting_direction(method_code)
            if sorting_direction:
                detected_patterns["sorting_direction"] = sorting_direction
        # Special case for search patterns
        if "searching" in detected_patterns:
            search_target = self._detect_search_target(method_code)
            if search_target:
                detected_patterns["search_target"] = search_target
        return detected_patterns

    def _extract_variables(self, method_code: str) -> List[Dict[str, Any]]:
        """Extract variable declarations"""
        variables = []
        # Match variable declarations
        var_pattern = r'(?:final\s+)?(\w+)(?:<[^>]+>)?\s+(\w+)\s*(?:=\s*([^;]+))?;'
        for match in re.finditer(var_pattern, method_code):
            var_type = match.group(1)
            # "return total;" has the same shape as "int total;" but is no declaration
            if var_type in self._NON_TYPE_KEYWORDS:
                continue
            initializer = match.group(3) if match.group(3) else None
            variables.append({
                "name": match.group(2),
                "type": var_type,
                "has_initializer": bool(initializer),
                "initializer": initializer
            })
        return variables

    def _extract_assignments(self, method_code: str) -> List[Dict[str, Any]]:
        """Extract variable assignments"""
        # A plain "=" must not be the first half of "==" (a comparison)
        assign_pattern = r'(\w+)\s*(?:>>>=|<<=|>>=|[+\-*/%^&|]=|=(?!=))\s*([^;]+);'
        return [
            {"variable": match.group(1), "value": match.group(2)}
            for match in re.finditer(assign_pattern, method_code)
        ]

    def _extract_conditions(self, method_code: str) -> List[Dict[str, Any]]:
        """Extract conditional expressions"""
        conditions = []
        # Match if conditions (conditions containing parentheses are not captured)
        if_pattern = r'\bif\s*\(\s*([^{};()]+?)\s*\)'
        for match in re.finditer(if_pattern, method_code):
            condition = match.group(1).strip()
            conditions.append({
                "type": "if_condition",
                "condition": condition,
                # Check for comparison operators
                "has_comparison": any(op in condition for op in ['==', '!=', '<', '>', '<=', '>=']),
                # Check for logical operators
                "has_logical_op": '&&' in condition or '||' in condition,
                # Check for negation: a "!" that is not part of "!="
                "has_negation": re.search(r'!(?!=)', condition) is not None
            })
        # Match loop conditions (classic three-part for headers contain ';' and are not captured)
        loop_pattern = r'\b(?:while|for)\s*\(\s*([^{};()]+?)\s*\)'
        for match in re.finditer(loop_pattern, method_code):
            conditions.append({
                "type": "loop_condition",
                "condition": match.group(1).strip()
            })
        return conditions

    def _has_early_returns(self, method_code: str) -> bool:
        """Check if method has early returns (a return guarded directly by an if)"""
        # Searched across the whole text so that the usual layout with the
        # return on the line after "if (...) {" is found; braces are optional.
        if_return_pattern = r'\bif\s*\([^{};]*\)\s*\{?\s*return\b'
        return re.search(if_return_pattern, method_code) is not None

    def _detect_collection_operations(self, method_code: str) -> List[Dict[str, str]]:
        """Detect operations on collections"""
        # Common collection operations; group 1 is the receiver
        collection_ops = [
            (r'(\w+)\.add\(([^)]+)\)', 'add'),
            (r'(\w+)\.remove\(([^)]+)\)', 'remove'),
            (r'(\w+)\.get\(([^)]+)\)', 'get'),
            (r'(\w+)\.set\(([^)]+),\s*([^)]+)\)', 'set'),
            (r'(\w+)\.contains\(([^)]+)\)', 'contains'),
            (r'(\w+)\.isEmpty\(\)', 'isEmpty'),
            (r'(\w+)\.clear\(\)', 'clear'),
            (r'(\w+)\.size\(\)', 'size')
        ]
        return [
            {"type": op_type, "collection": match.group(1)}
            for pattern, op_type in collection_ops
            for match in re.finditer(pattern, method_code)
        ]

    def _detect_string_operations(self, method_code: str) -> List[Dict[str, str]]:
        """Detect operations on strings"""
        # Common string operations; group 1 is the receiver
        string_ops = [
            (r'(\w+)\.charAt\(([^)]+)\)', 'charAt'),
            (r'(\w+)\.substring\(([^)]+)\)', 'substring'),
            (r'(\w+)\.length\(\)', 'length'),
            (r'(\w+)\.equals\(([^)]+)\)', 'equals'),
            (r'(\w+)\.startsWith\(([^)]+)\)', 'startsWith'),
            (r'(\w+)\.endsWith\(([^)]+)\)', 'endsWith'),
            (r'(\w+)\.replace\(([^)]+),\s*([^)]+)\)', 'replace'),
            (r'(\w+)\.split\(([^)]+)\)', 'split')
        ]
        return [
            {"type": op_type, "string": match.group(1)}
            for pattern, op_type in string_ops
            for match in re.finditer(pattern, method_code)
        ]

    def _detect_math_operations(self, method_code: str) -> List[Dict[str, str]]:
        """Detect math operations"""
        operations = []
        # Basic math operators between two simple operands
        math_op_pattern = r'(\w+)\s*([+\-*/%])\s*(\w+)'
        for match in re.finditer(math_op_pattern, method_code):
            operations.append({
                "type": "basic_math",
                "operator": match.group(2),
                "left": match.group(1),
                "right": match.group(3)
            })
        # Math library calls
        math_lib_pattern = r'Math\.(\w+)\('
        for match in re.finditer(math_lib_pattern, method_code):
            operations.append({
                "type": "math_library",
                "function": match.group(1)
            })
        return operations

    def _detect_sorting_direction(self, method_code: str) -> str:
        """Detect sorting direction (ascending/descending)"""
        # Check for comparison operators in sorting context
        if re.search(r'if\s*\([^<>]*<[^<>]*\)[^{}]*(?:swap|=)', method_code):
            return "ascending"
        if re.search(r'if\s*\([^<>]*>[^<>]*\)[^{}]*(?:swap|=)', method_code):
            return "descending"
        # Check for built-in sort with comparator
        if re.search(r'Collections\.sort\(.*new\s+Comparator.*\{\s*(?:@Override)?\s*public\s+int\s+compare\s*\([^)]*\)\s*\{[^}]*<[^}]*\}\s*\}', method_code):
            return "ascending"
        if re.search(r'Collections\.sort\(.*new\s+Comparator.*\{\s*(?:@Override)?\s*public\s+int\s+compare\s*\([^)]*\)\s*\{[^}]*>[^}]*\}\s*\}', method_code):
            return "descending"
        # Check for reverse order
        if "Collections.reverseOrder()" in method_code:
            return "descending"
        # Default to ascending for standard API calls without explicit direction
        # (reverseOrder() was already ruled out above)
        if "Arrays.sort(" in method_code or "Collections.sort(" in method_code:
            return "ascending"
        return ""

    def _detect_search_target(self, method_code: str) -> str:
        """Try to determine what is being searched for"""
        # First argument of the first find*/search*/get* call
        search_pattern = r'(?:find|search|get)(?:\w+)?\(\s*(\w+)[^)]*\)'
        search_match = re.search(search_pattern, method_code)
        return search_match.group(1) if search_match else ""