diff --git a/.gitignore b/.gitignore index 64796f3c..faac2c22 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -test/**/*gt/* +test/**/*generated/* **/__pycache__ /Dockerfile* env.bash diff --git a/src/__init__.py b/src/__init__.py index 80e5b721..72b077c8 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -9,6 +9,7 @@ class GameTime(object): """Contains methods and variables that allow a user to import GameTime as a module. """ + @staticmethod def analyze(project_config: ProjectConfiguration) -> Analyzer: """ @@ -25,4 +26,4 @@ def analyze(project_config: ProjectConfiguration) -> Analyzer: return gametime_analyzer except GameTimeError as e: logger.error(str(e)) - raise e \ No newline at end of file + raise e diff --git a/src/analyze_project.py b/src/analyze_project.py index 62350c62..cac9cd18 100644 --- a/src/analyze_project.py +++ b/src/analyze_project.py @@ -1,8 +1,9 @@ #!/usr/bin/env python + def main(): """Main function invoked when this script is run.""" if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/src/analyzer.py b/src/analyzer.py index f559b5ca..9da5ca53 100644 --- a/src/analyzer.py +++ b/src/analyzer.py @@ -41,6 +41,7 @@ for details on the GameTime license and authors. """ + class Analyzer(object): """ Maintains information about the code being analyzed, such as @@ -105,19 +106,21 @@ def __init__(self, project_config: ProjectConfiguration): self.dag_path: str = "" backend_dict = { - "flexpret": FlexpretBackend, - "x86": X86Backend, + "flexpret": FlexpretBackend, + "x86": X86Backend, "arm": ArmBackend, # Keep legacy capitalized names for backward compatibility - "Flexpret": FlexpretBackend, - "X86": X86Backend, - "ARM": ArmBackend + "Flexpret": FlexpretBackend, + "X86": X86Backend, + "ARM": ArmBackend, } - + if self.project_config.backend not in backend_dict: raise GameTimeError("No valid backend specified") - - self.backend: Backend = backend_dict[self.project_config.backend](self.project_config) + + self.backend: Backend = backend_dict[self.project_config.backend]( + self.project_config + ) # Finally, preprocess the file before analysis. self._preprocess() @@ -138,7 +141,7 @@ def _preprocess(self): shutil.rmtree(project_temp_dir) err_msg = "File to analyze not found: %s" % orig_file raise GameTimeError(err_msg) - + # Check if the additional files to be analyzed exists. additional_files = self.project_config.location_additional_files if additional_files: @@ -164,7 +167,7 @@ def _preprocess(self): else: os.mkdir(project_temp_dir) - os.chmod(project_temp_dir, 0o777) # make dir read and write by everyone + os.chmod(project_temp_dir, 0o777) # make dir read and write by everyone # Make a temporary copy of the original file to preprocess. 
        preprocessed_file = self.project_config.location_temp_file
@@ -172,21 +175,41 @@ def _preprocess(self):
 
         processing: str = ""
-        processing = clang_helper.compile_to_llvm_for_analysis(self.project_config.location_orig_file, self.project_config.location_temp_dir,
-                                                               f"{self.project_config.name_orig_no_extension}gt", self.project_config.included, self.project_config.compile_flags)
+        processing = clang_helper.compile_to_llvm_for_analysis(
+            self.project_config.location_orig_file,
+            self.project_config.location_temp_dir,
+            f"{self.project_config.name_orig_no_extension}{config.TEMP_SUFFIX}",
+            self.project_config.included,
+            self.project_config.compile_flags,
+        )
+
         additional_files_processing = []
         if additional_files:
-            additional_files_processing = clang_helper.compile_list_to_llvm_for_analysis(self.project_config.location_additional_files, self.project_config.location_temp_dir,
-                                                                                         self.project_config.included, self.project_config.compile_flags)
-
+            additional_files_processing = (
+                clang_helper.compile_list_to_llvm_for_analysis(
+                    self.project_config.location_additional_files,
+                    self.project_config.location_temp_dir,
+                    self.project_config.included,
+                    self.project_config.compile_flags,
+                )
+            )
+
         # Preprocessing pass: inline functions.
-        if self.project_config.inlined: # Note: This is made into a bool rather than a list
-            processing = self._run_inliner(input_file=processing, additional_files=additional_files_processing)
+        if (
+            self.project_config.inlined
+        ):  # Note: This is made into a bool rather than a list
+            processing = self._run_inliner(
+                input_file=processing, additional_files=additional_files_processing
+            )
 
         # Preprocessing pass: unroll loops.
         if self.project_config.UNROLL_LOOPS:
             processing = self._run_loop_unroller(compiled_file=processing)
 
-        self.dag_path: str = clang_helper.generate_dot_file(processing, self.project_config.location_temp_dir)
+
+        # Generate the DOT file of the control-flow graph (CFG); GameTime later analyzes it as a directed acyclic graph (DAG).
+        self.dag_path: str = clang_helper.generate_dot_file(
+            processing, self.project_config.location_temp_dir
+        )
         self.preprocessed_path: str = processing
         # We are done with the preprocessing.
         logger.info("Preprocessing complete.")
@@ -210,8 +233,11 @@ def _run_loop_unroller(self, compiled_file: str) -> str:
         preprocessed_file: str = self.project_config.location_temp_file
 
         # Infer the name of the file that results from the CIL preprocessing.
- unrolled_file: str = unroller.unroll(compiled_file, self.project_config.location_temp_dir, - f"{self.project_config.name_orig_no_extension}") + unrolled_file: str = unroller.unroll( + compiled_file, + self.project_config.location_temp_dir, + f"{self.project_config.name_orig_no_extension}", + ) logger.info("Preprocessing the file: unrolling loops in the code...") @@ -220,7 +246,7 @@ def _run_loop_unroller(self, compiled_file: str) -> str: raise GameTimeError(err_msg) else: shutil.copyfile(unrolled_file, preprocessed_file) - + logger.info("") logger.info("Loops in the code have been unrolled.") @@ -246,13 +272,14 @@ def _run_inliner(self, input_file: str, additional_files: str): logger.info("Preprocessing the file: inlining...") - # inlined_file = clang_helper.inline_functions(input_file, self.project_config.location_temp_dir, - # f"{self.project_config.name_orig_no_extension}gt-inlined") - input_files = [input_file] + additional_files - inlined_file = inliner.inline_functions(input_files, self.project_config.location_temp_dir, - f"{self.project_config.name_orig_no_extension}", self.project_config.func) - + inlined_file = inliner.inline_functions( + input_files, + self.project_config.location_temp_dir, + f"{self.project_config.name_orig_no_extension}", + self.project_config.func, + ) + if not inlined_file: err_msg = "Error running the inliner." raise GameTimeError(err_msg) @@ -263,7 +290,7 @@ def _run_inliner(self, input_file: str, additional_files: str): logger.info("Inlining complete.") return inlined_file - + ### GRAPH FUNCTIONS ### def create_dag(self): """ @@ -274,39 +301,44 @@ def create_dag(self): """ logger.info("Generating the DAG and associated information...") - #TODO: add back construction dag from filepath + # TODO: add back construction dag from filepath # if nx_helper.construct_dag(self.dag_path): # err_msg = "Error running the Phoenix program analyzer." # raise GameTimeError(err_msg) - location = os.path.join(self.project_config.location_temp_dir, - f".{self.project_config.func}.dot") + location = os.path.join( + self.project_config.location_temp_dir, f".{self.project_config.func}.dot" + ) self.load_dag_from_dot_file(location) - bitcode = [] for node in self.dag.nodes: bitcode.append(self.dag.get_node_label(self.dag.nodes_indices[node])) find_labels("".join(bitcode), self.project_config.location_temp_dir) logger.info("All possible labels extracted.") - # special case for single node dag if self.dag.num_nodes == 1 and self.dag.num_edges == 0: self.path_dimension = 1 return num_edges_reduced = len(self.dag.edges_reduced) - # Checking num_basis_paths = num_edges - num_nodes + 2 + # Note: The number of feasible basis paths is bounded by (num_edges - num_nodes + 2) + # (Page 8 of "The Internals of GameTime" by Jonathan Kotker). self.path_dimension = self.dag.num_edges - self.dag.num_nodes + 2 + print( + f"num_edges_reduced = {num_edges_reduced}, self.path_dimension = {self.path_dimension}" + ) if num_edges_reduced != self.path_dimension: - err_msg = ("The number of non-special edges is different from the dimension of the path.") + err_msg = "The number of non-special edges is different from the dimension of the path." raise GameTimeError(err_msg) logger.info("DAG generated.") - - logger.info("The control-flow graph has %d nodes and %d edges, with at most %d possible paths." % - (self.dag.num_nodes, self.dag.num_edges, self.dag.num_paths)) + + logger.info( + "The control-flow graph has %d nodes and %d edges, with at most %d possible paths." 
+            % (self.dag.num_nodes, self.dag.num_edges, self.dag.num_paths)
+        )
         logger.info("There are at most %d possible basis paths." % self.path_dimension)
         logger.info("")
 
@@ -320,17 +352,21 @@ def load_dag_from_dot_file(self, location: str):
         """
         self.dag, modified = nx_helper.construct_dag(location)
+        print(f"num_edges in load_dag_from_dot_file = {self.dag.num_edges}")
         if modified:
-            modified_dag_location = os.path.join(self.project_config.location_temp_dir,
-                                                 f".{self.project_config.func}_modified.dot")
+            modified_dag_location = os.path.join(
+                self.project_config.location_temp_dir,
+                f".{self.project_config.func}_modified.dot",
+            )
             write_dag_to_dot_file(self.dag, modified_dag_location)
-            logger.info("New CFG outputed to folder.")
+            logger.info(
+                "Cycles were detected and removed from the CFG; the modified CFG has been written to the temporary folder."
+            )
 
         # Reset variables of this "Analyzer" object.
         self.reset_path_exclusive_constraints()
         self.reset_path_bundled_constraints()
 
-
     ### BASIS MATRIX FUNCTIONS ###
     def _init_basis_matrix(self):
         """Initializes the basis matrix."""
@@ -342,7 +378,7 @@ def _randomize_basis_matrix(self):
         """
         Randomizes the rows of the basis matrix using
         a Fisher-Yates shuffle.
-        
+
         Precondition: The basis matrix has been initialized.
         """
         for i in range(self.path_dimension, 0, -1):
@@ -354,7 +390,7 @@ def _swap_basis_matrix_rows(self, i, j):
         Swaps two rows of the basis matrix.
 
         Parameters:
-            i: 
+            i:
                 Index of one row to swap.
             j:
                 Index of other row to swap.
@@ -370,7 +406,6 @@ def _swap_basis_matrix_rows(self, i, j):
             row_to_swap_out[k] = row_to_swap_in[k]
             row_to_swap_in[k] = temp_row_to_swap_out[k]
 
-
     ### PATH GENERATION FUNCTIONS ###
     def add_path_exclusive_constraint(self, edges: List[Tuple[str, str]]):
         """
@@ -392,7 +427,7 @@ def add_path_bundled_constraint(self, edges: List[Tuple[str, str]]):
         constraints, if not already present. These edges must be taken
         together if at least one of them is taken along a path
         through the DAG.
-        
+
         Parameters:
             edges: List[Tuple[str, str]] :
                 List of edges to add to the list of path-bundled constraints.
@@ -422,8 +457,7 @@ def _compress_path(self, path_edges: List[Tuple[str, str]]) -> List[float]:
         Returns:
             0-1 vector that is 1 if a `non-special' edge is along the path, and 0 otherwise.
         """
-        return [(1.0 if edge in path_edges else 0.0)
-                for edge in self.dag.edges_reduced]
+        return [(1.0 if edge in path_edges else 0.0) for edge in self.dag.edges_reduced]
 
     ####### Fuctions to FIX
     def generate_overcomplete_basis(self, k: int):
         """
         logger.info("Find minimal overcomplete basis")
         pulp_helper.find_minimal_overcomplete_basis(self, feasible, k)
 
-
-    def iteratively_find_overcomplete_basis(self, initial_paths: List[List[Tuple[str, str]]], k: int):
+    def iteratively_find_overcomplete_basis(
+        self, initial_paths: List[List[Tuple[str, str]]], k: int
+    ):
         """
         Generates overcomplete basis such the lenth of the
         longest feasible path is at most 'k'. The basis is
         computed by iteratively
@@ -458,7 +493,7 @@ def iteratively_find_overcomplete_basis(self, initial_paths: List[List[Tuple[str
 
         Parameters:
             initial_paths: List[List[Tuple[str, str]]] :
                 A list of initial paths to begin with.
-            
+
             k: int :
                 Maximum value of L1 norm.
@@ -471,15 +506,19 @@ def iteratively_find_overcomplete_basis(self, initial_paths: List[List[Tuple[str start_time = time.perf_counter() while True: before_time = time.perf_counter() - length, path, ilp_problem = \ - pulp_helper.find_worst_expressible_path(self, self.basis_paths, 0) + length, path, ilp_problem = pulp_helper.find_worst_expressible_path( + self, self.basis_paths, 0 + ) after_time = time.perf_counter() - logger.info("Found a candidate path of length %.2f in %d seconds" % - (length, after_time - before_time)) + logger.info( + "Found a candidate path of length %.2f in %d seconds" + % (length, after_time - before_time) + ) optimal_bound = length # if the length of the longest path is within the given bound, stop - if length <= k: break + if length <= k: + break candidate_path_nodes = path candidate_path_edges = Dag.get_edges(candidate_path_nodes) @@ -487,7 +526,7 @@ def iteratively_find_overcomplete_basis(self, initial_paths: List[List[Tuple[str logger.info("Checking if the found path is feasible...") result_path = Path(ilp_problem=ilp_problem, nodes=candidate_path_nodes) value = self.measure_path(result_path) - if value < float('inf'): + if value < float("inf"): logger.info("Path is feasible.") self.basis_paths.append(result_path) edge_node_paths.append(candidate_path_edges) @@ -498,21 +537,22 @@ def iteratively_find_overcomplete_basis(self, initial_paths: List[List[Tuple[str unsat_core = result_path.smtQuery.unsatCore exclude_edges = result_path.get_edges_for_conditions(unsat_core) logger.info("Edges to be excluded found.") - logger.info("Adding a constraint to exclude " - "these edges...") + logger.info("Adding a constraint to exclude " "these edges...") if len(exclude_edges) > 0: self.add_path_exclusive_constraint(exclude_edges) else: self.add_path_exclusive_constraint(candidate_path_edges) logger.info("Constraint added.") - logger.info("Found overcomplete basis of size %d, yielding bound %.2f" % - (len(edge_node_paths), optimal_bound)) + logger.info( + "Found overcomplete basis of size %d, yielding bound %.2f" + % (len(edge_node_paths), optimal_bound) + ) self.basis_paths_nodes = [path.nodes for path in self.basis_paths] return self.basis_paths - def generate_basis_paths(self): + def generate_basis_paths(self): """ Generates a list of "Path" objects, each of which represents a basis path of the code being analyzed. The basis "Path" objects @@ -538,8 +578,9 @@ def generate_basis_paths(self): logger.info("Basis matrix initialized to") logger.info(self.basis_matrix) logger.info("") - logger.info("There are a maximum of %d possible basis paths." % - self.path_dimension) + logger.info( + "There are a maximum of %d possible basis paths." % self.path_dimension + ) logger.info("") def on_exit(start_time, infeasible): @@ -556,15 +597,17 @@ def on_exit(start_time, infeasible): infeasible : Set of infeasible paths. - Returns: + Returns: List of basis paths of the code being analyzed, each represented by an object of the Path class. """ self.basis_paths = basis_paths self.basis_paths_nodes = [path.nodes for path in basis_paths] # self.resetPathExclusiveConstraints() - logger.info("Time taken to generate paths: %.2f seconds." % - (time.perf_counter() - start_time)) + logger.info( + "Time taken to generate paths: %.2f seconds." 
+ % (time.perf_counter() - start_time) + ) logger.info("Basis paths generated.") @@ -574,29 +617,33 @@ def on_exit(start_time, infeasible): logger.info("Iteratively improving the basis") for path in infeasible: self.add_path_exclusive_constraint(path) - edge_paths = \ - [Dag.get_edges(path.nodes) for path in self.basis_paths] + edge_paths = [Dag.get_edges(path.nodes) for path in self.basis_paths] result = self.iteratively_find_overcomplete_basis( - edge_paths, self.project_config.MAXIMUM_ERROR_SCALE_FACTOR) + edge_paths, self.project_config.MAXIMUM_ERROR_SCALE_FACTOR + ) logger.info("Number of paths generated: %d" % len(result)) - logger.info("Time taken to generate paths: %.2f seconds." % - (time.perf_counter() - start_time)) + logger.info( + "Time taken to generate paths: %.2f seconds." + % (time.perf_counter() - start_time) + ) return result else: return self.basis_paths - + if self.path_dimension == 1: - warn_msg = ("Basis matrix has dimensions 1x1. " - "There is only one path through the function " - "under analysis, which is the only basis path.") - logger.warning(warn_msg) - + warn_msg = ( + "Basis matrix has dimensions 1x1. " + "There is only one path through the function " + "under analysis, which is the only basis path." + ) + logger.warning(warn_msg) + if self.dag.num_nodes == 1 and self.dag.num_edges == 0: warn_msg = "Single node CFD with no edge. Only one possible path." logger.warning(warn_msg) basis_paths = [Path(nodes=[self.dag.source])] return on_exit(start_time, []) - + i = 0 # Collects all_temp_files infeasible paths discovered during the computation @@ -604,8 +651,14 @@ def on_exit(start_time, infeasible): current_row, num_paths_unsat = 0, 0 while current_row < (self.path_dimension - self.num_bad_rows): logger.info("Currently at row %d..." % (current_row + 1)) - logger.info("So far, the bottom %d rows of the basis matrix are `bad'." % self.num_bad_rows) - logger.info("So far, %d candidate paths were found to be unsatisfiable." % num_paths_unsat) + logger.info( + "So far, the bottom %d rows of the basis matrix are `bad'." + % self.num_bad_rows + ) + logger.info( + "So far, %d candidate paths were found to be unsatisfiable." + % num_paths_unsat + ) logger.info(f"Basis matrix is {self.basis_matrix}") logger.info("") @@ -622,7 +675,10 @@ def on_exit(start_time, infeasible): logger.info("") if ilp_problem.obj_val is None: - logger.info("Unable to find a candidate path to replace row %d." % (current_row + 1)) + logger.info( + "Unable to find a candidate path to replace row %d." 
+ % (current_row + 1) + ) logger.info("Moving the bad row to the bottom of the basis matrix.") for k in range((current_row + 1), self.path_dimension): self._swap_basis_matrix_rows(k - 1, k) @@ -641,19 +697,27 @@ def on_exit(start_time, infeasible): self.basis_matrix[current_row] = compressed_path sign, new_basis_matrix_log_det = slogdet(self.basis_matrix) new_basis_matrix_det = exp(new_basis_matrix_log_det) - logger.info("Absolute value of the new determinant: %g" % new_basis_matrix_det) + logger.info( + "Absolute value of the new determinant: %g" % new_basis_matrix_det + ) logger.info("") DETERMINANT_THRESHOLD = self.project_config.DETERMINANT_THRESHOLD MAX_INFEASIBLE_PATHS = self.project_config.MAX_INFEASIBLE_PATHS - if ((sign == 0 and new_basis_matrix_log_det == float("-inf")) or - new_basis_matrix_det < DETERMINANT_THRESHOLD or - num_paths_unsat >= MAX_INFEASIBLE_PATHS): # If row is bad - - if (new_basis_matrix_det < DETERMINANT_THRESHOLD and not (sign == 0 and new_basis_matrix_log_det == float("-inf"))): + if ( + (sign == 0 and new_basis_matrix_log_det == float("-inf")) + or new_basis_matrix_det < DETERMINANT_THRESHOLD + or num_paths_unsat >= MAX_INFEASIBLE_PATHS + ): # If row is bad + + if new_basis_matrix_det < DETERMINANT_THRESHOLD and not ( + sign == 0 and new_basis_matrix_log_det == float("-inf") + ): logger.info("Determinant is too small.") else: - logger.info("Unable to find a path that makes the determinant non-zero.") + logger.info( + "Unable to find a path that makes the determinant non-zero." + ) logger.info("Moving the bad row to the bottom of the basis matrix.") @@ -667,11 +731,13 @@ def on_exit(start_time, infeasible): logger.info("Checking if replacement is feasible...") logger.info("") result_path = Path(ilp_problem=ilp_problem, nodes=candidate_path_nodes) - + # feasibility test - value = self.measure_path(result_path, f'gen-basis-path-row{current_row}-attempt{i}') + value = self.measure_path( + result_path, f"gen-basis-path-row{current_row}-attempt{i}" + ) i += 1 - if value < float('inf'): + if value < float("inf"): # Sanity check: # A row should not be replaced if it replaces a good row and decreases the determinant. However, replacing a bad row and decreasing the determinant is okay. (TODO: Are we actually doing this?) logger.info("Replacement is feasible.") @@ -699,15 +765,22 @@ def on_exit(start_time, infeasible): is_two_barycentric = False refinement_round = 0 while not is_two_barycentric: - logger.info("Currently in round %d of refinement..." % (refinement_round + 1)) + logger.info( + "Currently in round %d of refinement..." % (refinement_round + 1) + ) logger.info("") is_two_barycentric = True current_row, num_paths_unsat = 0, 0 - good_rows = (self.path_dimension - self.num_bad_rows) + good_rows = self.path_dimension - self.num_bad_rows while current_row < good_rows: - logger.info("Currently at row %d out of %d..." % (current_row + 1, good_rows)) - logger.info("So far, %d candidate paths were found to be unsatisfiable." % num_paths_unsat) + logger.info( + "Currently at row %d out of %d..." % (current_row + 1, good_rows) + ) + logger.info( + "So far, %d candidate paths were found to be unsatisfiable." 
+ % num_paths_unsat + ) logger.info(f"Basis matrix is {self.basis_matrix}") logger.info("") @@ -718,13 +791,18 @@ def on_exit(start_time, infeasible): self.dag.edge_weights = self._calculate_subdets(current_row) logger.info("Calculation complete.") - logger.info("Finding a candidate path using an integer linear program...") + logger.info( + "Finding a candidate path using an integer linear program..." + ) logger.info("") candidate_path_nodes, ilp_problem = pulp_helper.find_extreme_path(self) logger.info("") if ilp_problem.obj_val is None: - logger.info("Unable to find a candidate path to replace row %d." % (current_row + 1)) + logger.info( + "Unable to find a candidate path to replace row %d." + % (current_row + 1) + ) current_row += 1 num_paths_unsat = 0 continue @@ -735,7 +813,9 @@ def on_exit(start_time, infeasible): sign, old_basis_matrix_log_det = slogdet(self.basis_matrix) old_basis_matrix_det = exp(old_basis_matrix_log_det) - logger.info("Absolute value of the old determinant: %g" % old_basis_matrix_det) + logger.info( + "Absolute value of the old determinant: %g" % old_basis_matrix_det + ) # Temporarily replace the row in the basis matrix # to calculate the new determinant. @@ -743,21 +823,28 @@ def on_exit(start_time, infeasible): self.basis_matrix[current_row] = compressed_path sign, new_basis_matrix_log_det = slogdet(self.basis_matrix) new_basis_matrix_det = exp(new_basis_matrix_log_det) - logger.info("Absolute value of the new determinant: %g" % new_basis_matrix_det) + logger.info( + "Absolute value of the new determinant: %g" % new_basis_matrix_det + ) if new_basis_matrix_det > 2 * old_basis_matrix_det: logger.info("Possible replacement for row found.") logger.info("Checking if replacement is feasible...") logger.info("") - result_path = Path(ilp_problem=ilp_problem, nodes=candidate_path_nodes) + result_path = Path( + ilp_problem=ilp_problem, nodes=candidate_path_nodes + ) basis_paths[current_row] = result_path current_row += 1 num_paths_unsat = 0 - - #feasibility test - value = self.measure_path(result_path, f'gen-basis-path-replace-candid-{current_row+1}-{good_rows}') - if value < float('inf'): + # feasibility test + value = self.measure_path( + result_path, + f"gen-basis-path-replace-candid-{current_row+1}-{good_rows}", + ) + + if value < float("inf"): logger.info("Replacement is feasible.") is_two_barycentric = False basis_paths[current_row] = result_path @@ -833,19 +920,17 @@ def _calculate_subdets(self, row: int) -> List[int]: return edge_weight_list - def estimate_edge_weights(self): """ Estimates the weights on the edges of the DAG, using the values of the basis "Path" objects. The result is stored in the instance variable "edgeWeights". - + Precondition: The basis paths have been generated and have values. """ self.dag.reset_edge_weights() - basis_values = [basis_path.measured_value for basis_path - in self.basis_paths] + basis_values = [basis_path.measured_value for basis_path in self.basis_paths] # By default, we assume a value of 0 for each of the rows in # the basis matrix that no replacement could be found for # (the `bad' rows in the basis matrix). @@ -860,8 +945,9 @@ def estimate_edge_weights(self): # programming problem will use. 
logger.info("Generating the list of weights on all_temp_files edges...") for reduced_edge_index, reduced_edge in enumerate(self.dag.edges_reduced): - self.dag.edge_weights[self.dag.edges_reduced_indices[reduced_edge]] = \ + self.dag.edge_weights[self.dag.edges_reduced_indices[reduced_edge]] = ( reduced_edge_weights[reduced_edge_index] + ) logger.info("List generated.") def generate_paths(self, *args, **kwargs): @@ -869,15 +955,14 @@ def generate_paths(self, *args, **kwargs): ### MEASUREMENT FUNCTIONS #### def measure_basis_paths(self): - """Measure all generated BASIS_PATHS again - """ + """Measure all generated BASIS_PATHS again""" for i in range(len(self.basis_paths)): p: Path = self.basis_paths[i] self.measure_path(p, f"basis_path{i}") def measure_path(self, path: Path, output_name: str) -> int: """ - Measure the Path if never measured before. If no name was set, the parameter output_name is used. + Measure the Path if never measured before. If no name was set, the parameter output_name is used. Parameters: path: Path : @@ -891,10 +976,12 @@ def measure_path(self, path: Path, output_name: str) -> int: if path.path_analyzer == None or path.name != output_name: path.name = output_name - path_analyzer: PathAnalyzer = PathAnalyzer(self.preprocessed_path, self.project_config, self.dag, path, output_name) + path_analyzer: PathAnalyzer = PathAnalyzer( + self.preprocessed_path, self.project_config, self.dag, path, output_name + ) path.path_analyzer = path_analyzer - - path_analyzer = path.path_analyzer + + # Measure the path (feasibility should have been checked already) value: int = path.measured_value value = max(value, path_analyzer.measure_path(self.backend)) path.set_measured_value(value) @@ -903,7 +990,7 @@ def measure_path(self, path: Path, output_name: str) -> int: def measure_paths(self, paths: list[Path], output_name_prefix: str) -> int: """ Measure the list of PATHS. Using prefix and index as name if none is given. - + Parameters: paths: list[Path] : List of paths to measure. @@ -915,7 +1002,6 @@ def measure_paths(self, paths: list[Path], output_name_prefix: str) -> int: """ result = [] for i in range(len(paths)): - output_name: str = f'{output_name_prefix}{i}' + output_name: str = f"{output_name_prefix}{i}" result.append(self.measure_path(paths[i], output_name)) return result - diff --git a/src/backend/arm_backend/arm_backend.py b/src/backend/arm_backend/arm_backend.py index afcbe072..a68896f8 100644 --- a/src/backend/arm_backend/arm_backend.py +++ b/src/backend/arm_backend/arm_backend.py @@ -10,6 +10,7 @@ from defaults import logger from gametime_error import GameTimeError + class ArmBackend(Backend): timing_func = """ static inline unsigned long long read_cycle_count() { @@ -18,12 +19,14 @@ class ArmBackend(Backend): unsigned long long nanoTime = time.tv_sec * 1000000000L + time.tv_nsec; return nanoTime; } -""" +""" def __init__(self, project_config: ProjectConfiguration): super(ArmBackend, self).__init__(project_config, "Arm") - def generate_executable(self, filepath: str, func_name: str, inputs: str, measure_folder: str) -> str: + def generate_executable( + self, filepath: str, func_name: str, inputs: str, measure_folder: str + ) -> str: """ Modifies the input program to use INPUTS and generates the executable code. Stored at MEASURE_FOLDER/driver @@ -38,14 +41,30 @@ def generate_executable(self, filepath: str, func_name: str, inputs: str, measur The folder to store generated executable. Returns: - str: + str: Path to the executable code. 
""" - exec_file = generate_executable(filepath, measure_folder, func_name, inputs, self.timing_func) - modified_bitcode_file_path = clang_helper.compile_to_llvm_for_exec(exec_file, measure_folder, "modified_output", self.project_config.included, self.project_config.compile_flags) - return clang_helper.bc_to_executable(modified_bitcode_file_path, measure_folder, "driver", self.project_config.included, self.project_config.compile_flags) - - def run_backend_and_parse_output(self, stored_folder: str, executable_path: str) -> int: + exec_file = generate_executable( + filepath, measure_folder, func_name, inputs, self.timing_func + ) + modified_bitcode_file_path = clang_helper.compile_to_llvm_for_exec( + exec_file, + measure_folder, + "modified_output", + self.project_config.included, + self.project_config.compile_flags, + ) + return clang_helper.bc_to_executable( + modified_bitcode_file_path, + measure_folder, + "driver", + self.project_config.included, + self.project_config.compile_flags, + ) + + def run_backend_and_parse_output( + self, stored_folder: str, executable_path: str + ) -> int: """ Runs the executable in EXECUTABLE_PATH in host machine and extracts the outputs from program. Temperaries are stored in STORED_FOLDER. @@ -65,21 +84,21 @@ def run_backend_and_parse_output(self, stored_folder: str, executable_path: str) out_file_path = os.path.join(stored_folder, "measure.out") while not os.path.exists(out_file_path): - print('Waiting for measure.out file') + print("Waiting for measure.out file") time.sleep(5) out_filepath = os.path.join(stored_folder, "measure.out") while not os.path.exists(out_filepath): - print('Waiting for measure.out file') + print("Waiting for measure.out file") time.sleep(5) with open(out_filepath, "r") as out_file: lines = out_file.readlines() - - last_line = lines[-1] if lines else '' - match = re.search(r'\d+$', last_line) - + last_line = lines[-1] if lines else "" + + match = re.search(r"\d+$", last_line) + if match: extracted_integer = int(match.group()) return extracted_integer @@ -103,11 +122,15 @@ def measure(self, inputs: str, measure_folder: str) -> int: stored_folder: str = measure_folder filepath: str = self.project_config.location_orig_file func_name: str = self.project_config.func - executable_path: str = self.generate_executable(filepath, func_name, inputs, measure_folder) + executable_path: str = self.generate_executable( + filepath, func_name, inputs, measure_folder + ) cycle_count: int = -1 try: - cycle_count: int = self.run_backend_and_parse_output(stored_folder, executable_path) + cycle_count: int = self.run_backend_and_parse_output( + stored_folder, executable_path + ) except EnvironmentError as e: - err_msg: str = ("Error in measuring the cycle count of a path in Arm: %s" % e) + err_msg: str = "Error in measuring the cycle count of a path in Arm: %s" % e logger.info(err_msg) - return cycle_count \ No newline at end of file + return cycle_count diff --git a/src/backend/backend.py b/src/backend/backend.py index fbcd73b5..786807af 100644 --- a/src/backend/backend.py +++ b/src/backend/backend.py @@ -6,6 +6,7 @@ """ from project_configuration import ProjectConfiguration from typing import List + """See the LICENSE file, located in the root directory of the source distribution and at http://verifun.eecs.berkeley.edu/gametime/about/LICENSE, @@ -19,7 +20,7 @@ class Backend(object): that is being analyzed. 
""" - def __init__(self, project_config: ProjectConfiguration, name: str=""): + def __init__(self, project_config: ProjectConfiguration, name: str = ""): """ Parameters: project_config: ProjectConfiguration: @@ -32,7 +33,7 @@ def __init__(self, project_config: ProjectConfiguration, name: str=""): #: GameTime project configuration for the code that is being analyzed. self.project_config: ProjectConfiguration = project_config - + def measure(self, inputs: str, measure_folder: str) -> int: """ Perform measurement using the backend. @@ -47,4 +48,4 @@ def measure(self, inputs: str, measure_folder: str) -> int: int: The measured value of path """ - return 0 \ No newline at end of file + return 0 diff --git a/src/backend/flexpret_backend/flexpret_backend.py b/src/backend/flexpret_backend/flexpret_backend.py index 94d71a7a..aab5ceb6 100644 --- a/src/backend/flexpret_backend/flexpret_backend.py +++ b/src/backend/flexpret_backend/flexpret_backend.py @@ -11,8 +11,9 @@ from gametime_error import GameTimeError import command_utils + class FlexpretBackend(Backend): - + timing_func = """ static inline unsigned long read_cycle_count() { return rdcycle(); @@ -22,7 +23,9 @@ class FlexpretBackend(Backend): def __init__(self, project_config: ProjectConfiguration): super(FlexpretBackend, self).__init__(project_config, "Flexpret") - def generate_executable_c(self, filepath: str, func_name: str, inputs: str, measure_folder: str) -> str: + def generate_executable_c( + self, filepath: str, func_name: str, inputs: str, measure_folder: str + ) -> str: """ Modifies the input program to use INPUTS and returns path to modifed C program. @@ -40,11 +43,12 @@ def generate_executable_c(self, filepath: str, func_name: str, inputs: str, meas str: Path to the modified C file. """ - exec_file = generate_executable(filepath, measure_folder, func_name, inputs, self.timing_func, True) + exec_file = generate_executable( + filepath, measure_folder, func_name, inputs, self.timing_func, True + ) return exec_file - - def compile_c_file (self, stored_folder: str, file_name: str) -> str: + def compile_c_file(self, stored_folder: str, file_name: str) -> str: """Use same Make file mechanism as Flexpret to generate .mem file from .c Parameters: @@ -57,7 +61,7 @@ def compile_c_file (self, stored_folder: str, file_name: str) -> str: int: Measured cycle count for C_FILEPATH. """ - + # Generate CMakeList.txt cmakelist_path = os.path.join(stored_folder, "CMakeLists.txt") cmakelist_content = f""" @@ -87,10 +91,10 @@ def compile_c_file (self, stored_folder: str, file_name: str) -> str: fp_add_outputs({file_name}) """ - - with open(cmakelist_path, 'w', encoding='utf-8') as f: + + with open(cmakelist_path, "w", encoding="utf-8") as f: f.write(cmakelist_content) - + # Run cmake cwd = os.getcwd() os.chdir(stored_folder) @@ -113,7 +117,7 @@ def run_backend_and_parse_output(self, file_name: str, build_folder: str) -> int Folder to put all the generated tempraries. 
mem_filepath: str : Path to the .mem file - :return the measurement value + :return the measurement value Returns: int: @@ -127,17 +131,17 @@ def run_backend_and_parse_output(self, file_name: str, build_folder: str) -> int out_filepath = os.path.join(build_folder, "measure.out") while not os.path.exists(out_filepath): - print('Waiting for measure.out file to be generated by FlexPRET') + print("Waiting for measure.out file to be generated by FlexPRET") time.sleep(5) with open(out_filepath, "r") as out_file: lines = out_file.readlines() - - #flexpret console output has two extra lines at the end - print_line = lines[0].split(" ")[-1] if lines else '' - match = re.search(r'\d+$', print_line) - + # flexpret console output has two extra lines at the end + print_line = lines[0].split(" ")[-1] if lines else "" + + match = re.search(r"\d+$", print_line) + if match: extracted_integer = int(match.group()) return extracted_integer @@ -162,7 +166,9 @@ def measure(self, inputs: str, measure_folder: str) -> int: stored_folder: str = measure_folder filepath: str = self.project_config.location_orig_file func_name: str = self.project_config.func - c_filepath: str = self.generate_executable_c(filepath, func_name, inputs, measure_folder) + c_filepath: str = self.generate_executable_c( + filepath, func_name, inputs, measure_folder + ) build_folder: str = self.compile_c_file(stored_folder, file_name) cycle_count: int = self.run_backend_and_parse_output(file_name, build_folder) - return cycle_count \ No newline at end of file + return cycle_count diff --git a/src/backend/generate_executable.py b/src/backend/generate_executable.py index 4f324689..7f9f0a2b 100644 --- a/src/backend/generate_executable.py +++ b/src/backend/generate_executable.py @@ -1,14 +1,37 @@ from pycparser import parse_file, c_generator -from pycparser.c_ast import FuncDef, Decl, FuncCall, ID, Compound, TypeDecl, IdentifierType, FuncDecl, ParamList, Return, Constant, Assignment, ExprList, BinaryOp, NamedInitializer, InitList, Struct, ArrayDecl, Cast, Typename +from pycparser.c_ast import ( + FuncDef, + Decl, + FuncCall, + ID, + Compound, + TypeDecl, + IdentifierType, + FuncDecl, + ParamList, + Return, + Constant, + Assignment, + ExprList, + BinaryOp, + NamedInitializer, + InitList, + Struct, + ArrayDecl, + Cast, + Typename, +) import os import pycparser_fake_libc pycparser_utils_path = pycparser_fake_libc.directory + class ExecutableTransformer(object): - """ + """ Transformation class to modify input C code with desired inputs. """ + def __init__(self, ast, function_name, hexvalues): self.ast = ast self.function_name = function_name @@ -29,7 +52,9 @@ def visit_func(self, node): """ self.arg_types, self.arg_names = self.visit(node) self.new_main = self.gen_main(self.arg_types, self.arg_names) - self.arguments = self.gen_arguments(self.arg_types, self.arg_names, self.hexvalues) + self.arguments = self.gen_arguments( + self.arg_types, self.arg_names, self.hexvalues + ) def visit(self, node): """ @@ -47,7 +72,7 @@ def visit(self, node): arg_types = [param.type for param in params] arg_names = [param.name for param in params] return arg_types, arg_names - + for _, child in node.children(): ret_val = self.visit(child) if ret_val: @@ -66,8 +91,8 @@ def gen_main(self, arg_types, arg_names): Returns: The new MAIN fucntion AST node. 
""" - main_arg_types = ['int', 'char **'] - main_arg_names = ['argc', 'argv'] + main_arg_types = ["int", "char **"] + main_arg_names = ["argc", "argv"] params = [] for m_arg_type, m_arg_name in zip(main_arg_types, main_arg_names): @@ -81,41 +106,42 @@ def gen_main(self, arg_types, arg_names): declname=m_arg_name, quals=[], type=IdentifierType(names=[m_arg_type]), - align=None, + align=None, ), init=None, bitsize=None, - align=None, + align=None, ) ) - + # Create the function declaration (return type and parameters) main_decl = FuncDecl( args=ParamList(params) if params else None, type=TypeDecl( - declname='main', + declname="main", quals=[], - type=IdentifierType(names=['int']), - align=None - ) + type=IdentifierType(names=["int"]), + align=None, + ), ) new_main = FuncDef( decl=Decl( - name='main', + name="main", quals=[], storage=[], funcspec=[], type=main_decl, # Use the function declaration created above init=None, bitsize=None, - align=None, + align=None, ), param_decls=None, - body=self.gen_main_body(arg_types, arg_names) # Here you would set the body of the function + body=self.gen_main_body( + arg_types, arg_names + ), # Here you would set the body of the function ) - return new_main def gen_main_body(self, arg_types, arg_names): @@ -132,96 +158,96 @@ def gen_main_body(self, arg_types, arg_names): The new MAIN function AST node. """ body_items = [] - body_items.append(Decl( - name="start", - quals=[], - storage=[], - funcspec=[], - type=TypeDecl( - declname="start", - quals=[], - type=IdentifierType( - names=["unsigned long long"] - ), - align=None, - ), - init=None, - bitsize=None, - align=None, - ) - ) - - body_items.append(Decl( - name="end", - quals=[], - storage=[], - funcspec=[], - type=TypeDecl( - declname="end", - quals=[], - type=IdentifierType( - names=["unsigned long long"] - ), - align=None, - ), - init=None, - bitsize=None, - align=None, - ) - ) - - body_items.append(Assignment( - op="=", - lvalue=ID(name="start"), - rvalue=FuncCall( - name=ID(name="read_cycle_count"), - args=None - ) - ) - ) - + body_items.append( + Decl( + name="start", + quals=[], + storage=[], + funcspec=[], + type=TypeDecl( + declname="start", + quals=[], + type=IdentifierType(names=["unsigned long long"]), + align=None, + ), + init=None, + bitsize=None, + align=None, + ) + ) + + body_items.append( + Decl( + name="end", + quals=[], + storage=[], + funcspec=[], + type=TypeDecl( + declname="end", + quals=[], + type=IdentifierType(names=["unsigned long long"]), + align=None, + ), + init=None, + bitsize=None, + align=None, + ) + ) + + body_items.append( + Assignment( + op="=", + lvalue=ID(name="start"), + rvalue=FuncCall(name=ID(name="read_cycle_count"), args=None), + ) + ) + # Calling the function of interest body_items.append( FuncCall( - name=ID(name=self.function_name), - args=ID(name=', '.join(arg_names)) + name=ID(name=self.function_name), args=ID(name=", ".join(arg_names)) ) ) - body_items.append(Assignment( - op="=", - lvalue=ID(name="end"), - rvalue=FuncCall( - name=ID(name="read_cycle_count"), - args=None - ) - ) - ) + body_items.append( + Assignment( + op="=", + lvalue=ID(name="end"), + rvalue=FuncCall(name=ID(name="read_cycle_count"), args=None), + ) + ) # printf in Flexpret doesn't support unsigned long long end_minus_start = BinaryOp(op="-", left=ID(name="end"), right=ID(name="start")) cast_expr = Cast( to_type=Typename( - name=None, - quals=[], + name=None, + quals=[], type=TypeDecl( - declname=None, - quals=[], + declname=None, + quals=[], type=IdentifierType(names=["uint32_t"]), 
align=None, ), align=None, ), - expr=end_minus_start + expr=end_minus_start, ) - body_items.append(FuncCall(name=ID(name="printf"), args=ExprList(exprs=[ - Constant(type="string", value='"%i\\n"'), - cast_expr, - ]))) + body_items.append( + FuncCall( + name=ID(name="printf"), + args=ExprList( + exprs=[ + Constant(type="string", value='"%li\\n"'), + cast_expr, + ] + ), + ) + ) - constant_zero = Constant(type='int', value='0') + constant_zero = Constant(type="int", value="0") body_items.append(Return(expr=constant_zero)) return Compound(block_items=body_items) @@ -236,7 +262,9 @@ def is_primitive(self, type_node): Returns: If TYPE_NODE is primitive. """ - return isinstance(type_node, TypeDecl) and isinstance(type_node.type, IdentifierType) + return isinstance(type_node, TypeDecl) and isinstance( + type_node.type, IdentifierType + ) def is_struct(self, type_node): """ @@ -263,7 +291,6 @@ def is_array(self, type_node): If TYPE_NODE is array. """ return isinstance(type_node, ArrayDecl) - def generate_primitive_declaration(self, name, type_node, value): """ @@ -295,12 +322,12 @@ def generate_struct_declaration(self, name, struct_type_node, value_dict): struct_type_node : Pycparser type node for this variable. value_dict : - Dict of struct field to value. + Dict of struct field to value. Returns: String representation of the variable declaration in C for struct variables. """ - #TODO: see how KLEE output structs + # TODO: see how KLEE output structs field_inits = ", ".join([f"{value}" for field, value in value_dict.items()]) return f"struct {struct_type_node.name} {name} = {{ {field_inits} }};" @@ -314,19 +341,19 @@ def generate_array_declaration(self, name, array_type_node, values): array_type_node : Pycparser type node for this variable. value_dict : - Dict of struct field to value. + Dict of struct field to value. Returns: String representation of the variable declaration in C for array variables. """ - #TODO: see how KLEE output nested array + # TODO: see how KLEE output nested array element_type_str = self.get_element_type_str(array_type_node) - #-2 to skip 0x and -1 to remove the trailing space + # -2 to skip 0x and -1 to remove the trailing space values_hex = values[2:-1] - #8 here because integer 32bit are 8 heximal degits + # 8 here because integer 32bit are 8 heximal degits values_chunk = [] for i in range(0, len(values_hex), 8): - values_chunk.append("0x" + values_hex[i:i+8]) + values_chunk.append("0x" + values_hex[i : i + 8]) values_str = ", ".join(values_chunk) return f"{element_type_str} {name}[] = {{ {values_str} }};" @@ -355,19 +382,19 @@ def gen_arguments(self, arg_types, arg_names, hex_values): Parameters: arg_types : List of argument types. - + arg_names : List of argument names. - + hex_values : List of hexidecimal values for arguments. - + Returns: String representation of the arguments declarations. 
""" declarations = [] - + for type_node, name, value in zip(arg_types, arg_names, hex_values): if self.is_primitive(type_node): decl = self.generate_primitive_declaration(name, type_node, value) @@ -376,64 +403,76 @@ def gen_arguments(self, arg_types, arg_names, hex_values): elif self.is_array(type_node): decl = self.generate_array_declaration(name, type_node, value) else: - raise NotImplementedError(f"Type handling not implemented for {type(type_node)}") - + raise NotImplementedError( + f"Type handling not implemented for {type(type_node)}" + ) + declarations.append(decl) return "\n".join(declarations) -def generate_executable(input_file, input_folder, function_name, hex_values_file, timing_function_body, include_flexpret = False): + +def generate_executable( + input_file, + input_folder, + function_name, + hex_values_file, + timing_function_body, + include_flexpret=False, +): """ Modify INPUT_FILE to ingest the values and generate an executable C file. Stored as INPUT_FOLDER/driver.c Parameters: input_file : Input C program. - + input_folder : The folder to output the modified C program. function_name : Name of function being analyzed. - + hex_values_file : Input values used to modify C program. Should be a list of hexidecimal values, 1 per line. - + timing_function_body : The timing function to use to get cycle count. Inserted before and after function call. - + include_flexpret : Flag for if we need to include Flexpret specific headers. (Default value = False) Returns: File path for the modified C program. """ - #hex_values should be a list and each if either an element (primitive type), a list (array), a dict (struct, key is field name and value is value) + # hex_values should be a list and each if either an element (primitive type), a list (array), a dict (struct, key is field name and value is value) hexvalues = [] - with open(hex_values_file, 'r') as file: + with open(hex_values_file, "r") as file: for line in file: hexvalues.append(line) - - ast = parse_file(input_file, use_cpp=True, - cpp_path='clang', - cpp_args=['-E', r'-I{}'.format(pycparser_utils_path)]) - + ast = parse_file( + input_file, + use_cpp=True, + cpp_path="clang", + cpp_args=["-E", r"-I{}".format(pycparser_utils_path)], + ) + transformer = ExecutableTransformer(ast, function_name, hexvalues) transformer.visit_func(ast) # Read the original C file content - with open(input_file, 'r') as file: + with open(input_file, "r") as file: original_c_content = file.read() # This must be included in we want to run flexpret backend (for printf) if include_flexpret: - original_c_content = "#include \n" + original_c_content + original_c_content = "#include \n" + original_c_content else: - original_c_content = "#include \n" + original_c_content + original_c_content = "#include \n" + original_c_content - #TODO: generate global variables, add the global timing function + # TODO: generate global variables, add the global timing function original_c_content += timing_function_body original_c_content += transformer.arguments @@ -441,10 +480,9 @@ def generate_executable(input_file, input_folder, function_name, hex_values_file original_c_content += generator.visit(transformer.new_main) # Write the modified code to a new file - output_file = os.path.join(input_folder + "/" + "driver.c") - with open(output_file, 'w') as f: + output_file = os.path.join(input_folder + "/" + "driver.c") + with open(output_file, "w") as f: f.write(original_c_content) print(f"Modified code written to {output_file}") return output_file - diff --git 
a/src/backend/x86_backend/x86_backend.py b/src/backend/x86_backend/x86_backend.py index 7b4766f0..51ee3b45 100644 --- a/src/backend/x86_backend/x86_backend.py +++ b/src/backend/x86_backend/x86_backend.py @@ -10,6 +10,7 @@ from defaults import logger from gametime_error import GameTimeError + class X86Backend(Backend): timing_func = """ static inline unsigned long long read_cycle_count() { @@ -22,7 +23,9 @@ class X86Backend(Backend): def __init__(self, project_config: ProjectConfiguration): super(X86Backend, self).__init__(project_config, "X86") - def generate_executable(self, filepath: str, func_name: str, inputs: str, measure_folder: str) -> str: + def generate_executable( + self, filepath: str, func_name: str, inputs: str, measure_folder: str + ) -> str: """ Modifies the input program to use INPUTS and generates the executable code. Stored at MEASURE_FOLDER/driver @@ -40,11 +43,27 @@ def generate_executable(self, filepath: str, func_name: str, inputs: str, measur str: Path to the executable code. """ - exec_file = generate_executable(filepath, measure_folder, func_name, inputs, self.timing_func) - modified_bitcode_file_path = clang_helper.compile_to_llvm_for_exec(exec_file, measure_folder, "modified_output", self.project_config.included, self.project_config.compile_flags) - return clang_helper.bc_to_executable(modified_bitcode_file_path, measure_folder, "driver", self.project_config.included, self.project_config.compile_flags) + exec_file = generate_executable( + filepath, measure_folder, func_name, inputs, self.timing_func + ) + modified_bitcode_file_path = clang_helper.compile_to_llvm_for_exec( + exec_file, + measure_folder, + "modified_output", + self.project_config.included, + self.project_config.compile_flags, + ) + return clang_helper.bc_to_executable( + modified_bitcode_file_path, + measure_folder, + "driver", + self.project_config.included, + self.project_config.compile_flags, + ) - def run_backend_and_parse_output(self, stored_folder: str, executable_path: str) -> int: + def run_backend_and_parse_output( + self, stored_folder: str, executable_path: str + ) -> int: """ Runs the executable in EXECUTABLE_PATH in host machine and extracts the outputs from program. Temperaries are stored in STORED_FOLDER. 
@@ -64,16 +83,16 @@ def run_backend_and_parse_output(self, stored_folder: str, executable_path: str) out_filepath = os.path.join(stored_folder, "measure.out") while not os.path.exists(out_filepath): - print('Waiting for measure.out file') + print("Waiting for measure.out file") time.sleep(5) with open(out_filepath, "r") as out_file: lines = out_file.readlines() - - last_line = lines[-1] if lines else '' - match = re.search(r'\d+$', last_line) - + last_line = lines[-1] if lines else "" + + match = re.search(r"\d+$", last_line) + if match: extracted_integer = int(match.group()) return extracted_integer @@ -97,11 +116,15 @@ def measure(self, inputs: str, measure_folder: str) -> int: stored_folder: str = measure_folder filepath: str = self.project_config.location_orig_file func_name: str = self.project_config.func - executable_path: str = self.generate_executable(filepath, func_name, inputs, measure_folder) + executable_path: str = self.generate_executable( + filepath, func_name, inputs, measure_folder + ) cycle_count: int = -1 try: - cycle_count: int = self.run_backend_and_parse_output(stored_folder, executable_path) + cycle_count: int = self.run_backend_and_parse_output( + stored_folder, executable_path + ) except EnvironmentError as e: - err_msg: str = ("Error in measuring the cycle count of a path in X86: %s" % e) + err_msg: str = "Error in measuring the cycle count of a path in X86: %s" % e logger.info(err_msg) - return cycle_count \ No newline at end of file + return cycle_count diff --git a/src/clang_helper.py b/src/clang_helper.py index f4dbfc47..1404da6d 100644 --- a/src/clang_helper.py +++ b/src/clang_helper.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -""" Functions to help interacting with clang on the command +"""Functions to help interacting with clang on the command line. Allows creation of dags """ import os @@ -8,12 +8,20 @@ from typing import List from defaults import logger +from defaults import config from file_helper import remove_files from project_configuration import ProjectConfiguration import command_utils -def compile_to_llvm_for_exec(c_filepath: str, output_file_folder: str, output_name: str, extra_libs: List[str]=[], extra_flags: List[str]=[], readable: bool = False) -> str: +def compile_to_llvm_for_exec( + c_filepath: str, + output_file_folder: str, + output_name: str, + extra_libs: List[str] = [], + extra_flags: List[str] = [], + readable: bool = False, +) -> str: """ Compile the C program into bitcode in OUTPUT_FILE_FOLDER. 
@@ -38,7 +46,15 @@ def compile_to_llvm_for_exec(c_filepath: str, output_file_folder: str, output_na file_to_compile: str = c_filepath output_file: str = os.path.join(output_file_folder, f"{output_name}.bc") - commands: List[str] = ["clang", "-emit-llvm", "-O0", "-o", output_file, "-c", file_to_compile] + extra_flags + commands: List[str] = [ + "clang", + "-emit-llvm", + "-O0", + "-o", + output_file, + "-c", + file_to_compile, + ] + extra_flags for lib in extra_libs: commands.append(f"-I{lib}") command_utils.run(commands) @@ -51,13 +67,41 @@ def compile_to_llvm_for_exec(c_filepath: str, output_file_folder: str, output_na return output_file -def compile_list_to_llvm_for_analysis(c_filepaths: List[str] , output_file_folder: str, extra_libs: List[str]=[], extra_flags: List[str]=[], readable: bool = True) -> List[str]: + +def compile_list_to_llvm_for_analysis( + c_filepaths: List[str], + output_file_folder: str, + extra_libs: List[str] = [], + extra_flags: List[str] = [], + readable: bool = True, +) -> List[str]: compiled_files = [] for c_filepath in c_filepaths: - compiled_files.append(compile_to_llvm_for_analysis(c_filepath, output_file_folder, f"{c_filepath[:-2]}gt", extra_libs, extra_flags, readable)) + _, extension = os.path.splitext(c_filepath) + basename = os.path.basename(c_filepath)[ + : (-len(extension)) + ] # Base filename after removing the extension + compiled_files.append( + compile_to_llvm_for_analysis( + c_filepath, + output_file_folder, + f"{basename}{config.TEMP_SUFFIX}", + extra_libs, + extra_flags, + readable, + ) + ) return compiled_files -def compile_to_llvm_for_analysis(c_filepath: str , output_file_folder: str, output_name: str, extra_libs: List[str]=[], extra_flags: List[str]=[], readable: bool = True) -> str: + +def compile_to_llvm_for_analysis( + c_filepath: str, + output_file_folder: str, + output_name: str, + extra_libs: List[str] = [], + extra_flags: List[str] = [], + readable: bool = True, +) -> str: """ Compile the C program into bitcode in OUTPUT_FILE_FOLDER using -O0 option to preserve maximum structure. @@ -84,7 +128,17 @@ def compile_to_llvm_for_analysis(c_filepath: str , output_file_folder: str, outp # "-Wno-implicit-function-declaration" is required so that clang # does not report "undeclared function '__assert_fail'" - commands: List[str] = ["clang", "-emit-llvm", "-Xclang","-disable-O0-optnone", "-Wno-implicit-function-declaration", "-c", file_to_compile, "-o", output_file] + extra_flags + commands: List[str] = [ + "clang", + "-emit-llvm", + "-Xclang", + "-disable-O0-optnone", + "-Wno-implicit-function-declaration", + "-c", + file_to_compile, + "-o", + output_file, + ] + extra_flags for lib in extra_libs: commands.append(f"-I{lib}") command_utils.run(commands) @@ -96,7 +150,14 @@ def compile_to_llvm_for_analysis(c_filepath: str , output_file_folder: str, outp command_utils.run(commands) return output_file -def bc_to_executable(bc_filepath: str, output_folder: str, output_name: str, extra_libs: List[str]=[], extra_flags: List[str]=[]) -> str: + +def bc_to_executable( + bc_filepath: str, + output_folder: str, + output_name: str, + extra_libs: List[str] = [], + extra_flags: List[str] = [], +) -> str: """ Compile the LLVM bitcode program into executable in OUTPUT_FILE_FOLDER. 
@@ -151,11 +212,22 @@ def dump_object(object_filepath: str, output_folder: str, output_name: str) -> s
 
     output_file: str = os.path.join(output_folder, f"{output_name}.dmp")
 
-    commands: List[str] = ["riscv32-unknown-elf-objdump", "--target=riscv32", "-march=rv32i", object_filepath, "-c", "-o", output_file]
+    commands: List[str] = [
+        "riscv32-unknown-elf-objdump",
+        "--target=riscv32",
+        "-march=rv32i",
+        object_filepath,
+        "-c",
+        "-o",
+        output_file,
+    ]
     command_utils.run(commands)
     return output_file
 
-def generate_dot_file(bc_filename: str, bc_file_folder: str, output_name: str = "main") -> str:
+
+def generate_dot_file(
+    bc_filename: str, bc_file_folder: str, output_name: str = "main"
+) -> str:
     """
     Create dag from .bc file using opt through executing shell commands
 
@@ -175,13 +247,21 @@ def generate_dot_file(bc_filename: str, bc_file_folder: str, output_name: str =
     output_file: str = f".{output_name}.dot"
     cur_cwd: str = os.getcwd()
     os.chdir(bc_file_folder)  # opt generates .dot in cwd
-    commands: List[str] = ["opt", "-passes=dot-cfg", "-S", "-disable-output", bc_filename]
+    commands: List[str] = [
+        "opt",
+        "-passes=dot-cfg",
+        "-S",
+        "-disable-output",
+        bc_filename,
+    ]
     command_utils.run(commands)
     os.chdir(cur_cwd)
     return output_file
 
-def inline_functions(bc_filepath: str, output_file_folder: str, output_name: str) -> str:
+
+def inline_functions(
+    bc_filepath: str, output_file_folder: str, output_name: str
+) -> str:
     """
     Unrolls the provided input file and output the unrolled version in
     the output file using llvm's opt utility. Could be unreliable if input_file
@@ -198,7 +278,7 @@ def inline_functions(bc_filepath: str, output_file_folder: str, output_name: str
 
         output_name: str :
             file to write unrolled .bc file. Outputs in a human-readable form already.
-        
+
     Returns:
         str:
            Path of the output .bc file
 
     """
     output_file: str = os.path.join(output_file_folder, f"{output_name}.bc")
 
-    commands: List[str] = ["opt",
-                           "-passes=\"always-inline,inline\""
-                           "-inline-threshold=10000000",
-                           "-S", bc_filepath,
-                           "-o", output_file]
-
+    commands: List[str] = [
+        "opt",
+        "-passes=always-inline,inline", "-inline-threshold=10000000",
+        "-S",
+        bc_filepath,
+        "-o",
+        output_file,
+    ]
+
     command_utils.run(commands)
     return output_file
 
-def unroll_loops(bc_filepath: str, output_file_folder: str, output_name: str, project_config: ProjectConfiguration) -> str:
+
+def unroll_loops(
+    bc_filepath: str,
+    output_file_folder: str,
+    output_name: str,
+    project_config: ProjectConfiguration,
+) -> str:
     """
     Unrolls the provided input file and output the unrolled version in
     the output file using llvm's opt utility.
    Could be unreliable if input_file
@@ -243,21 +331,28 @@ def unroll_loops(bc_filepath: str, output_file_folder: str, output_name: str, pr
     """
     # return bc_filepath
     output_file: str = os.path.join(output_file_folder, f"{output_name}.bc")
-
-    # Related but unused passes:
-    # -unroll-threshold=10000000, -unroll-count=4,
+
+    # Related but unused passes:
+    # -unroll-threshold=10000000, -unroll-count=4,
     # -unroll-allow-partial, -instcombine,
     # -reassociate, -indvars, -mem2reg
-    commands: List[str] = ["opt",
-                           "-passes='simplifycfg,loops,lcssa,loop-simplify,loop-rotate,indvars,loop-unroll'"
-                           "-S", bc_filepath,
-                           "-o", output_file]
+    commands: List[str] = [
+        "opt",
+        "-passes=simplifycfg,loops,lcssa,loop-simplify,loop-rotate,indvars,loop-unroll",  # one argument per flag; no shell quoting is applied
+        "-S",
+        bc_filepath,
+        "-o",
+        output_file,
+    ]
     command_utils.run(commands)
     return output_file

-def remove_temp_cil_files(project_config: ProjectConfiguration, all_temp_files=False) -> None:
+
+def remove_temp_cil_files(
+    project_config: ProjectConfiguration, all_temp_files=False
+) -> None:
     """
     Removes the temporary files created by CIL during its analysis.

@@ -285,6 +380,5 @@ def remove_temp_cil_files(project_config: ProjectConfiguration, all_temp_files=F
     # By this point, we have files that are named the same as the
     # temporary file for GameTime, but that have different extensions.
     # Remove these files.
-    other_temp_files = r".*-gt\.[^c]+"
+    other_temp_files = rf".*{config.TEMP_SUFFIX}\.[^c]+"
     remove_files([other_temp_files], project_config.location_temp_dir)
-
diff --git a/src/cli.py b/src/cli.py
index 46e8de31..d442268d 100644
--- a/src/cli.py
+++ b/src/cli.py
@@ -26,36 +26,41 @@ def find_config_file(path: str) -> Optional[str]:
     """
     Find the config.yaml file in the given path.
-
+
     Parameters:
         path: str
             Path to search for config file (can be a directory or a file)
-
+
     Returns:
         Optional[str]:
             Path to config.yaml if found, None otherwise
     """
     # If path is a file and it's a yaml file, return it
-    if os.path.isfile(path) and path.endswith(('.yaml', '.yml')):
+    if os.path.isfile(path) and path.endswith((".yaml", ".yml")):
         return path
-
+
     # If path is a directory, look for config.yaml
     if os.path.isdir(path):
-        config_path = os.path.join(path, 'config.yaml')
+        config_path = os.path.join(path, "config.yaml")
         if os.path.exists(config_path):
             return config_path
-
+
         # Also try config.yml
-        config_path = os.path.join(path, 'config.yml')
+        config_path = os.path.join(path, "config.yml")
         if os.path.exists(config_path):
             return config_path
-
+
     return None

-def run_gametime(config_path: str, clean_temp: bool = True, backend: str = None, visualize_weights: bool = False) -> int:
+
+def run_gametime(
+    config_path: str,
+    clean_temp: bool = True,
+    backend: str = None,
+    visualize_weights: bool = False,
+) -> int:
     """
     Run GameTime analysis on the specified configuration.
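    Example (illustrative; the config path is hypothetical):

        exit_code = run_gametime("tests/demo/config.yaml", backend="x86")
        sys.exit(exit_code)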
- + Parameters: config_path: str Path to the configuration file @@ -63,121 +68,138 @@ def run_gametime(config_path: str, clean_temp: bool = True, backend: str = None, Whether to clean temporary files before running (default: True) backend: str Override backend from config (default: None, use config value) - + Returns: int: Exit code (0 for success, non-zero for failure) """ try: # Parse the configuration file logger.info(f"Loading configuration from: {config_path}") - project_config: ProjectConfiguration = YAMLConfigurationParser.parse(config_path) - + project_config: ProjectConfiguration = YAMLConfigurationParser.parse( + config_path + ) + # Override backend if specified if backend: logger.info(f"Overriding backend with: {backend}") project_config.backend = backend - + # Check if backend is specified if not project_config.backend: + # FIXME: Why does logger.error produce duplicate messages. logger.error("Error: No backend specified!") - logger.error("Please specify a backend in the config file or use --backend option") + logger.error( + "Please specify a backend in the config file or use --backend option" + ) logger.error("Available backends: flexpret, x86, arm") return 1 - + # Clean temporary directory if requested if clean_temp and os.path.exists(project_config.location_temp_dir): - logger.info(f"Cleaning temporary directory: {project_config.location_temp_dir}") + logger.info( + f"Cleaning temporary directory: {project_config.location_temp_dir}" + ) shutil.rmtree(project_config.location_temp_dir) - + # Create the analyzer logger.info("Creating analyzer...") analyzer: Analyzer = Analyzer(project_config) - + # Create the DAG logger.info("Creating control flow graph (DAG)...") analyzer.create_dag() - + # Generate basis paths logger.info("Generating basis paths...") basis_paths = analyzer.generate_basis_paths() - + if not basis_paths or len(basis_paths) == 0: logger.error("No basis paths were found!") return 1 - + logger.info(f"Found {len(basis_paths)} basis path(s)") - + # Measure basis paths logger.info("Measuring basis paths...") for i, path in enumerate(basis_paths): value = path.get_measured_value() logger.info(f" Basis path {i}: {path.name} = {value}") - + # Generate additional paths for analysis logger.info("Generating additional paths for analysis...") generated_paths = analyzer.generate_paths() - logger.info(f"Generated {len(generated_paths)} path(s) for measurement") - - # Measure generated paths - logger.info("Measuring generated paths...") + logger.info(f"Generated {len(generated_paths)} path(s)") + + # Collect results from already-measured paths + logger.info("Collecting measurement results...") results = [] for i, path in enumerate(generated_paths): - output_name: str = f'path{i}' - value = analyzer.measure_path(path, output_name) - results.append((path.name, value)) - + measured_value = path.measured_value + predicted_value = path.predicted_value + results.append((path.name, predicted_value, measured_value)) + # Display results - logger.info("\n" + "="*60) + logger.info("\n" + "=" * 60) logger.info("GAMETIME ANALYSIS RESULTS") - logger.info("="*60) + logger.info("=" * 60) logger.info(f"Function analyzed: {project_config.func}") - logger.info(f"Backend: {project_config.backend if project_config.backend else 'default'}") + logger.info( + f"Backend: {project_config.backend if project_config.backend else 'default'}" + ) logger.info(f"Number of basis paths: {len(basis_paths)}") logger.info(f"Number of generated paths: {len(generated_paths)}") - + logger.info("\nBasis Paths:") for 
i, path in enumerate(basis_paths): logger.info(f" {i}: {path.name} = {path.get_measured_value()}") - + if results: logger.info("\nGenerated Paths:") - max_value = max(value for _, value in results) - max_path = [name for name, value in results if value == max_value][0] - - for i, (name, value) in enumerate(results): - marker = " *WCET*" if value == max_value else "" - logger.info(f" {i}: {name} = {value}{marker}") - + max_value = max(measured_value for _, _, measured_value in results) + max_path = [ + name + for name, _, measured_value in results + if measured_value == max_value + ][0] + + for i, (name, predicted_value, measured_value) in enumerate(results): + marker = " *WCET*" if measured_value == max_value else "" + logger.info( + f" {i}: {name} = predicted: {predicted_value}, measured: {measured_value}{marker}" + ) + logger.info(f"\nWorst-Case Execution Time (WCET): {max_value}") logger.info(f"WCET Path: {max_path}") - - logger.info("="*60) - + + logger.info("=" * 60) + # Visualize weighted graph if requested if visualize_weights: - logger.info("\n" + "="*60) + logger.info("\n" + "=" * 60) logger.info("GENERATING WEIGHTED GRAPH DOT FILE") - logger.info("="*60) - + logger.info("=" * 60) + # Estimate edge weights logger.info("Estimating edge weights...") analyzer.estimate_edge_weights() - + # Create output directory for visualizations # Use the project root's visualizations directory (similar to test file) config_dir = os.path.dirname(os.path.abspath(config_path)) # Go up to project root and use visualizations directory there # This matches the test file's approach: ../../visualizations from test location - output_dir = os.path.join(config_dir, '..', '..', 'visualizations') + output_dir = os.path.join(config_dir, "..", "..", "visualizations") output_dir = os.path.abspath(output_dir) os.makedirs(output_dir, exist_ok=True) - + # Generate filename based on function name and backend - func_name = project_config.func.replace(' ', '_').lower() - backend_name = project_config.backend if project_config.backend else 'default' - dot_filename = f'{func_name}_{backend_name}_weighted_graph.dot' + func_name = project_config.func.replace(" ", "_").lower() + backend_name = ( + project_config.backend if project_config.backend else "default" + ) + dot_filename = f"{func_name}_{backend_name}_weighted_graph.dot" dot_path = os.path.join(output_dir, dot_filename) - + # Create edge labels with weights edge_labels = {} edge_list = list(analyzer.dag.all_edges) @@ -186,25 +208,26 @@ def run_gametime(config_path: str, clean_temp: bool = True, backend: str = None, edge = edge_list[i] weight = analyzer.dag.edge_weights[i] if abs(weight) > 0.01: # Only label non-zero weights - edge_labels[edge] = f'{weight:.2f}' + edge_labels[edge] = f"{weight:.2f}" else: # Add zero weights too to avoid KeyError - edge_labels[edge] = '0.00' - + edge_labels[edge] = "0.00" + logger.info(f"Creating weighted graph DOT file...") write_dag_to_dot_file(analyzer.dag, dot_path, edges_to_labels=edge_labels) logger.info(f"DOT file saved to: {dot_path}") - logger.info("="*60) - + logger.info("=" * 60) + logger.info("\nAnalysis completed successfully!") - + return 0 - + except GameTimeError as e: logger.error(f"GameTime Error: {str(e)}") return 1 except Exception as e: logger.error(f"Unexpected error: {str(e)}") import traceback + traceback.print_exc() return 1 @@ -212,7 +235,7 @@ def run_gametime(config_path: str, clean_temp: bool = True, backend: str = None, def main(): """Main entry point for the GameTime CLI.""" parser = 
argparse.ArgumentParser( - description='GameTime - Worst-Case Execution Time (WCET) Analysis Tool', + description="GameTime - Worst-Case Execution Time (WCET) Analysis Tool", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: @@ -230,63 +253,61 @@ def main(): # Run analysis and generate weighted graph visualization gametime /path/to/test/folder --visualize-weights - """ - ) - - parser.add_argument( - 'path', - help='Path to test folder (containing config.yaml) or path to config file' + """, ) - + parser.add_argument( - '--no-clean', - action='store_true', - help='Do not clean temporary files before running' + "path", + help="Path to test folder (containing config.yaml) or path to config file", ) - + parser.add_argument( - '-b', '--backend', - choices=['flexpret', 'x86', 'arm'], - help='Backend to use for execution (overrides config file)' + "--no-clean", + action="store_true", + help="Do not clean temporary files before running", ) - + parser.add_argument( - '--version', - action='version', - version='GameTime 0.1.0' + "-b", + "--backend", + choices=["flexpret", "x86", "arm"], + help="Backend to use for execution (overrides config file)", ) - + + parser.add_argument("--version", action="version", version="GameTime 0.1.0") + parser.add_argument( - '--visualize-weights', - action='store_true', - help='Generate a DOT file visualizing the weighted graph with estimated edge weights' + "--visualize-weights", + action="store_true", + help="Generate a DOT file visualizing the weighted graph with estimated edge weights", ) - + args = parser.parse_args() - + # Validate the path if not os.path.exists(args.path): logger.error(f"Error: Path does not exist: {args.path}") return 1 - + # Find the config file config_path = find_config_file(args.path) if not config_path: logger.error(f"Error: Could not find config.yaml in: {args.path}") - logger.error("Please ensure the directory contains a config.yaml or config.yml file") + logger.error( + "Please ensure the directory contains a config.yaml or config.yml file" + ) return 1 - + # Run GameTime analysis exit_code = run_gametime( config_path, clean_temp=not args.no_clean, backend=args.backend, - visualize_weights=args.visualize_weights + visualize_weights=args.visualize_weights, ) - + return exit_code -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) - diff --git a/src/command_utils.py b/src/command_utils.py index 5851b25e..faf7b22b 100644 --- a/src/command_utils.py +++ b/src/command_utils.py @@ -1,6 +1,7 @@ import subprocess import sys + def run(command, shell=False): print(f"==> Executing command: {' '.join(command)}") result = subprocess.run(command, shell=shell, check=True) @@ -9,4 +10,4 @@ def run(command, shell=False): print(result.stdout) print(result.stderr) sys.exit(1) - return result.stdout \ No newline at end of file + return result.stdout diff --git a/src/config.yaml.in b/src/config.yaml.in index d8d6dbb2..ce99ca62 100644 --- a/src/config.yaml.in +++ b/src/config.yaml.in @@ -25,9 +25,10 @@ IDENT_TEMPVAR: __gt ## temps TEMP_PROJECT_CONFIG: project-config +TEMP_FOLDER: generated TEMP_MERGED: merged TEMP_LOOP_CONFIG: loop-config -TEMP_SUFFIX: gt +TEMP_SUFFIX: _generated TEMP_SUFFIX_MERGED: merged TEMP_SUFFIX_UNROLLED: unrolled TEMP_SUFFIX_INLINED: inlined diff --git a/src/defaults.py b/src/defaults.py index 12393de2..4382c125 100644 --- a/src/defaults.py +++ b/src/defaults.py @@ -35,7 +35,9 @@ logger.info("") #: Default :class:`~gametime.configuration.GametimeConfiguration` object #: that represents the 
 configuration of GameTime.
-config: gametime_configuration.GametimeConfiguration = gametime_configuration.read_gametime_config_yaml(config_file)
+config: gametime_configuration.GametimeConfiguration = (
+    gametime_configuration.read_gametime_config_yaml(config_file)
+)

 logger.info("Successfully configured GameTime.")
 logger.info("")
diff --git a/src/file_helper.py b/src/file_helper.py
index ffe4f7bb..78709547 100644
--- a/src/file_helper.py
+++ b/src/file_helper.py
@@ -38,8 +38,10 @@ def create_dir(location: str) -> None:
             os.makedirs(location)
     except EnvironmentError as e:
         if e.errno != errno.EEXIST:
-            raise GameTimeError("Cannot create directory located at %s: %s" %
-                                (location, e))
+            raise GameTimeError(
+                "Cannot create directory located at %s: %s" % (location, e)
+            )
+

 def remove_file(location: str) -> None:
     """
@@ -55,8 +57,8 @@ def remove_file(location: str) -> None:
         if os.path.exists(location):
             os.remove(location)
     except EnvironmentError as e:
-        raise GameTimeError("Cannot remove file located at %s: %s" %
-                            (location, e))
+        raise GameTimeError("Cannot remove file located at %s: %s" % (location, e))
+

 def remove_files(patterns: List[str], dir_location: str) -> None:
     """
@@ -75,6 +77,7 @@ def remove_files(patterns: List[str], dir_location: str) -> None:
             if re.search(pattern, filename):
                 os.remove(os.path.join(dir_location, filename))

+
 def remove_all_except(patterns: List[str], dir_location: str) -> None:
     """
     Removes all of the files and directories from the directory whose
@@ -100,7 +103,10 @@ def remove_all_except(patterns: List[str], dir_location: str) -> None:
         for dirname in dirs:
             shutil.rmtree(os.path.join(root, dirname))

-def move_files(patterns: List[str], source_dir: str, dest_dir: str, overwrite: bool = True) -> None:
+
+def move_files(
+    patterns: List[str], source_dir: str, dest_dir: str, overwrite: bool = True
+) -> None:
     """
     Moves the files, whose names match any of the patterns in the list
     provided, from the source directory whose location is provided to
@@ -128,4 +134,4 @@ def move_files(patterns: List[str], source_dir: str, dest_dir: str, overwrite: b
         if overwrite and os.path.exists(dest_file):
             os.remove(dest_file)
         if overwrite or not os.path.exists(dest_file):
-            shutil.move(source_file, dest_file)
\ No newline at end of file
+            shutil.move(source_file, dest_file)
diff --git a/src/gametime_configuration.py b/src/gametime_configuration.py
index ac22ddce..9b702e61 100644
--- a/src/gametime_configuration.py
+++ b/src/gametime_configuration.py
@@ -12,6 +12,7 @@
 class Endianness(object):
     """This class represents the endianness of the target machine."""

+
     # Big-endian.
     BIG = 0

     # Little-endian.
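As a brief illustration of how the renamed temporary-file suffix threads through the helpers above (a sketch; the base name "main" is hypothetical):

    from defaults import config

    # config.yaml.in now sets TEMP_SUFFIX to "_generated", so an input
    # "main.c" yields analysis artifacts named like:
    temp_name = f"main{config.TEMP_SUFFIX}"  # "main_generated"
    bitcode = f"{temp_name}.bc"              # "main_generated.bc"
    # ...which the cleanup pattern rf".*{config.TEMP_SUFFIX}\.[^c]+" in
    # remove_temp_cil_files later matches and removes.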
@@ -77,6 +78,7 @@ def __init__(self): self.IDENT_TEMPVAR: str = "" self.TEMP_PROJECT_CONFIG: str = "" + self.TEMP_FOLDER: str = "" self.TEMP_MERGED: str = "" self.TEMP_LOOP_CONFIG: str = "" diff --git a/src/gametime_error.py b/src/gametime_error.py index 4f5ee0c4..d4d6f943 100644 --- a/src/gametime_error.py +++ b/src/gametime_error.py @@ -11,9 +11,11 @@ class GameTimeError(Exception): """Error that GameTime can throw.""" + pass class GameTimeWarning(Warning): """Warning that GameTime can throw.""" + pass diff --git a/src/histogram.py b/src/histogram.py index 1e431a94..e16726cc 100644 --- a/src/histogram.py +++ b/src/histogram.py @@ -18,7 +18,9 @@ from gametime_error import GameTimeError -def compute_histogram(paths: list[Path], bins=10, path_value_range=None, measured=False): +def compute_histogram( + paths: list[Path], bins=10, path_value_range=None, measured=False +): """ Computes a histogram from the values of a list of feasible paths generated by GameTime. This function is @@ -41,13 +43,17 @@ def compute_histogram(paths: list[Path], bins=10, path_value_range=None, measure Tuple, whose first element is an array of the values of the histogram, and whose second element is an array of the left edges of the bins. """ - path_values = [path.measured_value if measured else path.predicted_value - for path in paths] + path_values = [ + path.measured_value if measured else path.predicted_value for path in paths + ] if path_value_range is None: path_value_range = (min(path_values), max(path_values)) return np.histogram(path_values, bins=bins, range=path_value_range) -def write_histogram_to_file(location, paths, bins=10, path_value_range=None, measured=False): + +def write_histogram_to_file( + location, paths, bins=10, path_value_range=None, measured=False +): """ Create a histogram and write it to location. @@ -69,8 +75,10 @@ def write_histogram_to_file(location, paths, bins=10, path_value_range=None, mea try: histogram_file_handler = open(location, "w") except EnvironmentError as e: - err_msg = ("Error writing the histogram to the file located " - "at %s: %s" % (location, e)) + err_msg = "Error writing the histogram to the file located " "at %s: %s" % ( + location, + e, + ) raise GameTimeError(err_msg) else: with histogram_file_handler: diff --git a/src/index_expression.py b/src/index_expression.py index ade6924c..9a8f05c7 100644 --- a/src/index_expression.py +++ b/src/index_expression.py @@ -22,13 +22,11 @@ def __init__(self, name: str, indices: Tuple[int]): self.indices: Tuple[int] = indices def get_name(self) -> str: - """Name of the variable in the expression whose information is stored in this object. - """ + """Name of the variable in the expression whose information is stored in this object.""" return self.name def get_indices(self) -> Tuple[int]: - """Tuple of the temporary index numbers used as indices in the expression. - """ + """Tuple of the temporary index numbers used as indices in the expression.""" return self.indices def __eq__(self, other): @@ -48,6 +46,7 @@ class VariableIndexExpression(IndexExpression): a temporary index variable, where the expression represents a variable. """ + def __init__(self, name): IndexExpression.__init__(self, name, []) @@ -57,4 +56,5 @@ class ArrayIndexExpression(IndexExpression): a temporary index variable, where the expression represents an array (or aggregate) access. 
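    Example (illustrative):

        expr = ArrayIndexExpression("arr", (1, 2))
        expr.get_name()     # "arr"
        expr.get_indices()  # (1, 2), the temporary index numbers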
""" + pass diff --git a/src/inliner.py b/src/inliner.py index 92a7810f..3ba58064 100644 --- a/src/inliner.py +++ b/src/inliner.py @@ -3,8 +3,11 @@ import subprocess import sys + def run_command(command): - result = subprocess.run(command, shell=True, capture_output=True, text=True, errors='replace') + result = subprocess.run( + command, shell=True, capture_output=True, text=True, errors="replace" + ) if result.returncode != 0: print(f"Error running command: {command}") print(result.stdout) @@ -16,31 +19,46 @@ def run_command(command): def link_bitcode(bitcode_files, output_file): run_command(f"llvm-link {' '.join(bitcode_files)} -o {output_file}") + def disassemble_bitcode(input_file, output_file): run_command(f"llvm-dis {input_file} -o {output_file}") + def assemble_bitcode(input_file, output_file): run_command(f"llvm-as {input_file} -o {output_file}") + def inline_bitcode(input_file, output_file): - run_command(f"opt -passes=\"always-inline,inline\" -inline-threshold=10000000 {input_file} -o {output_file}") + run_command( + f'opt -passes="always-inline,inline" -inline-threshold=10000000 {input_file} -o {output_file}' + ) + def modify_llvm_ir(input_file, output_file, skip_function): current_dir = os.path.dirname(os.path.abspath(__file__)) - plugin_path = os.path.normpath(os.path.join(current_dir, "../src/custom_passes/custom_inline_pass.so")) + plugin_path = os.path.normpath( + os.path.join(current_dir, "../src/custom_passes/custom_inline_pass.so") + ) if not os.path.exists(plugin_path): print(plugin_path) print("Plugin Not Found") sys.exit(1) - run_command(f"opt -load-pass-plugin={plugin_path} -passes=custom-inline -analysed-func={skip_function} {input_file} -o {output_file} -S") + run_command( + f"opt -load-pass-plugin={plugin_path} -passes=custom-inline -analysed-func={skip_function} {input_file} -o {output_file} -S" + ) - -def inline_functions(bc_filepaths: list[str], output_file_folder: str, output_name: str, analyzed_function: str) -> str: +def inline_functions( + bc_filepaths: list[str], + output_file_folder: str, + output_name: str, + analyzed_function: str, +) -> str: output_file: str = os.path.join(output_file_folder, f"{output_name}.bc") file_to_analyze = bc_filepaths[0] + # FIXME: Use config.yaml.in to configure filenames. combined_bc = f"{file_to_analyze[:-3]}_linked.bc" combined_ll = f"{file_to_analyze[:-3]}_linked.ll" combined_mod_ll = f"{file_to_analyze[:-3]}_linked_mod.ll" @@ -48,8 +66,6 @@ def inline_functions(bc_filepaths: list[str], output_file_folder: str, output_na combined_inlined_mod_bc = f"{file_to_analyze[:-3]}_linked_inlined_mod.bc" combined_inlined_mod_ll = f"{file_to_analyze[:-3]}_linked_inlined_mod.ll" - - if len(bc_filepaths) > 1: # Step 1: Link all bitcode files into a single combined bitcode file link_bitcode(bc_filepaths, combined_bc) @@ -67,8 +83,8 @@ def inline_functions(bc_filepaths: list[str], output_file_folder: str, output_na # Step 5: Inline the functions in the modified bitcode inline_bitcode(combined_mod_bc, combined_inlined_mod_bc) - + # Step 6: Disassemble the combined bitcode file to LLVM IR for debugging disassemble_bitcode(combined_inlined_mod_bc, combined_inlined_mod_ll) - + return combined_inlined_mod_bc diff --git a/src/interval.py b/src/interval.py index c1cd6ea6..1e939858 100644 --- a/src/interval.py +++ b/src/interval.py @@ -16,18 +16,19 @@ class Interval(object): an interval of values. 
""" - def __init__(self, lower: int=None, upper: int=None): + def __init__(self, lower: int = None, upper: int = None): if lower is not None and upper is not None: lower, upper = sorted((lower, upper)) self.lower: int = lower self.upper: int = upper def __str__(self): - return ("%s%s, %s%s" % - ("(" if self.lower is None else "[", - (self.lower or "-Infinity"), - (self.upper or "Infinity"), - ")" if self.upper is None else "]")) + return "%s%s, %s%s" % ( + "(" if self.lower is None else "[", + (self.lower or "-Infinity"), + (self.upper or "Infinity"), + ")" if self.upper is None else "]", + ) @property def lower_bound(self) -> int: diff --git a/src/legacy_inliner.py b/src/legacy_inliner.py index 12b5608a..1bca7d1f 100644 --- a/src/legacy_inliner.py +++ b/src/legacy_inliner.py @@ -3,8 +3,11 @@ import subprocess import sys + def run_command(command): - result = subprocess.run(command, shell=True, capture_output=True, text=True, errors='replace') + result = subprocess.run( + command, shell=True, capture_output=True, text=True, errors="replace" + ) if result.returncode != 0: print(f"Error running command: {command}") print(result.stdout) @@ -12,15 +15,18 @@ def run_command(command): sys.exit(1) return result.stdout + def link_bitcode(bitcode_files, output_file): run_command(f"llvm-link {' '.join(bitcode_files)} -o {output_file}") + def disassemble_bitcode(input_file, output_file): run_command(f"llvm-dis {input_file} -o {output_file}") + def modify_llvm_ir(input_file, output_file, skip_function): # Read the LLVM IR code from the file - with open(input_file, 'r') as f: + with open(input_file, "r") as f: llvm_ir = f.read() # Split the content by lines for easier processing @@ -35,16 +41,18 @@ def modify_llvm_ir(input_file, output_file, skip_function): line = lines[i] # Check if the line is a function definition - func_match = re.match(r'define\s+\S+\s+@\S+\s*\(.*\)\s*(#\d+)\s*{', line) + func_match = re.match(r"define\s+\S+\s+@\S+\s*\(.*\)\s*(#\d+)\s*{", line) if func_match: func_tag = func_match.group(1) - if f'@{skip_function}(' in line: + if f"@{skip_function}(" in line: # If this function is the one to skip, record its tag skip_tags.add(func_tag) else: # Replace 'noinline' with 'alwaysinline' if not skipping - if 'noinline' in lines[i-1]: # Check previous line for noinline - modified_lines[-1] = modified_lines[-1].replace('noinline', 'alwaysinline') + if "noinline" in lines[i - 1]: # Check previous line for noinline + modified_lines[-1] = modified_lines[-1].replace( + "noinline", "alwaysinline" + ) # Add the processed line to the list of modified lines modified_lines.append(line) @@ -53,32 +61,42 @@ def modify_llvm_ir(input_file, output_file, skip_function): final_lines = [] for line in modified_lines: # Match the attributes definition - attr_match = re.match(r'attributes\s+(#\d+)\s*=\s*{', line) + attr_match = re.match(r"attributes\s+(#\d+)\s*=\s*{", line) if attr_match: attr_tag = attr_match.group(1) if attr_tag not in skip_tags: # Replace 'noinline' with 'alwaysinline' in the attributes if not skipping - line = line.replace('noinline', 'alwaysinline') + line = line.replace("noinline", "alwaysinline") final_lines.append(line) # Join the final lines back into a single string - modified_llvm_ir = '\n'.join(final_lines) + modified_llvm_ir = "\n".join(final_lines) # Write the modified LLVM IR back to the output file - with open(output_file, 'w') as f: + with open(output_file, "w") as f: f.write(modified_llvm_ir) + def assemble_bitcode(input_file, output_file): run_command(f"llvm-as {input_file} 
-o {output_file}")

+
 def inline_bitcode(input_file, output_file):
-    run_command(f"opt -passes=\"always-inline,inline\" -inline-threshold=10000000 {input_file} -o {output_file}")
+    run_command(
+        f'opt -passes="always-inline,inline" -inline-threshold=10000000 {input_file} -o {output_file}'
+    )
+

 def generate_cfg(input_file):
     run_command(f"opt -dot-cfg {input_file}")

-def inline_functions(bc_filepaths: list[str], output_file_folder: str, output_name: str, analyzed_function: str) -> str:
+def inline_functions(
+    bc_filepaths: list[str],
+    output_file_folder: str,
+    output_name: str,
+    analyzed_function: str,
+) -> str:
     output_file: str = os.path.join(output_file_folder, f"{output_name}.bc")
     file_to_analyze = bc_filepaths[0]
     combined_bc = f"{file_to_analyze[:-3]}_linked.bc"
@@ -88,8 +106,6 @@ def inline_functions(bc_filepaths: list[str], output_file_folder: str, output_na
     combined_inlined_mod_bc = f"{file_to_analyze[:-3]}_linked_inlined_mod.bc"
     combined_inlined_mod_ll = f"{file_to_analyze[:-3]}_linked_inlined_mod.ll"

-
-
     if len(bc_filepaths) > 1:
         # Step 1: Link all bitcode files into a single combined bitcode file
         link_bitcode(bc_filepaths, combined_bc)
@@ -107,9 +123,8 @@ def inline_functions(bc_filepaths: list[str], output_file_folder: str, output_na

     # Step 5: Inline the functions in the modified bitcode
     inline_bitcode(combined_mod_bc, combined_inlined_mod_bc)
-
+
     # Step 6: Disassemble the combined bitcode file to LLVM IR for debugging
     disassemble_bitcode(combined_inlined_mod_bc, combined_inlined_mod_ll)
-
-    return combined_inlined_mod_bc
+    return combined_inlined_mod_bc
diff --git a/src/nx_helper.py b/src/nx_helper.py
index 02637d81..a26f8044 100644
--- a/src/nx_helper.py
+++ b/src/nx_helper.py
@@ -20,6 +20,7 @@
 from defaults import logger
 from gametime_error import GameTimeError

+
 def find_root_node(G):
     """
     Parameters:
@@ -35,10 +36,11 @@ def find_root_node(G):
             return node
     return None

-def remove_back_edges_to_make_dag(G, root):
+
+def remove_back_edges_to_make_dag(G, root):
     """
     Remove all back edges from G to make it a DAG, assuming G is connected and rooted at ROOT.
-
+
     Parameters:
         G :
             The graph to remove back edges from
@@ -54,7 +56,7 @@ def remove_back_edges_to_make_dag(G, root):
     # Iteratively perform DFS on unvisited nodes
     stack = [(start_node, iter(G.neighbors(start_node)))]
     visited[start_node] = True
-
+
     while stack:
         parent, children = stack[-1]
         try:
@@ -69,7 +71,11 @@ def remove_back_edges_to_make_dag(G, root):
             stack.pop()

     # Remove identified back edges
+    print(f"num_edges pre = {G.number_of_edges()}")
     G.remove_edges_from(back_edges)
+    # Update num_edges after edge removal
+    G.num_edges = G.number_of_edges()
+    print(f"num_edges post = {G.num_edges}")
     return G

@@ -134,7 +140,7 @@ def __init__(self, *args, **kwargs):
         #: Dictionary that maps edges to their indices in the list of all edges.
         #: This is maintained for efficiency purposes.
-        self.edges_indices: Dict[Tuple[str, str]: int] = {}
+        self.edges_indices: Dict[Tuple[str, str], int] = {}

         #: List of non-special edges in the DAG.
         self.edges_reduced: List[Tuple[str, str]] = []
@@ -161,13 +167,16 @@ def initialize_dictionaries(self):
         # We assume there is only one source and one sink.
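        # Illustration (hypothetical four-node CFG): with edges A->B, A->C,
        # B->D and C->D, only A has in-degree 0 and only D has out-degree 0,
        # so the lookups below select source = A and sink = D.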
         self.source = [node for node in nodes if (self.in_degree(node) == 0)][0]
         self.sink = [node for node in nodes if (self.out_degree(node) == 0)][0]

-        self.nodes_except_sink = [node for node in self.all_nodes
-                                  if node != self.sink]
-        self.nodes_except_source_sink = [node for node in self.all_nodes
-                                         if (node != self.source and node != self.sink)]
+        self.nodes_except_sink = [node for node in self.all_nodes if node != self.sink]
+        self.nodes_except_source_sink = [
+            node
+            for node in self.all_nodes
+            if (node != self.source and node != self.sink)
+        ]

-        self.num_paths = (0 if has_cycles(self) else
-                          num_paths(self, self.source, self.sink))
+        self.num_paths = (
+            0 if has_cycles(self) else num_paths(self, self.source, self.sink)
+        )

         # Initialize dictionaries that map nodes and edges to their indices
         # in the node list and edge list, respectively.
@@ -199,11 +208,12 @@ def reset_edge_weights(self):

     def _init_special_edges(self):
         """
-        To reduce the dimensionality to b = n-m+2, each node, except for
+        To reduce the dimensionality to b = m-n+2 (b is the number of basis paths,
+        m the number of edges, n the number of nodes), each node, except for
         the source and sink, chooses a 'special' edge. This edge is taken if
         flow enters the node, but no outgoing edge is 'visibly' selected.
         In other words, it is the 'default' edge for the node.
-
+
         This method initializes all of the data structures necessary
         to keep track of these special edges.

@@ -247,10 +257,14 @@ def get_node_label(self, node: int) -> str:
         return self.all_nodes_with_description[node][1]["label"]

-def write_dag_to_dot_file(dag: Dag, location: str, dag_name: str = "",
-                          edges_to_labels: Dict[Tuple[str, str], str] = None,
-                          highlighted_edges: List[Tuple[str, str]] = None,
-                          highlight_color: str = "red"):
+def write_dag_to_dot_file(
+    dag: Dag,
+    location: str,
+    dag_name: str = "",
+    edges_to_labels: Dict[Tuple[str, str], str] = None,
+    highlighted_edges: List[Tuple[str, str]] = None,
+    highlight_color: str = "red",
+):
     """
     Writes the directed acyclic graph provided to a file in DOT format.

@@ -297,9 +311,9 @@ def write_dag_to_dot_file(dag: Dag, location: str, dag_name: str = "",
         line = " %s -> %s" % edge
         attributes = []
         if edges_to_labels:
-            attributes.append("label = \"%s\"" % edges_to_labels[edge])
+            attributes.append('label = "%s"' % edges_to_labels[edge])
         if highlighted_edges and edge in highlighted_edges:
-            attributes.append("color = \"%s\"" % highlight_color)
+            attributes.append('color = "%s"' % highlight_color)
         if len(attributes) > 0:
             line += " [%s]" % ", ".join(attributes)
         contents.append("%s;" % line)
@@ -308,8 +322,7 @@ def write_dag_to_dot_file(dag: Dag, location: str, dag_name: str = "",
     try:
         dag_dot_file_handler = open(location, "w")
     except EnvironmentError as e:
-        err_msg = ("Error writing the DAG to a file located at %s: %s" %
-                   (location, e))
+        err_msg = "Error writing the DAG to a file located at %s: %s" % (location, e)
         raise GameTimeError(err_msg)
     else:
         with dag_dot_file_handler:
@@ -326,7 +339,7 @@ def construct_dag(location: str) -> tuple[Dag, bool]:
             Path to the file describing a directed acyclic graph in DOT format

     Returns:
-        `~gametime.src.nx_helper.Dag`:
+        `~gametime.src.nx_helper.Dag`:
             Object that represents the directed acyclic graph, together with
             a bool that is True when back edges were removed to break cycles.
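    Example (illustrative; the .dot path is hypothetical):

        dag, was_modified = construct_dag(".main.dot")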
""" @@ -335,30 +348,36 @@ def construct_dag(location: str) -> tuple[Dag, bool]: graph_from_dot: nx.Graph = nx.nx_agraph.read_dot(f) except EnvironmentError as e: - err_msg: str = ("Error opening the DOT file, located at %s, that contains " - "the directed acyclic graph to analyze: %s") % (location, e) + err_msg: str = ( + "Error opening the DOT file, located at %s, that contains " + "the directed acyclic graph to analyze: %s" + ) % (location, e) raise GameTimeError(err_msg) if not graph_from_dot.is_directed(): raise GameTimeError("CFG isn't directed") - + root = find_root_node(graph_from_dot) if root is None: raise GameTimeError("There is no node without incoming edge in CFG.") - - sink_nodes = [node for node, out_degree in graph_from_dot.out_degree() if out_degree == 0] + + sink_nodes = [ + node for node, out_degree in graph_from_dot.out_degree() if out_degree == 0 + ] if len(sink_nodes) != 1: raise GameTimeError("The number of sink nodes don't equal to 1.") - modifed=False + modified = False if len(list(nx.simple_cycles(graph_from_dot))) > 0: - logger.warning("The control-flow graph has cycles. Trying to remove them by removing back edges.") + logger.warning( + "The control-flow graph has cycles. Trying to remove them by removing back edges." + ) graph_from_dot = remove_back_edges_to_make_dag(graph_from_dot, root) - modifed = True + modified = True dag: Dag = Dag(graph_from_dot) dag.load_variables() - return dag, modifed + return dag, modified def num_paths(dag: Dag, source: str, sink: str) -> int: @@ -378,7 +397,7 @@ def num_paths(dag: Dag, source: str, sink: str) -> int: """ if has_cycles(dag): - err_msg = ("The dag has cycles, so number of path is infinite. Get rid of cycles before analyzing") + err_msg = "The dag has cycles, so number of path is infinite. Get rid of cycles before analyzing" raise GameTimeError(err_msg) # Dictionary that maps each node to the number of paths in the # DAG provided from the input source node to that node. @@ -387,8 +406,10 @@ def num_paths(dag: Dag, source: str, sink: str) -> int: # Topologically sort the nodes in the graph. nodes_to_visit: List[str] = list(nx.topological_sort(dag)) if nodes_to_visit.pop(0) != source: - err_msg = ("The source node should be the first node in " - "a topological sort of the control-flow graph.") + err_msg = ( + "The source node should be the first node in " + "a topological sort of the control-flow graph." + ) raise GameTimeError(err_msg) while len(nodes_to_visit) > 0: @@ -410,13 +431,13 @@ def get_random_path(dag: Dag, source: str, sink: str) -> List[str]: source: str : source to start path with sink: str : - sink to end path with + sink to end path with Returns: List[str]: Random path in the DAG provided from the input source node to the input sink node, represented as a list of nodes arranged in order of traversal from source to sink. - + """ result_path = [source] diff --git a/src/path.py b/src/path.py index 92a4b843..5d138ce5 100644 --- a/src/path.py +++ b/src/path.py @@ -12,7 +12,11 @@ from pulp import PulpError from gametime_error import GameTimeError -from index_expression import VariableIndexExpression, ArrayIndexExpression, IndexExpression +from index_expression import ( + VariableIndexExpression, + ArrayIndexExpression, + IndexExpression, +) from pulp_helper import IlpProblem """See the LICENSE file, located in the root directory of @@ -28,13 +32,20 @@ class Path(object): a single path in the code that is being analyzed. 
""" - def __init__(self, ilp_problem: IlpProblem = None, nodes: List[str] = None, - line_numbers: List[str] = None, - conditions: List[str] = None, condition_edges: Dict[int, Tuple[str]] = None, - condition_truths: Dict[str, bool] = None, - array_accesses: Dict[str, List[Tuple[int]]] = None, - agg_index_exprs: IndexExpression = None, assignments: Dict[str, str] = None, - predicted_value: float = 0, measured_value: float = 0): + def __init__( + self, + ilp_problem: IlpProblem = None, + nodes: List[str] = None, + line_numbers: List[str] = None, + conditions: List[str] = None, + condition_edges: Dict[int, Tuple[str]] = None, + condition_truths: Dict[str, bool] = None, + array_accesses: Dict[str, List[Tuple[int]]] = None, + agg_index_exprs: IndexExpression = None, + assignments: Dict[str, str] = None, + predicted_value: float = 0, + measured_value: float = 0, + ): #: Integer linear programming problem that, when solved, produced #: this path, represented as an ``IlpProblem`` object. self.ilp_problem = ilp_problem @@ -106,12 +117,16 @@ def write_ilp_problem_to_lp_file(self, location) -> None: try: self.ilp_problem.writeLP(location) except (PulpError, EnvironmentError) as e: - err_msg = ("Error writing the integer linear programming " - "problem to an LP file: %s") % e + err_msg = ( + "Error writing the integer linear programming " + "problem to an LP file: %s" + ) % e raise GameTimeError(err_msg) else: - err_msg = ("This path is not associated with an integer linear " - "programming problem.") + err_msg = ( + "This path is not associated with an integer linear " + "programming problem." + ) raise GameTimeError(err_msg) def get_nodes(self) -> str: @@ -132,13 +147,15 @@ def write_nodes_to_file(self, location: str) -> None: Parameters: location: str : Location of the file - + """ try: nodes_file_handler = open(location, "w") except EnvironmentError as e: - err_msg = ("Error writing the IDs of the nodes in " - "a directed acyclic graph along the path: %s") % e + err_msg = ( + "Error writing the IDs of the nodes in " + "a directed acyclic graph along the path: %s" + ) % e raise GameTimeError(err_msg) else: with nodes_file_handler: @@ -153,7 +170,7 @@ def read_nodes_from_file(location: str) -> List[str]: Parameters: location: str : Location of the file - + Returns: List[str] @@ -164,8 +181,10 @@ def read_nodes_from_file(location: str) -> List[str]: try: nodes_file_handler = open(location, "r") except EnvironmentError as e: - err_msg = ("Error reading the IDs of the nodes in " - "a directed acyclic graph along a path: %s") % e + err_msg = ( + "Error reading the IDs of the nodes in " + "a directed acyclic graph along a path: %s" + ) % e raise GameTimeError(err_msg) else: with nodes_file_handler: @@ -196,8 +215,10 @@ def write_line_numbers_to_file(self, location: str) -> None: try: line_numbers_file_handler = open(location, "w") except EnvironmentError as e: - err_msg = ("Error writing line numbers of the source-level " - "statements along the path: %s") % e + err_msg = ( + "Error writing line numbers of the source-level " + "statements along the path: %s" + ) % e raise GameTimeError(err_msg) else: with line_numbers_file_handler: @@ -222,8 +243,10 @@ def read_line_numbers_from_file(location: str) -> List[int]: try: line_numbers_file_handler = open(location, "r") except EnvironmentError as e: - err_msg = ("Error reading line numbers of the source-level " - "statements along a path: %s") % e + err_msg = ( + "Error reading line numbers of the source-level " + "statements along a path: %s" + ) % e raise 
GameTimeError(err_msg) else: with line_numbers_file_handler: @@ -281,8 +304,7 @@ def read_conditions_from_file(location: str) -> List[str]: with conditions_file_handler: conditions = conditions_file_handler.readlines() conditions = [condition.strip() for condition in conditions] - conditions = [condition for condition in conditions - if condition != ""] + conditions = [condition for condition in conditions if condition != ""] return conditions def get_condition_edges(self) -> str: @@ -314,9 +336,11 @@ def write_condition_edges_to_file(self, location: str) -> None: try: condition_edges_file_handler = open(location, "w") except EnvironmentError as e: - err_msg = ("Error writing the numbers of the conditions along " - "the path, and the edges that are associated with " - "the conditions: %s") % e + err_msg = ( + "Error writing the numbers of the conditions along " + "the path, and the edges that are associated with " + "the conditions: %s" + ) % e raise GameTimeError(err_msg) else: with condition_edges_file_handler: @@ -342,9 +366,11 @@ def read_condition_edges_from_file(location: str) -> Dict[int, Tuple[str]]: try: condition_edges_file_handler = open(location, "r") except EnvironmentError as e: - err_msg = ("Error reading the numbers of the conditions along " - "a path, and the edges that are associated with " - "the conditions: %s") % e + err_msg = ( + "Error reading the numbers of the conditions along " + "a path, and the edges that are associated with " + "the conditions: %s" + ) % e raise GameTimeError(err_msg) else: condition_edges = {} @@ -370,8 +396,9 @@ def get_edges_for_conditions(self, condition_nums: List[int]) -> List[Tuple[str] represented as a tuple, and appears only once in the list. """ - return list(set([self.condition_edges[conditionNum] - for conditionNum in condition_nums])) + return list( + set([self.condition_edges[conditionNum] for conditionNum in condition_nums]) + ) def get_condition_truths(self) -> str: """ @@ -401,9 +428,11 @@ def write_condition_truths_to_file(self, location: str) -> None: try: condition_truths_file_handler = open(location, "w") except EnvironmentError as e: - err_msg = ("Error writing line numbers of the conditional " - "points in the code being analyzed, along with " - "their truth values: %s") % e + err_msg = ( + "Error writing line numbers of the conditional " + "points in the code being analyzed, along with " + "their truth values: %s" + ) % e raise GameTimeError(err_msg) else: with condition_truths_file_handler: @@ -419,7 +448,7 @@ def read_condition_truths_from_file(location: str) -> Dict[int, bool]: Parameters: location: str : Location of the file - + Returns: Dict[int, bool]: Dictionary that associates the line numbers of @@ -430,15 +459,16 @@ def read_condition_truths_from_file(location: str) -> Dict[int, bool]: try: condition_truths_file_handler = open(location, "r") except EnvironmentError as e: - err_msg = ("Error reading line numbers of the conditional " - "points in the code being analyzed, along with " - "their truth values along a path: %s") % e + err_msg = ( + "Error reading line numbers of the conditional " + "points in the code being analyzed, along with " + "their truth values along a path: %s" + ) % e raise GameTimeError(err_msg) else: condition_truths = {} with condition_truths_file_handler: - condition_truths_file_lines = \ - condition_truths_file_handler.readlines() + condition_truths_file_lines = condition_truths_file_handler.readlines() for line in condition_truths_file_lines: (lineNumber, conditionTruth) = 
line.strip().split(": ") condition_truths[lineNumber] = conditionTruth == "True" @@ -471,15 +501,19 @@ def write_array_accesses_to_file(self, location: str) -> None: try: array_accesses_file_handler = open(location, "w") except EnvironmentError as e: - err_msg = ("Error writing information about the array accesses " - "made in conditions along the path: %s") % e + err_msg = ( + "Error writing information about the array accesses " + "made in conditions along the path: %s" + ) % e raise GameTimeError(err_msg) else: with array_accesses_file_handler: array_accesses_file_handler.write(self.get_array_accesses()) @staticmethod - def read_array_accesses_from_file(location: str) -> Dict[str, List[Tuple[int, int]]]: + def read_array_accesses_from_file( + location: str, + ) -> Dict[str, List[Tuple[int, int]]]: """ Reads information about the array accesses made in conditions along a path from a file. @@ -487,7 +521,7 @@ def read_array_accesses_from_file(location: str) -> Dict[str, List[Tuple[int, in Parameters: location: str : Location of the file - + Returns: Dict[str, List[Tuple[int, int]]]: @@ -499,8 +533,10 @@ def read_array_accesses_from_file(location: str) -> Dict[str, List[Tuple[int, in try: array_accesses_file_handler = open(location, "r") except EnvironmentError as e: - err_msg = ("Error reading information about the array accesses " - "made in conditions along a path: %s") % e + err_msg = ( + "Error reading information about the array accesses " + "made in conditions along a path: %s" + ) % e raise GameTimeError(err_msg) else: array_accesses = {} @@ -547,9 +583,11 @@ def write_agg_index_exprs_to_file(self, location: str) -> None: try: agg_index_exprs_file_handler = open(location, "w") except EnvironmentError as e: - err_msg = ("Error writing information about the expressions " - "associated with the temporary index variables of " - "aggregate accesses along this path: %s") % e + err_msg = ( + "Error writing information about the expressions " + "associated with the temporary index variables of " + "aggregate accesses along this path: %s" + ) % e raise GameTimeError(err_msg) else: with agg_index_exprs_file_handler: @@ -564,7 +602,7 @@ def read_agg_index_exprs_from_file(location: str) -> Dict[int, IndexExpression]: Parameters: location: str : Location of the file - + Returns: Dict[int, IndexExpression]: @@ -575,9 +613,11 @@ def read_agg_index_exprs_from_file(location: str) -> Dict[int, IndexExpression]: try: agg_index_exprs_file_handler = open(location, "r") except EnvironmentError as e: - err_msg = ("Error reading information about the expressions " - "associated with the temporary index variables of " - "aggregate accesses along a path: %s") % e + err_msg = ( + "Error reading information about the expressions " + "associated with the temporary index variables of " + "aggregate accesses along a path: %s" + ) % e raise GameTimeError(err_msg) else: agg_index_exprs = {} @@ -590,13 +630,15 @@ def read_agg_index_exprs_from_file(location: str) -> Dict[int, IndexExpression]: line_tokens = line_tokens[1].split() if len(line_tokens) == 1: var_name = line_tokens[0] - agg_index_exprs[temp_index_number] = \ - VariableIndexExpression(var_name) + agg_index_exprs[temp_index_number] = VariableIndexExpression( + var_name + ) else: array_var_name = line_tokens[0] indices = tuple(int(index) for index in line_tokens[1:]) - agg_index_exprs[temp_index_number] = \ - ArrayIndexExpression(array_var_name, indices) + agg_index_exprs[temp_index_number] = ArrayIndexExpression( + array_var_name, indices + ) return 
agg_index_exprs def get_assignments(self) -> str: @@ -626,9 +668,11 @@ def write_assignments_to_file(self, location: str) -> None: try: assignment_file_handler = open(location, "w") except EnvironmentError as e: - err_msg = ("Error writing the assignments to variables " - "that would drive an execution of the code " - "along the path: %s") % e + err_msg = ( + "Error writing the assignments to variables " + "that would drive an execution of the code " + "along the path: %s" + ) % e raise GameTimeError(err_msg) else: with assignment_file_handler: @@ -652,17 +696,18 @@ def read_assignments_from_file(location: str) -> Dict[str, str]: try: assignment_file_handler = open(location, "r") except EnvironmentError as e: - err_msg = ("Error reading the assignments to variables " - "that would drive an execution of the code " - "along a path: %s") % e + err_msg = ( + "Error reading the assignments to variables " + "that would drive an execution of the code " + "along a path: %s" + ) % e raise GameTimeError(err_msg) else: assignments = {} with assignment_file_handler: assignment_file_lines = assignment_file_handler.readlines() for line in assignment_file_lines: - (variable, assignment) = \ - line.strip().replace(";", "").split(" = ") + (variable, assignment) = line.strip().replace(";", "").split(" = ") assignments[variable] = assignment return assignments @@ -690,8 +735,8 @@ def get_predicted_value(self) -> str: def write_predicted_value_to_file(self, location: str) -> None: """ - Writes the predicted value of this path to a file. - + Writes the predicted value of this path to a file. + Parameters: location: str : Location of the file @@ -699,8 +744,10 @@ def write_predicted_value_to_file(self, location: str) -> None: try: predicted_value_file_handler = open(location, "w") except EnvironmentError as e: - err_msg = ("Error writing the predicted value of the path " - "to the file located at %s: %s" % (location, e)) + err_msg = ( + "Error writing the predicted value of the path " + "to the file located at %s: %s" % (location, e) + ) raise GameTimeError(err_msg) else: with predicted_value_file_handler: @@ -724,8 +771,10 @@ def read_predicted_value_from_file(location: str) -> float: try: predicted_value_file_handler = open(location, "r") except EnvironmentError as e: - err_msg = ("Error reading the predicted value of a path from " - "the file located at %s: %s" % (location, e)) + err_msg = ( + "Error reading the predicted value of a path from " + "the file located at %s: %s" % (location, e) + ) raise GameTimeError(err_msg) else: with predicted_value_file_handler: @@ -736,10 +785,11 @@ def read_predicted_value_from_file(location: str) -> float: try: result = float(line) except ValueError: - err_msg = ("The following line, in the file located " - "at %s, does not represent a valid " - "predicted value of a path: %s" % - (location, line)) + err_msg = ( + "The following line, in the file located " + "at %s, does not represent a valid " + "predicted value of a path: %s" % (location, line) + ) raise GameTimeError(err_msg) return result @@ -777,8 +827,10 @@ def write_measured_value_to_file(self, location: str) -> None: try: measured_value_file_handler = open(location, "w") except EnvironmentError as e: - err_msg = ("Error writing the measured value of the path " - "to the file located at %s: %s" % (location, e)) + err_msg = ( + "Error writing the measured value of the path " + "to the file located at %s: %s" % (location, e) + ) raise GameTimeError(err_msg) else: with measured_value_file_handler: @@ -802,8 +854,10 
@@ def read_measured_value_from_file(location: str) -> float: try: measured_value_file_handler = open(location, "r") except EnvironmentError as e: - err_msg = ("Error reading the measured value of a path from " - "the file located at %s: %s" % (location, e)) + err_msg = ( + "Error reading the measured value of a path from " + "the file located at %s: %s" % (location, e) + ) raise GameTimeError(err_msg) else: with measured_value_file_handler: @@ -814,10 +868,11 @@ def read_measured_value_from_file(location: str) -> float: try: result = float(line) except ValueError: - err_msg = ("The following line, in the file located " - "at %s, does not represent a valid " - "measured value of a path: %s" % - (location, line)) + err_msg = ( + "The following line, in the file located " + "at %s, does not represent a valid " + "measured value of a path: %s" % (location, line) + ) raise GameTimeError(err_msg) return result diff --git a/src/path_analyzer.py b/src/path_analyzer.py index d7f16af3..c24773fe 100644 --- a/src/path_analyzer.py +++ b/src/path_analyzer.py @@ -8,9 +8,18 @@ from smt_solver.extract_labels import find_labels from smt_solver.smt import run_smt + class PathAnalyzer(object): - def __init__(self, preprocessed_path: str, project_config: ProjectConfiguration, dag: Dag, path: Path, path_name: str, repeat: int = 1): + def __init__( + self, + preprocessed_path: str, + project_config: ProjectConfiguration, + dag: Dag, + path: Path, + path_name: str, + repeat: int = 1, + ): """ used to run the entire simulation on the given path. @@ -24,16 +33,17 @@ def __init__(self, preprocessed_path: str, project_config: ProjectConfiguration, path : Path object corresponding to the path to drive path_name : - all output files will be in folder with path_name; all generated files will have name path_name + "-gt" + all output files will be in folder with path_name; all generated files will have name path_name + config.TEMP_SUFFIX """ - + self.preprocessed_path: str = preprocessed_path self.project_config: ProjectConfiguration = project_config self.dag = dag self.path: Path = path - self.output_folder: str = os.path.join(self.project_config.location_temp_dir, path_name) + self.output_folder: str = os.path.join( + self.project_config.location_temp_dir, path_name + ) self.path_name: str = path_name - self.output_name: str = f'{path_name}-gt' file_helper.create_dir(self.output_folder) self.measure_folders: dict[str, str] = {} bitcode = [] @@ -45,23 +55,46 @@ def __init__(self, preprocessed_path: str, project_config: ProjectConfiguration, with open(all_labels_file, "r") as out_file: lines = out_file.readlines() - all_labels_file = os.path.join(project_config.location_temp_dir, "labels_0.txt") total_num_labels = 0 - number_line_pattern = re.compile(r'^\s*\d+\s*$') + number_line_pattern = re.compile(r"^\s*\d+\s*$") with open(all_labels_file, "r") as out_file: - for line_number, line in enumerate(out_file, 1): # Using enumerate to get line number - if number_line_pattern.match(line): # Check if the line matches the pattern + for line_number, line in enumerate( + out_file, 1 + ): # Using enumerate to get line number + if number_line_pattern.match( + line + ): # Check if the line matches the pattern total_num_labels += 1 else: - raise ValueError(f"Error on line {line_number}: '{line.strip()}' is not a valid line with exactly one number.") + raise ValueError( + f"Error on line {line_number}: '{line.strip()}' is not a valid line with exactly one number." 
+ ) - - self.is_valid = run_smt(self.project_config, labels_file, self.output_folder, total_num_labels) + # Store data needed for feasibility checking + self.labels_file = labels_file + self.total_num_labels = total_num_labels + self.is_valid = None # Will be set by check_feasibility() self.values_filepath = f"{self.output_folder}/klee_input_0_values.txt" self.repeat = repeat + def check_feasibility(self) -> bool: + """ + Check if the path is feasible using KLEE/SMT solver. + + Returns: + bool: True if the path is feasible, False otherwise. + """ + if self.is_valid is None: + self.is_valid = run_smt( + self.project_config, + self.labels_file, + self.output_folder, + self.total_num_labels, + ) + return self.is_valid + def measure_path(self, backend: Backend) -> int: """ run the entire simulation on the given path @@ -73,8 +106,12 @@ def measure_path(self, backend: Backend) -> int: Returns: the total measurement of path given by backend """ - if not self.is_valid: - return float('inf') + # Ensure feasibility has been checked + if self.is_valid is None: + self.check_feasibility() + + if self.is_valid is False: + return float("inf") temp_folder_backend: str = os.path.join(self.output_folder, backend.name) if backend.name not in self.measure_folders.keys(): @@ -83,6 +120,7 @@ def measure_path(self, backend: Backend) -> int: file_helper.create_dir(temp_folder_backend) measured_values = [] for _ in range(self.repeat): - measured_values.append(backend.measure(self.values_filepath, temp_folder_backend)) + measured_values.append( + backend.measure(self.values_filepath, temp_folder_backend) + ) return max(measured_values) - diff --git a/src/path_generator.py b/src/path_generator.py index 61084c5d..1003a9ea 100644 --- a/src/path_generator.py +++ b/src/path_generator.py @@ -20,6 +20,7 @@ from gametime_error import GameTimeError from nx_helper import Dag from path import Path +from path_analyzer import PathAnalyzer class PathType(object): @@ -45,62 +46,79 @@ def get_description(path_type): """ Parameters: - path_type : + path_type : One of the predefined path types - + Returns: One-word description of the path type provided. """ - return ("worst" if path_type is PathType.WORST_CASE else - "best" if path_type is PathType.BEST_CASE else - "random" if path_type is PathType.RANDOM else - "all-dec" if path_type is PathType.ALL_DECREASING else - "all-inc" if path_type is PathType.ALL_INCREASING else "") + return ( + "worst" + if path_type is PathType.WORST_CASE + else ( + "best" + if path_type is PathType.BEST_CASE + else ( + "random" + if path_type is PathType.RANDOM + else ( + "all-dec" + if path_type is PathType.ALL_DECREASING + else "all-inc" if path_type is PathType.ALL_INCREASING else "" + ) + ) + ) + ) class PathGenerator(object): """ Exposes static methods to generate objects of the ``Path`` class that represent different types of feasible paths in the code being analyzed. - + This class is closely related to the ``Analyzer`` class: except for the private helper methods, all of the static methods can also be accessed as instance methods of an ``Analyzer`` object. - + These methods are maintained in this class to keep the codebase cleaner and more modular. Instances of this class should not need to be made. 
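    Example (illustrative; assumes `analyzer` is an Analyzer whose basis
    paths already have values, per the precondition documented under
    generate_paths):

        paths = PathGenerator.generate_paths(
            analyzer, num_paths=10, path_type=PathType.WORST_CASE
        )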
""" @staticmethod - def generate_paths(analyzer, num_paths=10, path_type=PathType.WORST_CASE, - interval=None, use_ob_extraction=False): + def generate_paths( + analyzer, + num_paths=10, + path_type=PathType.WORST_CASE, + interval=None, + use_ob_extraction=False, + ): """ Generates a list of feasible paths of the code being analyzed, each represented by an object of the ``Path`` class. - + The type of the generated paths is determined by the path_type argument, which is a class variable of the ``PathType`` class. By default, this argument is ``PathType.WORST_CASE``. For a description of the types, refer to the documentation of the ``PathType`` class. - + The ``num_paths`` argument is an upper bound on how many paths should - be generated, which is 5 by default. This argument is ignored if + be generated, which is 10 by default. This argument is ignored if this method is used to generate all of the feasible paths of the code being analyzed. - + The ``interval`` argument is an ``Interval`` object that represents the interval of values that the generated paths can have. If no ``Interval`` object is provided, the interval of values is considered to be all real numbers. - + This method is idempotent: a second call to this method will produce the same list of ``Path`` objects as the first call, assuming that nothing has changed between the two calls. - + Precondition: The basis ``Path`` objects of the input ``Analyzer`` object have values associated with them. Refer to either the method ``loadBasisValuesFromFile`` or the method ``loadBasisValues`` in @@ -134,58 +152,71 @@ def generate_paths(analyzer, num_paths=10, path_type=PathType.WORST_CASE, paths = None if path_type == PathType.WORST_CASE: - logger.info("Generating %d worst-case feasible paths..." % - num_paths) - paths = \ - PathGenerator._generate_paths(analyzer, num_paths, - pulp_helper.Extremum.LONGEST, - interval, use_ob_extraction) + logger.info("Generating %d worst-case feasible paths..." % num_paths) + paths = PathGenerator._generate_paths( + analyzer, + num_paths, + pulp_helper.Extremum.LONGEST, + interval, + use_ob_extraction, + ) elif path_type == PathType.BEST_CASE: - logger.info("Generating %d best-case feasible paths..." % - num_paths) - paths = \ - PathGenerator._generate_paths(analyzer, num_paths, - pulp_helper.Extremum.SHORTEST, - interval, use_ob_extraction) + logger.info("Generating %d best-case feasible paths..." % num_paths) + paths = PathGenerator._generate_paths( + analyzer, + num_paths, + pulp_helper.Extremum.SHORTEST, + interval, + use_ob_extraction, + ) if paths is not None: - logger.info("%d of %d paths have been generated." % - (len(paths), num_paths)) + logger.info("%d of %d paths have been generated." % (len(paths), num_paths)) return paths if path_type == PathType.ALL_DECREASING: - logger.info("Generating all feasible paths in decreasing order " - "of value...") - paths = \ - PathGenerator._generate_paths(analyzer, analyzer.dag.num_paths, - pulp_helper.Extremum.LONGEST, - interval, use_ob_extraction) + logger.info( + "Generating all feasible paths in decreasing order " "of value..." 
+ ) + paths = PathGenerator._generate_paths( + analyzer, + analyzer.dag.num_paths, + pulp_helper.Extremum.LONGEST, + interval, + use_ob_extraction, + ) elif path_type == PathType.ALL_INCREASING: - logger.info("Generating all feasible paths in increasing order " - "of value...") - paths = \ - PathGenerator._generate_paths(analyzer, analyzer.dag.num_paths, - pulp_helper.Extremum.SHORTEST, - interval, use_ob_extraction) + logger.info( + "Generating all feasible paths in increasing order " "of value..." + ) + paths = PathGenerator._generate_paths( + analyzer, + analyzer.dag.num_paths, + pulp_helper.Extremum.SHORTEST, + interval, + use_ob_extraction, + ) if paths is not None: logger.info("%d feasible paths have been generated." % len(paths)) return paths if path_type == PathType.RANDOM: logger.info("Generating random feasible paths...") - paths = \ - PathGenerator._generate_paths(analyzer, num_paths, None, interval) - logger.info("%d of %d paths have been generated." % - (len(paths), num_paths)) + paths = PathGenerator._generate_paths(analyzer, num_paths, None, interval) + logger.info("%d of %d paths have been generated." % (len(paths), num_paths)) return paths else: raise GameTimeError("Unrecognized path type: %d" % path_type) @staticmethod - def _generate_paths(analyzer, num_paths, - extremum=pulp_helper.Extremum.LONGEST, - interval=None, use_ob_extraction=False): + def _generate_paths( + analyzer, + num_paths, + extremum=pulp_helper.Extremum.LONGEST, + interval=None, + use_ob_extraction=False, + ): """ - Helper static method for the ``generatePaths`` static method. + Helper static method for the ``generate_paths`` static method. Generates a list of feasible paths of the code being analyzed, each represented by an object of the ``Path`` class. @@ -227,66 +258,85 @@ def _generate_paths(analyzer, num_paths, logger.info("Using the new algorithm to extract the longest path") logger.info("Finding Least Compatible Delta") mu_max = pulp_helper.find_least_compatible_mu_max( - analyzer, analyzer.basis_paths) - logger.info("Found the least mu_max compatible with measurements: " - "%.2f in %.2f seconds" % - (mu_max, time.perf_counter()- before_time)) + analyzer, analyzer.basis_paths + ) + logger.info( + "Found the least mu_max compatible with measurements: " + "%.2f in %.2f seconds" % (mu_max, time.perf_counter() - before_time) + ) analyzer.inferred_mu_max = mu_max before_time = time.perf_counter() logger.info("Calculating error bounds in the estimate") - analyzer.error_scale_factor, path, ilp_problem = \ + analyzer.error_scale_factor, path, ilp_problem = ( pulp_helper.find_worst_expressible_path( - analyzer, analyzer.basis_paths, 0) + analyzer, analyzer.basis_paths, 0 + ) + ) logger.info( - "Total maximal error in estimates is 2 x %.2f x %.2f = %.2f" % - (analyzer.error_scale_factor, mu_max, - 2 * analyzer.error_scale_factor * mu_max)) - logger.info("Calculated in %.2f ms" % (time.perf_counter()- before_time)) + "Total maximal error in estimates is 2 x %.2f x %.2f = %.2f" + % ( + analyzer.error_scale_factor, + mu_max, + 2 * analyzer.error_scale_factor * mu_max, + ) + ) + logger.info("Calculated in %.2f ms" % (time.perf_counter() - before_time)) else: analyzer.estimate_edge_weights() result_paths = [] current_path_num, num_paths_unsat, num_candidate_paths = 0, 0, 0 - while (current_path_num < num_paths and - num_candidate_paths < analyzer.dag.num_paths): - logger.info("Currently generating path %d..." 
% - (current_path_num+1)) - logger.info("So far, %d candidate paths were found to be " - "unsatisfiable." % num_paths_unsat) + while ( + current_path_num < num_paths + and num_candidate_paths < analyzer.dag.num_paths + ): + logger.info("Currently generating path %d..." % (current_path_num + 1)) + logger.info( + "So far, %d candidate paths were found to be " + "unsatisfiable." % num_paths_unsat + ) if analyzer.path_dimension == 1: - warn_msg = ("Basis matrix has dimensions 1x1. " - "There is only one path through the function " - "under analysis.") + warn_msg = ( + "Basis matrix has dimensions 1x1. " + "There is only one path through the function " + "under analysis." + ) logger.warning(warn_msg) - logger.info("Finding a candidate path using an integer " - "linear program...") + logger.info( + "Finding a candidate path using an integer " "linear program..." + ) logger.info("") if extremum is None: source, sink = analyzer.dag.source, analyzer.dag.sink - candidate_path_nodes = nx_helper.get_random_path(analyzer.dag, - source, sink) + candidate_path_nodes = nx_helper.get_random_path( + analyzer.dag, source, sink + ) candidate_path_edges = Dag.get_edges(candidate_path_nodes) analyzer.add_path_bundled_constraint(candidate_path_edges) if use_ob_extraction: - candidate_path_nodes, ilp_problem = \ + candidate_path_nodes, ilp_problem = ( pulp_helper.find_longest_path_with_delta( - analyzer, analyzer.basis_paths, mu_max, extremum) + analyzer, analyzer.basis_paths, mu_max, extremum + ) + ) else: - candidate_path_nodes, ilp_problem = \ - pulp_helper.find_extreme_path(analyzer, - extremum if extremum is not None - else pulp_helper.Extremum.LONGEST, - interval) + candidate_path_nodes, ilp_problem = pulp_helper.find_extreme_path( + analyzer, + extremum if extremum is not None else pulp_helper.Extremum.LONGEST, + interval, + ) logger.info("") if ilp_problem.obj_val is None: - if (extremum is not None or - num_candidate_paths == analyzer.dag.num_paths): + if ( + extremum is not None + or num_candidate_paths == analyzer.dag.num_paths + ): logger.info("Unable to find a new candidate path.") break elif extremum is None: @@ -296,33 +346,42 @@ def _generate_paths(analyzer, num_paths, continue logger.info("Candidate path found.") - logger.info("Running the candidate path to check feasibility and measure run time") candidate_path_edges = Dag.get_edges(candidate_path_nodes) candidate_path_value = ilp_problem.obj_val - result_path = Path(ilp_problem=ilp_problem, nodes=candidate_path_nodes) - logger.info("Candidate path is feasible.") - result_path.set_measured_value(0) + result_path = Path(ilp_problem=ilp_problem, nodes=candidate_path_nodes) result_path.set_predicted_value(candidate_path_value) - result_paths.append(result_path) - logger.info("Path %d generated." 
% (current_path_num+1)) - analyzer.add_path_exclusive_constraint(candidate_path_edges) - current_path_num += 1 - num_paths_unsat = 0 - value = analyzer.measure_path(result_path, f'feasible-path{current_path_num}') - - if value < float('inf'): + # Check path feasibility using KLEE/SMT solver + logger.info("Checking feasibility...") + path_name = f"feasible-path{current_path_num}" + path_analyzer = PathAnalyzer( + analyzer.preprocessed_path, + analyzer.project_config, + analyzer.dag, + result_path, + path_name, + ) + is_feasible = path_analyzer.check_feasibility() + + if is_feasible: logger.info("Candidate path is feasible.") + + # Measure the path (now that we know it's feasible) + logger.info("Measuring run time...") + result_path.path_analyzer = path_analyzer + result_path.name = path_name + value = path_analyzer.measure_path(analyzer.backend) result_path.set_measured_value(value) - result_path.set_predicted_value(candidate_path_value) result_paths.append(result_path) - logger.info("Path %d generated." % (current_path_num+1)) + + logger.info("Path %d generated." % (current_path_num + 1)) analyzer.add_path_exclusive_constraint(candidate_path_edges) current_path_num += 1 num_paths_unsat = 0 else: logger.info("Candidate path is infeasible.") + result_path.set_measured_value(float("inf")) analyzer.add_path_exclusive_constraint(candidate_path_edges) logger.info("Constraint added.") num_paths_unsat += 1 @@ -330,11 +389,11 @@ def _generate_paths(analyzer, num_paths, num_candidate_paths += 1 if extremum is None: analyzer.reset_path_bundled_constraints() - logger.info("") - logger.info("") analyzer.reset_path_exclusive_constraints() - logger.info("Time taken to generate paths: %.2f seconds." % - (time.perf_counter() - start_time)) - return result_paths \ No newline at end of file + logger.info( + "Time taken to generate paths: %.2f seconds." + % (time.perf_counter() - start_time) + ) + return result_paths diff --git a/src/project_configuration.py b/src/project_configuration.py index 9eae0efd..95f7cdf1 100644 --- a/src/project_configuration.py +++ b/src/project_configuration.py @@ -14,11 +14,19 @@ class DebugConfiguration(object): """ - def __init__(self, keep_cil_temps=False, dump_ir=False, - keep_ilp_solver_output=False, dump_instruction_trace=False, - dump_path=False, dump_all_paths=False, dump_smt_trace=False, - dump_all_queries=False, keep_parser_output=False, - keep_simulator_output=False): + def __init__( + self, + keep_cil_temps=False, + dump_ir=False, + keep_ilp_solver_output=False, + dump_instruction_trace=False, + dump_path=False, + dump_all_paths=False, + dump_smt_trace=False, + dump_all_queries=False, + keep_parser_output=False, + keep_simulator_output=False, + ): #: Keep the temporary files that CIL generates during its analysis. self.KEEP_CIL_TEMPS = keep_cil_temps @@ -62,21 +70,38 @@ class ProjectConfiguration(object): Stores information necessary to configure a GameTime project. 
""" - def __init__(self, location_file, func, location_additional_files=None, - start_label="", end_label="", included=None, merged=None, - inlined=None, unroll_loops=False, randomize_initial_basis=False, - maximum_error_scale_factor=10, - determinant_threshold=0.001, max_infeasible_paths=100, - model_as_nested_arrays=False, prevent_basis_refinement=False, - ilp_solver_name="", debug_config=None, gametime_flexpret_path="", - gametime_path="", gametime_file_path="", compile_flags=[], backend=""): + def __init__( + self, + location_file, + func, + location_additional_files=None, + start_label="", + end_label="", + included=None, + merged=None, + inlined=None, + unroll_loops=False, + randomize_initial_basis=False, + maximum_error_scale_factor=10, + determinant_threshold=0.001, + max_infeasible_paths=100, + model_as_nested_arrays=False, + prevent_basis_refinement=False, + ilp_solver_name="", + debug_config=None, + gametime_flexpret_path="", + gametime_path="", + gametime_file_path="", + compile_flags=[], + backend="", + ): ### FILE INFORMATION ### # Location of the directory that contains the file to be analyzed. self.location_orig_dir = "" # Location of the file to be analyzed. self.location_orig_file = location_file - + # Location of the additional files to be analyzed. self.location_additional_files = location_additional_files or [] @@ -181,33 +206,29 @@ def __init__(self, location_file, func, location_additional_files=None, self.debug_config = debug_config or DebugConfiguration() ### INITIALIZATION ### - # Infer the file path without the file extension. - location_orig_with_extension = self.location_orig_file - location_orig_no_extension, extension = \ - os.path.splitext(location_orig_with_extension) - - if extension.lower() == ".c": - self.location_orig_no_extension = location_orig_no_extension - else: - err_msg = ("Error running the project configuration " - "reader: the name of the file to analyze " - "does not end with a `.c` extension.") + _, extension = os.path.splitext(self.location_orig_file) + + if extension.lower() != ".c": + err_msg = ( + "Error running the project configuration " + "reader: the name of the file to analyze " + "does not end with a `.c` extension." + ) raise GameTimeError(err_msg) # Infer the directory that contains the file to analyze. - location_orig_dir = os.path.dirname(location_orig_with_extension) + location_orig_dir = os.path.dirname(self.location_orig_file) self.location_orig_dir = location_orig_dir # Infer the name of the file, both with # and without the extension. - name_orig_file = os.path.basename(location_orig_with_extension) + name_orig_file = os.path.basename(self.location_orig_file) self.name_orig_file = name_orig_file self.name_orig_no_extension = os.path.splitext(name_orig_file)[0] # Infer the name of the temporary directory where # GameTime stores its temporary files during its toolflow. - self.location_temp_dir = ("%s%s" % - (location_orig_no_extension, config.TEMP_SUFFIX)) + self.location_temp_dir = f"{self.location_orig_dir}/{config.TEMP_FOLDER}" # Create the temporary directory, if not already present. location_temp_dir = self.location_temp_dir @@ -217,14 +238,14 @@ def __init__(self, location_file, func, location_additional_files=None, # Infer the name and location of the temporary file to be analyzed # by GameTime, both with and without the extension. 
name_orig_no_extension = self.name_orig_no_extension - name_temp_no_extension = ("%s%s" % - (name_orig_no_extension, config.TEMP_SUFFIX)) + name_temp_no_extension = "%s%s" % (name_orig_no_extension, config.TEMP_SUFFIX) self.name_temp_no_extension = name_temp_no_extension name_temp_file = "%s.c" % name_temp_no_extension self.name_temp_file = name_temp_file - location_temp_file = \ - os.path.normpath(os.path.join(location_temp_dir, name_temp_file)) + location_temp_file = os.path.normpath( + os.path.join(location_temp_dir, name_temp_file) + ) self.location_temp_file = location_temp_file self.location_temp_no_extension = os.path.splitext(location_temp_file)[0] @@ -232,8 +253,9 @@ def __init__(self, location_file, func, location_additional_files=None, # stores the project configuration information. name_xml_file = "%s.xml" % config.TEMP_PROJECT_CONFIG self.name_xml_file = name_xml_file - self.location_xml_file = \ - os.path.normpath(os.path.join(location_temp_dir, name_xml_file)) + self.location_xml_file = os.path.normpath( + os.path.join(location_temp_dir, name_xml_file) + ) # Initialize the PuLP solver object that interfaces with # the ILP solver whose name is provided. @@ -309,4 +331,3 @@ def get_orig_filename_with_extension(self, extension: str, name: str = None) -> filename: str = name + extension orig_filename: str = os.path.join(self.location_orig_dir, filename) return orig_filename - diff --git a/src/project_configuration_parser.py b/src/project_configuration_parser.py index 24b18b3d..4c863e52 100644 --- a/src/project_configuration_parser.py +++ b/src/project_configuration_parser.py @@ -55,7 +55,9 @@ def parse(configuration_file_path: str) -> ProjectConfiguration: """ # Check configuration_file_path exits on the OS if not os.path.exists(configuration_file_path): - err_msg = "Cannot find project configuration file: %s" % configuration_file_path + err_msg = ( + "Cannot find project configuration file: %s" % configuration_file_path + ) raise GameTimeError(err_msg) # Read from configuration_file_path into a dict @@ -64,11 +66,14 @@ def parse(configuration_file_path: str) -> ProjectConfiguration: raw_config = load(raw_file, Loader=Loader) # Check if yaml file contains gametime-project - if 'gametime-project' not in raw_config.keys(): - err_msg = "Cannot find project configuration in file: %s" % configuration_file_path + if "gametime-project" not in raw_config.keys(): + err_msg = ( + "Cannot find project configuration in file: %s" + % configuration_file_path + ) raise GameTimeError(err_msg) - raw_config = raw_config['gametime-project'] + raw_config = raw_config["gametime-project"] # Find the directory that contains the project configuration YAML file. project_config_dir = os.path.dirname(os.path.abspath(configuration_file_path)) @@ -84,19 +89,29 @@ def parse(configuration_file_path: str) -> ProjectConfiguration: determinant_threshold, max_infeasible_paths = 0.001, 100 model_as_nested_arrays, prevent_basis_refinement = False, False ilp_solver_name = "" - gametime_flexpret_path, gametime_path, gametime_file_path = "","","" + gametime_flexpret_path, gametime_path, gametime_file_path = "", "", "" compile_flags = [] backend = "" - # Process information about the file to be analyzed. 
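For reference, a minimal gametime-project YAML of the shape this parser consumes; the file/location and analysis-function keys appear in the match-cases below, while the preprocess section and its key names here are illustrative assumptions:

from yaml import safe_load

example = safe_load("""
gametime-project:
  file:
    location: main.c            # resolved relative to the YAML's directory
    analysis-function: compute  # hypothetical function under analysis
  preprocess:                   # section and key names assumed
    inline: [compute_helper]
  backend: x86
""")
assert "gametime-project" in example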
file_configs: dict[str, Any] = raw_config.get("file", {}) for key in file_configs.keys(): match key: case "location": - location_file = os.path.normpath(os.path.join(project_config_dir, file_configs[key])) + location_file = os.path.normpath( + os.path.join(project_config_dir, file_configs[key]) + ) case "additional-files": - location_additional_files = [os.path.normpath(os.path.join(project_config_dir, additional_file)) for additional_file in file_configs[key]] if file_configs[key] else [] + location_additional_files = ( + [ + os.path.normpath( + os.path.join(project_config_dir, additional_file) + ) + for additional_file in file_configs[key] + ] + if file_configs[key] + else [] + ) case "analysis-function": func = file_configs[key] case "start-label": @@ -114,10 +129,14 @@ def parse(configuration_file_path: str) -> ProjectConfiguration: unroll_loops = bool(preprocess_configs[key]) case "include": if preprocess_configs[key]: - included = get_dir_paths(preprocess_configs[key], project_config_dir) + included = get_dir_paths( + preprocess_configs[key], project_config_dir + ) case "merge": if preprocess_configs[key]: - merged = get_file_paths(preprocess_configs[key], project_config_dir) + merged = get_file_paths( + preprocess_configs[key], project_config_dir + ) case "inline": inlined = preprocess_configs[key] case "compile_flags": @@ -186,22 +205,42 @@ def parse(configuration_file_path: str) -> ProjectConfiguration: case _: warnings.warn("Unrecognized tag : %s" % key, GameTimeWarning) - debug_configuration: DebugConfiguration = DebugConfiguration(keep_cil_temps, dump_ir, - keep_ilp_solver_output, dump_instruction_trace, - dump_path, dump_all_paths, - dump_all_queries, keep_parser_output, - keep_simulator_output) - - project_config: ProjectConfiguration = ProjectConfiguration(location_file, func, location_additional_files, - start_label, end_label, included, - merged, inlined, unroll_loops, - randomize_initial_basis, - maximum_error_scale_factor, - determinant_threshold, max_infeasible_paths, - model_as_nested_arrays, prevent_basis_refinement, - ilp_solver_name, debug_configuration, - gametime_flexpret_path, gametime_path, - gametime_file_path, compile_flags, backend) + debug_configuration: DebugConfiguration = DebugConfiguration( + keep_cil_temps, + dump_ir, + keep_ilp_solver_output, + dump_instruction_trace, + dump_path, + dump_all_paths, + dump_all_queries, + keep_parser_output, + keep_simulator_output, + ) + + project_config: ProjectConfiguration = ProjectConfiguration( + location_file, + func, + location_additional_files, + start_label, + end_label, + included, + merged, + inlined, + unroll_loops, + randomize_initial_basis, + maximum_error_scale_factor, + determinant_threshold, + max_infeasible_paths, + model_as_nested_arrays, + prevent_basis_refinement, + ilp_solver_name, + debug_configuration, + gametime_flexpret_path, + gametime_path, + gametime_file_path, + compile_flags, + backend, + ) logger.info("Successfully loaded project.") logger.info("") return project_config @@ -261,4 +300,6 @@ def get_file_paths(file_paths_str: str, dir_location: str = None) -> List[str]: return result -extension_parser_map: dict[str, Type[ConfigurationParser]] = {".yaml": YAMLConfigurationParser} +extension_parser_map: dict[str, Type[ConfigurationParser]] = { + ".yaml": YAMLConfigurationParser +} diff --git a/src/pulp_helper.py b/src/pulp_helper.py index 8ea8dc23..dc13ec9e 100644 --- a/src/pulp_helper.py +++ b/src/pulp_helper.py @@ -39,30 +39,23 @@ class Extremum(object): #: Name of the integer linear 
program constructed. -_LP_NAME = "gt-FindExtremePath" +_LP_NAME = "FindExtremePath" #: Dictionary that maps the name of an integer linear programming solver to #: a list of the PuLP solver classes that can interface with the solver. _name_ilp_solver_map = { # Default integer linear programming solver of the PuLP package. - "": ([pulp.LpSolverDefault.__class__] if pulp.LpSolverDefault is not None - else []), - + "": ([pulp.LpSolverDefault.__class__] if pulp.LpSolverDefault is not None else []), # CBC mixed integer linear programming solver. "cbc": [pulp.COIN], - # Version of CBC provided with the PuLP package. "cbc-pulp": [pulp.PULP_CBC_CMD], - # IBM ILOG CPLEX Optimizer. "cplex": [pulp.CPLEX], - # GNU Linear Programming Kit (GLPK). "glpk": [pulp.GLPK, pulp.PYGLPK], - # Gurobi Optimizer. "gurobi": [pulp.GUROBI_CMD, pulp.GUROBI], - # FICO Xpress Optimizer. "xpress": [pulp.XPRESS], } @@ -79,10 +72,10 @@ class Extremum(object): } -def is_ilp_solver_name(name: str)->bool: +def is_ilp_solver_name(name: str) -> bool: """ Parameters: - name: str : + name: str : Possible name of an integer linear programming solver Returns: @@ -104,7 +97,9 @@ def get_ilp_solver_names() -> List[str]: return [name for name in _name_ilp_solver_map.keys() if name != ""] -def get_ilp_solver(ilp_solver_name: str, project_config: ProjectConfiguration) -> Optional[pulp.LpSolver]: +def get_ilp_solver( + ilp_solver_name: str, project_config: ProjectConfiguration +) -> Optional[pulp.LpSolver]: """ Parameters: @@ -124,8 +119,9 @@ def get_ilp_solver(ilp_solver_name: str, project_config: ProjectConfiguration) - keep_ilp_solver_output = project_config.debug_config.KEEP_ILP_SOLVER_OUTPUT for ilp_solver_class in _name_ilp_solver_map[ilp_solver_name]: - ilp_solver = ilp_solver_class(keepFiles=keep_ilp_solver_output, - msg=keep_ilp_solver_output) + ilp_solver = ilp_solver_class( + keepFiles=keep_ilp_solver_output, msg=keep_ilp_solver_output + ) if ilp_solver.available(): return ilp_solver return None @@ -196,9 +192,11 @@ def __init__(self, *args, **kwargs): self.obj_val = None -def _get_edge_flow_var(analyzer: Analyzer, - edge_flow_vars: Dict[int, pulp.LpVariable], - edge: Tuple[str, str]) -> pulp.LpVariable: +def _get_edge_flow_var( + analyzer: Analyzer, + edge_flow_vars: Dict[int, pulp.LpVariable], + edge: Tuple[str, str], +) -> pulp.LpVariable: """ Parameters: analyzer: @@ -211,7 +209,7 @@ def _get_edge_flow_var(analyzer: Analyzer, the input ``Analyzer`` object.) edge: Edge whose corresponding PuLP variable is needed. - + Returns: PuLP variable that corresponds to the input edge. @@ -220,9 +218,11 @@ def _get_edge_flow_var(analyzer: Analyzer, return edge_flow_vars[analyzer.dag.edges_indices[edge]] -def _get_edge_flow_vars(analyzer: Analyzer, - edge_flow_vars: Dict[int, pulp.LpVariable], - edges: List[Tuple[str, str]]) -> List[pulp.LpVariable]: +def _get_edge_flow_vars( + analyzer: Analyzer, + edge_flow_vars: Dict[int, pulp.LpVariable], + edges: List[Tuple[str, str]], +) -> List[pulp.LpVariable]: """ Parameters: @@ -236,7 +236,7 @@ def _get_edge_flow_vars(analyzer: Analyzer, the input analyzer.) edges: List of edges whose corresponding PuLP variables are needed. - + Returns: List of the PuLP variables that correspond to each of @@ -285,14 +285,15 @@ def find_least_compatible_mu_max(analyzer: Analyzer, paths): problem = IlpProblem(_LP_NAME) logger.info("Creating variables") - # Set up the variables that correspond to weights of each edge. + # Set up the variables that correspond to weights of each edge. 
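The get_ilp_solver loop above probes each candidate PuLP class and returns the first solver actually installed; the same probe can be run standalone (CBC ships with PuLP, so this normally succeeds):

import pulp

def first_available(solver_classes, keep_output=False):
    # Return the first solver on this machine that PuLP reports as available.
    for solver_class in solver_classes:
        solver = solver_class(keepFiles=keep_output, msg=keep_output)
        if solver.available():
            return solver
    return None

print(first_available([pulp.GUROBI_CMD, pulp.PULP_CBC_CMD]))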
# Each edge is restricted to be a nonnegative real number edge_weights = pulp.LpVariable.dicts("we", range(0, num_edges), 0) # Create the variable that shall correspond to the least delta delta = pulp.LpVariable("delta", 0) for path in paths: - path_weights = \ - _get_edge_flow_vars(analyzer, edge_weights, dag.get_edges(path.nodes)) + path_weights = _get_edge_flow_vars( + analyzer, edge_weights, dag.get_edges(path.nodes) + ) problem += pulp.lpSum(path_weights) <= delta + path.measured_value problem += pulp.lpSum(path_weights) >= -delta + path.measured_value print("LENGTH:", path.measured_value) @@ -318,8 +319,7 @@ def find_least_compatible_mu_max(analyzer: Analyzer, paths): # compact -def find_longest_path_with_delta(analyzer, paths, delta, - extremum=Extremum.LONGEST): +def find_longest_path_with_delta(analyzer, paths, delta, extremum=Extremum.LONGEST): """ This function finds the longest/shortest path compatible with the measured lengths of paths, as given in 'paths', such that the actual @@ -336,7 +336,7 @@ def find_longest_path_with_delta(analyzer, paths, delta, the maximal limit by which the length of a measured path is allowed to differ from the measured value extremum: - Specifies whether we are calculating Extremum.LONGEST or + Specifies whether we are calculating Extremum.LONGEST or Extremum.SHORTEST Returns: @@ -348,9 +348,13 @@ def find_longest_path_with_delta(analyzer, paths, delta, # ILP due to floating-point issues delta *= 1.01 val, result_path, problem = generate_and_solve_core_problem( - analyzer, paths, (lambda path: path.measured_value + delta), + analyzer, + paths, + (lambda path: path.measured_value + delta), (lambda path: path.measured_value - delta), - True, extremum=extremum) + True, + extremum=extremum, + ) return result_path, problem @@ -365,7 +369,7 @@ def make_compact(dag): dag: The graph that gets compactified - Returns: + Returns: A mapping (vertex, vertex) -> edge_number so that the edge (vertex, vertex) in the original graph 'dag' gets mapped to the edge with number 'edge_number'. All edges on a simple path @@ -385,7 +389,8 @@ def dfs(node, edge_index): index = node neighbors = dag.neighbors(node) - if len(neighbors) == 0: return + if len(neighbors) == 0: + return if (len(neighbors) == 1) and (len(dag.predecessors(node)) == 1): # edge gets compactified edge_map[(node, neighbors[0])] = edge_index @@ -404,9 +409,15 @@ def dfs(node, edge_index): return edge_map -def generate_and_solve_core_problem(analyzer, paths, path_function_upper, - path_function_lower, weights_positive, - print_problem=False, extremum=Extremum.LONGEST): +def generate_and_solve_core_problem( + analyzer, + paths, + path_function_upper, + path_function_lower, + weights_positive, + print_problem=False, + extremum=Extremum.LONGEST, +): """ This function actually constructs the ILP to find the longest path in the graph specified by 'analyzer' using the set of measured paths given @@ -436,10 +447,10 @@ def generate_and_solve_core_problem(analyzer, paths, path_function_upper, Boolean value used for debugging. If set to true, the generated ILP is printed. extremum: - Specifies whether we are calculating Extremum.LONGEST or + Specifies whether we are calculating Extremum.LONGEST or Extremum.SHORTEST - Returns: + Returns: Triple consisting of the length of the longest path found, the actual path and the ILP problem generated. @@ -470,7 +481,8 @@ def generate_and_solve_core_problem(analyzer, paths, path_function_upper, # Take M to be the maximum edge length. Add 1.0 to make sure there are # no problems due to rounding errors.
m = max([path_function_upper(path) for path in paths] + [0]) + 1.0 - if not weights_positive: m *= num_edges + if not weights_positive: + m *= num_edges logger.info("Using value %.2f for M --- the maximum edge weight" % m) logger.info("Creating variables") @@ -482,10 +494,12 @@ def generate_and_solve_core_problem(analyzer, paths, path_function_upper, # Set up the variables that correspond to the flow through each edge. # Set each of the variables to be an integer binary variable. - edge_flows = pulp.LpVariable.dicts("EdgeFlow", range(0, new_edges), - 0, 1, pulp.LpBinary) + edge_flows = pulp.LpVariable.dicts( + "EdgeFlow", range(0, new_edges), 0, 1, pulp.LpBinary + ) edge_weights = pulp.LpVariable.dicts( - "we", range(0, new_edges), 0 if weights_positive else -m, m) + "we", range(0, new_edges), 0 if weights_positive else -m, m + ) # for a given 'path' in the original DAG returns the edgeFlow variables # corresponding to the edges along the same path in the compact DAG. @@ -495,15 +509,13 @@ def get_new_indices(compact, edge_flows, path): return path_weights for path in paths: - path_weights = \ - get_new_indices(compact, edge_weights, dag.get_edges(path.nodes)) + path_weights = get_new_indices(compact, edge_weights, dag.get_edges(path.nodes)) problem += pulp.lpSum(path_weights) <= path_function_upper(path) problem += pulp.lpSum(path_weights) >= path_function_lower(path) # Add a constraint for the flow from the source. The flow through all of # the edges out of the source should sum up to exactly 1. - edge_flows_from_source = \ - get_new_indices(compact, edge_flows, dag.out_edges(source)) + edge_flows_from_source = get_new_indices(compact, edge_flows, dag.out_edges(source)) problem += pulp.lpSum(edge_flows_from_source) == 1, "Flows from source" # Add constraints for the rest of the nodes (except sink). The flow @@ -514,18 +526,16 @@ def get_new_indices(compact, edge_flows, path): for node in nodes_except_source_sink: if (dag.neighbors(node) == 1) and (dag.predecessors(node) == 1): continue - edge_flows_to_node = get_new_indices(compact, edge_flows, - dag.in_edges(node)) - edge_flows_from_node = get_new_indices(compact, edge_flows, - dag.out_edges(node)) - problem += \ - (pulp.lpSum(edge_flows_to_node) == pulp.lpSum(edge_flows_from_node), - "Flows through %s" % node) + edge_flows_to_node = get_new_indices(compact, edge_flows, dag.in_edges(node)) + edge_flows_from_node = get_new_indices(compact, edge_flows, dag.out_edges(node)) + problem += ( + pulp.lpSum(edge_flows_to_node) == pulp.lpSum(edge_flows_from_node), + "Flows through %s" % node, + ) # Add a constraint for the flow to the sink. The flow through all of # the edges into the sink should sum up to exactly 1. - edge_flows_to_sink = get_new_indices(compact, edge_flows, - dag.in_edges(sink)) + edge_flows_to_sink = get_new_indices(compact, edge_flows, dag.in_edges(sink)) problem += pulp.lpSum(edge_flows_to_sink) == 1, "Flows to sink"
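The unit-flow encoding above, reduced to a toy diamond DAG: one binary flow variable per edge, exactly one unit leaving the source, conserved at interior nodes, and entering the sink, so the selected edges form a single source-to-sink path. A self-contained sketch using PuLP's bundled CBC; the graph and weights are hypothetical:

import pulp

edges = [("s", "a"), ("s", "b"), ("a", "t"), ("b", "t")]
weights = {("s", "a"): 3, ("s", "b"): 1, ("a", "t"): 2, ("b", "t"): 1}

prob = pulp.LpProblem("toy_extreme_path", pulp.LpMaximize)
flow = pulp.LpVariable.dicts("EdgeFlow", edges, 0, 1, pulp.LpBinary)

prob += pulp.lpSum(weights[e] * flow[e] for e in edges)           # objective
prob += pulp.lpSum(flow[e] for e in edges if e[0] == "s") == 1    # from source
prob += pulp.lpSum(flow[e] for e in edges if e[1] == "t") == 1    # into sink
for node in ("a", "b"):                                           # conservation
    prob += pulp.lpSum(flow[e] for e in edges if e[1] == node) == pulp.lpSum(
        flow[e] for e in edges if e[0] == node
    )
prob.solve(pulp.PULP_CBC_CMD(msg=False))
print([e for e in edges if flow[e].value() == 1])  # -> [('s','a'), ('a','t')]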
# Add constraints for the exclusive path constraints. To ensure that @@ -536,9 +546,10 @@ def get_new_indices(compact, edge_flows, path): # This way, all three of these edges can never be taken together. for constraint_num, path in enumerate(path_exclusive_constraints): edge_flows_in_constraint = get_new_indices(compact, edge_flows, path) - problem += (pulp.lpSum(edge_flows_in_constraint) <= - (len(edge_flows_in_constraint) - 1), - "Path exclusive constraint %d" % (constraint_num + 1)) + problem += ( + pulp.lpSum(edge_flows_in_constraint) <= (len(edge_flows_in_constraint) - 1), + "Path exclusive constraint %d" % (constraint_num + 1), + ) # Each product_vars[index] in the longest path should correspond to # edge_flows[index] * edge_weights[index] @@ -548,7 +559,9 @@ def get_new_indices(compact, edge_flows, path): problem += product_vars[index] <= edge_weights[index] problem += product_vars[index] <= m * edge_flows[index] else: - problem += product_vars[index] >= edge_weights[index] - m * (1.0 - edge_flows[index]) + problem += product_vars[index] >= edge_weights[index] - m * ( + 1.0 - edge_flows[index] + ) problem += product_vars[index] >= 0 objective = pulp.lpSum(product_vars) @@ -563,7 +576,8 @@ def get_new_indices(compact, edge_flows, path): problem.sense = pulp.LpMinimize problem_status = problem.solve(solver=project_config.ilp_solver) - if print_problem: logger.info(problem) + if print_problem: + logger.info(problem) if problem_status != pulp.LpStatusOptimal: logger.info("Maximum value not found.") @@ -575,17 +589,26 @@ def get_new_indices(compact, edge_flows, path): logger.info("Finding the path that corresponds to the maximum value...") # Determine the edges along the extreme path using the solution. - max_path = [edges[edge_num] for edge_num in edge_flows - if edge_flows[edge_num].value() > 0.1] + max_path = [ + edges[edge_num] for edge_num in edge_flows if edge_flows[edge_num].value() > 0.1 + ] logger.info("Path found.") - total_length = sum([product_vars[edge_num].value() for edge_num in edge_flows - if edge_flows[edge_num].value() == 1]) + total_length = sum( + [ + product_vars[edge_num].value() + for edge_num in edge_flows + if edge_flows[edge_num].value() == 1 + ] + ) logger.info("Total length of the path %.2f" % total_length) obj_val_max = total_length - max_path = [edge_num for edge_num in range(0, new_edges) - if edge_flows[edge_num].value() > 0.1] + max_path = [ + edge_num + for edge_num in range(0, new_edges) + if edge_flows[edge_num].value() > 0.1 + ] extreme_path = [] # reverse extreme_path according to the compact edgeMap for edge in max_path: @@ -596,8 +619,9 @@ def get_new_indices(compact, edge_flows, path): # from source to sink. result_path = [] - logger.info("Arranging the nodes along the chosen extreme path " - "in order of traversal...") + logger.info( + "Arranging the nodes along the chosen extreme path in order of traversal..." + ) # To do so, first construct a dictionary mapping a node along the path # to the edge from that node. extreme_path_dict = {} @@ -635,13 +659,14 @@ def find_worst_expressible_path(analyzer, paths, bound): bound: ??? - Returns: + Returns: Triple consisting of the length of the longest path, the path itself and the ILP solved to find the path. """
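The product_vars constraints above are the standard big-M linearization of y = f * w for a binary f and a bounded continuous w: y <= w and y <= M*f (plus y >= w - M*(1 - f) when w may be negative) pin y to w when the edge is taken and to 0 otherwise. A tiny self-contained check of that construction:

import pulp

M = 10.0
prob = pulp.LpProblem("linearize_product", pulp.LpMaximize)
f = pulp.LpVariable("f", 0, 1, pulp.LpBinary)
w = pulp.LpVariable("w", 0, M)
y = pulp.LpVariable("y", 0, M)

prob += y              # maximize the linearized product term
prob += w == 4.0       # pin the edge weight for the demo
prob += f == 1         # the edge is taken
prob += y <= w
prob += y <= M * f
prob.solve(pulp.PULP_CBC_CMD(msg=False))
print(y.value())       # -> 4.0, i.e. y == f * w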
""" return generate_and_solve_core_problem( - analyzer, paths, (lambda x: 1), (lambda x: -1), False) + analyzer, paths, (lambda x: 1), (lambda x: -1), False + ) def find_goodness_of_fit(analyzer, paths, basis): @@ -701,8 +726,7 @@ def find_goodness_of_fit(analyzer, paths, basis): problem += pulp.lpSum(all_coeff_expressing) <= bound # express path i as a linear combination of basis paths for edge in edges: - paths_containing_edge = \ - [j for j in range(num_basis) if (edge in basis[j])] + paths_containing_edge = [j for j in range(num_basis) if (edge in basis[j])] present_coeffs = [coeffs[(i, j)] for j in paths_containing_edge] present = 1 if (edge in paths[i]) else 0 problem += pulp.lpSum(present_coeffs) == present @@ -764,8 +788,7 @@ def find_minimal_overcomplete_basis(analyzer: Analyzer, paths, k): indices = [(i, j) for i in range(num_paths) for j in range(num_paths)] coeffs = pulp.LpVariable.dicts("c", indices, -k, k) abs_values = pulp.LpVariable.dicts("abs", indices, 0, k) - used_paths = pulp.LpVariable.dicts( - "used", range(num_paths), 0, 1, pulp.LpBinary) + used_paths = pulp.LpVariable.dicts("used", range(num_paths), 0, 1, pulp.LpBinary) logger.info("Adding used_paths") for i in range(num_paths): @@ -783,8 +806,7 @@ def find_minimal_overcomplete_basis(analyzer: Analyzer, paths, k): all_coeff_expressing = [abs_values[(i, j)] for j in range(num_paths)] problem += pulp.lpSum(all_coeff_expressing) <= k for edge in edges: - paths_containing_edge = \ - [j for j in range(num_paths) if (edge in paths[j])] + paths_containing_edge = [j for j in range(num_paths) if (edge in paths[j])] present_coeffs = [coeffs[(i, j)] for j in paths_containing_edge] present = 1 if (edge in paths[i]) else 0 problem += pulp.lpSum(present_coeffs) == present @@ -800,8 +822,9 @@ def find_minimal_overcomplete_basis(analyzer: Analyzer, paths, k): logger.info("Minimum value found: %g" % obj_val_min) - solution_paths = \ - [index for index in range(num_paths) if used_paths[index].value() == 1] + solution_paths = [ + index for index in range(num_paths) if used_paths[index].value() == 1 + ] return solution_paths @@ -822,13 +845,13 @@ def find_extreme_path(analyzer, extremum=Extremum.LONGEST, interval=None): is provided, the interval of values is considered to be all_temp_files real numbers. - Returns: + Returns: Tuple whose first element is the longest or the shortest path through the DAG, as a list of nodes along the path (ordered by traversal from source to sink), and whose second element is the integer linear programming problem that was solved to obtain the path, as an object of the ``IlpProblem`` class. - + If no such path is feasible, given the constraints stored in the ``Analyzer`` object and the ``Interval`` object provided, the first element of the tuple is an empty list, and the second @@ -860,13 +883,15 @@ def find_extreme_path(analyzer, extremum=Extremum.LONGEST, interval=None): # Set up the variables that correspond to the flow through each edge. # Set each of the variables to be an integer binary variable. - edge_flows = pulp.LpVariable.dicts("EdgeFlow", range(0, num_edges), - 0, 1, pulp.LpBinary) + edge_flows = pulp.LpVariable.dicts( + "EdgeFlow", range(0, num_edges), 0, 1, pulp.LpBinary + ) # Add a constraint for the flow from the source. The flow through all_temp_files of # the edges out of the source should sum up to exactly 1. 
- edge_flows_from_source = _get_edge_flow_vars(analyzer, edge_flows, - dag.out_edges(source)) + edge_flows_from_source = _get_edge_flow_vars( + analyzer, edge_flows, dag.out_edges(source) + ) problem += pulp.lpSum(edge_flows_from_source) == 1, "Flows from source" # Add constraints for the rest of the nodes (except sink). The flow @@ -875,18 +900,20 @@ def find_extreme_path(analyzer, extremum=Extremum.LONGEST, interval=None): # enter a node, and e_k and e_l exit a node, the corresponding flow # equation is e_i + e_j = e_k + e_l. for node in nodes_except_source_sink: - edge_flows_to_node = _get_edge_flow_vars(analyzer, edge_flows, - dag.in_edges(node)) - edge_flows_from_node = _get_edge_flow_vars(analyzer, edge_flows, - dag.out_edges(node)) - problem += \ - (pulp.lpSum(edge_flows_to_node) == pulp.lpSum(edge_flows_from_node), - "Flows through %s" % node) + edge_flows_to_node = _get_edge_flow_vars( + analyzer, edge_flows, dag.in_edges(node) + ) + edge_flows_from_node = _get_edge_flow_vars( + analyzer, edge_flows, dag.out_edges(node) + ) + problem += ( + pulp.lpSum(edge_flows_to_node) == pulp.lpSum(edge_flows_from_node), + "Flows through %s" % node, + ) # Add a constraint for the flow to the sink. The flow through all of # the edges into the sink should sum up to exactly 1. - edge_flows_to_sink = _get_edge_flow_vars(analyzer, edge_flows, - dag.in_edges(sink)) + edge_flows_to_sink = _get_edge_flow_vars(analyzer, edge_flows, dag.in_edges(sink)) problem += pulp.lpSum(edge_flows_to_sink) == 1, "Flows to sink" # Add constraints for the exclusive path constraints. To ensure that @@ -897,8 +924,10 @@ def find_extreme_path(analyzer, extremum=Extremum.LONGEST, interval=None): # This way, all three of these edges can never be taken together. for constraint_num, path in enumerate(path_exclusive_constraints): edge_flows_in_constraint = _get_edge_flow_vars(analyzer, edge_flows, path) - problem += (pulp.lpSum(edge_flows_in_constraint) <= (len(path) - 1), - "Path exclusive constraint %d" % (constraint_num + 1)) + problem += ( + pulp.lpSum(edge_flows_in_constraint) <= (len(path) - 1), + "Path exclusive constraint %d" % (constraint_num + 1), + ) # Add constraints for the bundled path constraints. If a constraint # contains edges e_a, e_b, e_c, e_d, and each edge *must* be taken, @@ -910,9 +939,10 @@ def find_extreme_path(analyzer, extremum=Extremum.LONGEST, interval=None): first_edge = path[0] first_edge_flow = _get_edge_flow_var(analyzer, edge_flows, first_edge) edge_flows_for_rest = _get_edge_flow_vars(analyzer, edge_flows, path[1:]) - problem += \ - (pulp.lpSum(edge_flows_for_rest) == (len(path) - 1) * first_edge_flow, - "Path bundled constraint %d" % (constraint_num + 1)) + problem += ( + pulp.lpSum(edge_flows_for_rest) == (len(path) - 1) * first_edge_flow, + "Path bundled constraint %d" % (constraint_num + 1), + ) # There may be bounds on the values of the paths that are generated # by this function: we add constraints for these bounds.
For this, @@ -924,11 +954,9 @@ def find_extreme_path(analyzer, extremum=Extremum.LONGEST, interval=None): weighted_edge_flow_vars.append(edge_weight * edge_flow_var) interval = interval or Interval() if interval.has_finite_lower_bound(): - problem += \ - (pulp.lpSum(weighted_edge_flow_vars) >= interval.lower_bound) + problem += pulp.lpSum(weighted_edge_flow_vars) >= interval.lower_bound if interval.has_finite_upper_bound(): - problem += \ - (pulp.lpSum(weighted_edge_flow_vars) <= interval.upper_bound) + problem += pulp.lpSum(weighted_edge_flow_vars) <= interval.upper_bound logger.info("Variables created and constraints added.") @@ -943,7 +971,9 @@ def find_extreme_path(analyzer, extremum=Extremum.LONGEST, interval=None): logger.info("Finding the maximum value of the objective function...") problem.sense = pulp.LpMaximize - problem_status = problem.solve(solver=get_ilp_solver(project_config.ilp_solver, project_config)) + problem_status = problem.solve( + solver=get_ilp_solver(project_config.ilp_solver, project_config) + ) if problem_status != pulp.LpStatusOptimal: logger.info("Maximum value not found.") return [], problem @@ -954,13 +984,19 @@ def find_extreme_path(analyzer, extremum=Extremum.LONGEST, interval=None): logger.info("Finding the path that corresponds to the maximum value...") # Determine the edges along the extreme path using the solution. - max_path = [edges[edge_num] for edge_num in edge_flows.keys() if (edge_flows[edge_num].value() == 1)] + max_path = [ + edges[edge_num] + for edge_num in edge_flows.keys() + if (edge_flows[edge_num].value() == 1) + ] logger.info("Path found.") logger.info("Finding the minimum value of the objective function...") problem.sense = pulp.LpMinimize - problem_status = problem.solve(solver=get_ilp_solver(project_config.ilp_solver, project_config)) + problem_status = problem.solve( + solver=get_ilp_solver(project_config.ilp_solver, project_config) + ) if problem_status != pulp.LpStatusOptimal: logger.info("Minimum value not found.") return [], problem @@ -970,8 +1006,9 @@ def find_extreme_path(analyzer, extremum=Extremum.LONGEST, interval=None): logger.info("Finding the path that corresponds to the minimum value...") # Determine the edges along the extreme path using the solution. - min_path = [edges[edge_num] for edge_num in edge_flows - if edge_flows[edge_num].value() == 1] + min_path = [ + edges[edge_num] for edge_num in edge_flows if edge_flows[edge_num].value() == 1 + ] logger.info("Path found.") # Choose the correct extreme path based on the optimal solutions @@ -979,21 +1016,20 @@ def find_extreme_path(analyzer, extremum=Extremum.LONGEST, interval=None): abs_max, abs_min = abs(obj_val_max), abs(obj_val_min) if extremum is Extremum.LONGEST: extreme_path = max_path if abs_max >= abs_min else min_path - problem.sense = (pulp.LpMaximize if abs_max >= abs_min - else pulp.LpMinimize) + problem.sense = pulp.LpMaximize if abs_max >= abs_min else pulp.LpMinimize problem.obj_val = max(abs_max, abs_min) elif extremum is Extremum.SHORTEST: extreme_path = min_path if abs_max >= abs_min else max_path - problem.sense = (pulp.LpMinimize if abs_max >= abs_min - else pulp.LpMaximize) + problem.sense = pulp.LpMinimize if abs_max >= abs_min else pulp.LpMaximize problem.obj_val = min(abs_max, abs_min) # Arrange the nodes along the extreme path in order of traversal # from source to sink. 
result_path = [] - logger.info("Arranging the nodes along the chosen extreme path " - "in order of traversal...") + logger.info( + "Arranging the nodes along the chosen extreme path " "in order of traversal..." + ) # To do so, first construct a dictionary mapping a node along the path # to the edge from that node. extreme_path_dict = {} @@ -1024,14 +1060,21 @@ def _move_ilp_files(source_dir, dest_dir): to the destination directory whose location is provided. Parameters: - source_dir : + source_dir : Location of the source directory dest_dir : Location of the destination directory """ - move_files([_LP_NAME + r"-pulp\.lp", _LP_NAME + r"-pulp\.mps", - _LP_NAME + r"-pulp\.prt", _LP_NAME + r"-pulp\.sol"], - source_dir, dest_dir) + move_files( + [ + _LP_NAME + r"-pulp\.lp", + _LP_NAME + r"-pulp\.mps", + _LP_NAME + r"-pulp\.prt", + _LP_NAME + r"-pulp\.sol", + ], + source_dir, + dest_dir, + ) def _remove_temp_ilp_files(): diff --git a/src/smt_solver/extract_klee_input.py b/src/smt_solver/extract_klee_input.py index 2bf386a3..ff5dc7d0 100644 --- a/src/smt_solver/extract_klee_input.py +++ b/src/smt_solver/extract_klee_input.py @@ -13,10 +13,10 @@ def write_klee_input_to_file(filename): Path to the KLEE test input file. """ # Define a regular expression pattern to extract hex values - pattern = re.compile(r'object \d+: hex : (0x[0-9a-fA-F]+)') + pattern = re.compile(r"object \d+: hex : (0x[0-9a-fA-F]+)") # Open the input file - with open(filename, 'r') as infile: + with open(filename, "r") as infile: data = infile.read() # Find all hex values using regex @@ -27,16 +27,16 @@ def write_klee_input_to_file(filename): for value in hex_values: hex_str = value[2:] # remove '0x' if len(hex_str) % 2 != 0: - hex_str = '0' + hex_str # pad to even length - bytes_list = [hex_str[i:i+2] for i in range(0, len(hex_str), 2)] + hex_str = "0" + hex_str # pad to even length + bytes_list = [hex_str[i : i + 2] for i in range(0, len(hex_str), 2)] bytes_list.reverse() - big_endian = ''.join(bytes_list) - big_endian_values.append('0x' + big_endian) + big_endian = "".join(bytes_list) + big_endian_values.append("0x" + big_endian) # Write big endian hex values to a new text file values_filename = filename[:-4] + "_values.txt" - with open(values_filename, 'w') as outfile: + with open(values_filename, "w") as outfile: for value in big_endian_values: - outfile.write(value + '\n') + outfile.write(value + "\n") print(f"Big endian hex values extracted and written to {values_filename}") @@ -57,10 +57,9 @@ def find_test_file(klee_last_dir): for root, dirs, files in os.walk(klee_last_dir): for file in files: # Check if the file is a KLEE test case input file - if file.endswith('.ktest'): + if file.endswith(".ktest"): ktest_file = os.path.join(root, file) - assert_err_file = os.path.splitext( - ktest_file)[0] + '.assert.err' + assert_err_file = os.path.splitext(ktest_file)[0] + ".assert.err" # Check if the assert.err file exists if not os.path.exists(assert_err_file): return ktest_file @@ -78,8 +77,8 @@ def run_ktest_tool(ktest_file, output_file): Path to the file where the output will be saved. 
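One worked value from the byte-order flip above: ktest-tool prints each object's bytes in little-endian order, so reversing the byte pairs yields the big-endian form written to the values file (the input value here is hypothetical):

value = "0x0a000000"                      # hypothetical ktest-tool hex dump
hex_str = value[2:]
if len(hex_str) % 2 != 0:
    hex_str = "0" + hex_str               # pad to a whole number of bytes
pairs = [hex_str[i : i + 2] for i in range(0, len(hex_str), 2)]
print("0x" + "".join(reversed(pairs)))    # -> 0x0000000a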
""" # Run ktest-tool on the ktest file and save the output to the output file - with open(output_file, 'w') as f: - subprocess.run(['ktest-tool', ktest_file], stdout=f, text=True) + with open(output_file, "w") as f: + subprocess.run(["ktest-tool", ktest_file], stdout=f, text=True) def find_and_run_test(c_file_gt_dir, output_dir): diff --git a/src/smt_solver/extract_labels.py b/src/smt_solver/extract_labels.py index e6bd2359..90d69221 100644 --- a/src/smt_solver/extract_labels.py +++ b/src/smt_solver/extract_labels.py @@ -1,10 +1,11 @@ import re import os + def find_labels(bitcode_string, output_dir): """ Extract labels from bitcode string representation of Path. - + Parameters: filename : str A file containing all of the basic block labels of the path to be analyzed, @@ -15,13 +16,13 @@ def find_labels(bitcode_string, output_dir): """ # Use regular expression to find the labels - labels = re.findall(r'%(\d+):', bitcode_string) + labels = re.findall(r"%(\d+):", bitcode_string) # Convert labels to integers and store them in a list i = 0 while True: filename = os.path.join(output_dir, f"labels_{i}.txt") try: - with open(filename, 'x') as f: + with open(filename, "x") as f: for label in labels: f.write(f"{label}\n") break diff --git a/src/smt_solver/smt.py b/src/smt_solver/smt.py index 914ef17e..6ee04e10 100644 --- a/src/smt_solver/smt.py +++ b/src/smt_solver/smt.py @@ -8,7 +8,19 @@ from project_configuration import ProjectConfiguration import unroller -def compile_and_run_cplusplus(modify_bit_code_cpp_file, modify_bit_code_exec_file, input_c_file, additional_files, c_filename, labels_file, all_labels_file, func_name, output_dir, project_config: ProjectConfiguration): + +def compile_and_run_cplusplus( + modify_bit_code_cpp_file, + modify_bit_code_exec_file, + input_c_file, + additional_files, + c_filename, + labels_file, + all_labels_file, + func_name, + output_dir, + project_config: ProjectConfiguration, +): """ Compile and run a C++ file that modifies LLVM bitcode, then process the C file through several steps. @@ -33,34 +45,70 @@ def compile_and_run_cplusplus(modify_bit_code_cpp_file, modify_bit_code_exec_fil Configuration object containing project settings, such as included files and compilation flags. 
""" # Get llvm-config flags - llvm_config_command = ['llvm-config', '--cxxflags', '--ldflags', '--libs', 'core', 'support', 'bitreader', 'bitwriter', 'irreader'] - llvm_config_output = subprocess.run(llvm_config_command, capture_output=True, text=True, check=True).stdout.strip().split() + llvm_config_command = [ + "llvm-config", + "--cxxflags", + "--ldflags", + "--libs", + "core", + "support", + "bitreader", + "bitwriter", + "irreader", + ] + llvm_config_output = ( + subprocess.run(llvm_config_command, capture_output=True, text=True, check=True) + .stdout.strip() + .split() + ) # Compile C++ file - compile_command = ['clang++', '-o', modify_bit_code_exec_file, modify_bit_code_cpp_file] + llvm_config_output + compile_command = [ + "clang++", + "-o", + modify_bit_code_exec_file, + modify_bit_code_cpp_file, + ] + llvm_config_output subprocess.run(compile_command, check=True) - #TODO: add extra flag and includes through project configuration - compiled_file = clang_helper.compile_to_llvm_for_analysis(input_c_file, output_dir, c_filename, project_config.included, project_config.compile_flags) - if project_config.inlined : + # TODO: add extra flag and includes through project configuration + compiled_file = clang_helper.compile_to_llvm_for_analysis( + input_c_file, + output_dir, + c_filename, + project_config.included, + project_config.compile_flags, + ) + if project_config.inlined: compiled_additional_files = [] if additional_files: - compiled_additional_files = clang_helper.compile_list_to_llvm_for_analysis(additional_files, output_dir) + compiled_additional_files = clang_helper.compile_list_to_llvm_for_analysis( + additional_files, output_dir + ) compiled_files = [compiled_file] + compiled_additional_files - input_bc_file = inliner.inline_functions(compiled_files, output_dir, c_filename, func_name) - else: + input_bc_file = inliner.inline_functions( + compiled_files, output_dir, c_filename, func_name + ) + else: input_bc_file = compiled_file # input_bc_file = clang_helper.unroll_loops(inlined_file, output_dir, - # f"{c_filename}-unrolled", project_config) - if project_config.UNROLL_LOOPS : + # f"{c_filename}-unrolled", project_config) + if project_config.UNROLL_LOOPS: input_bc_file = unroller.unroll(input_bc_file, output_dir, c_filename) # Run the compiled program # TODO: change modify bc to take in bc file, not c file - run_command = [modify_bit_code_exec_file, input_bc_file, labels_file, all_labels_file, func_name] + run_command = [ + modify_bit_code_exec_file, + input_bc_file, + labels_file, + all_labels_file, + func_name, + ] subprocess.run(run_command, check=True) return f"{input_bc_file[:-3]}_gvMod.bc" + def run_klee(klee_file): """ Run KLEE with the specified file. @@ -69,9 +117,10 @@ def run_klee(klee_file): klee_file : str Path to the file modified for KLEE execution. """ - run_klee_command = ['klee', klee_file] + run_klee_command = ["klee", klee_file] subprocess.run(run_klee_command, check=True) + def extract_labels_from_file(filename): """ Extracts the block labels from the labels file corresponding to the specific path. 
@@ -85,7 +134,7 @@ def extract_labels_from_file(filename): A List of basic block labels """ labels = [] - with open(filename, 'r') as file: + with open(filename, "r") as file: for line in file: try: label = float(line.strip()) @@ -94,14 +143,20 @@ def extract_labels_from_file(filename): print(f"Ignoring non-numeric value: {line.strip()}") return labels -def run_smt(project_config: ProjectConfiguration, labels_file: str, output_dir: str, total_number_of_labels: int): + +def run_smt( + project_config: ProjectConfiguration, + labels_file: str, + output_dir: str, + total_number_of_labels: int, +): """ This function generates the input that drives the program under analysis down the given path. The input is generated by utilizing the symbolic execution engine KLEE, which uses SMT solvers like Z3 under the hood. Before inputting the file into KLEE we need to preprocess the file, which involves modifying the source code to add the KLEE-specific function calls and guide KLEE to only return the input for the given path. - + Parameters: project_config :class:`~gametime.projectConfiguration.ProjectConfiguration` @@ -124,20 +179,36 @@ def run_smt(project_config: ProjectConfiguration, labels_file: str, output_dir: labels = extract_labels_from_file(labels_file) number_of_labels = len(labels) - # format c file to klee - klee_file_path = format_for_klee(c_file, c_file_path, output_dir, number_of_labels, total_number_of_labels, project_config.func) + # format c file to klee + klee_file_path = format_for_klee( + c_file, + c_file_path, + output_dir, + number_of_labels, + total_number_of_labels, + project_config.func, + ) # insert assignments of global variables # Get the path to the smt_solver directory relative to this file smt_solver_dir = os.path.dirname(os.path.abspath(__file__)) - modify_bit_code_cpp_file = os.path.join(smt_solver_dir, 'modify_bitcode.cpp') - modify_bit_code_exec_file = os.path.join(smt_solver_dir, 'modify_bitcode') - modified_klee_file_bc = compile_and_run_cplusplus(modify_bit_code_cpp_file, modify_bit_code_exec_file, klee_file_path, additional_files_path, c_file + "_klee_format", labels_file, os.path.join(project_config.location_temp_dir, "labels_0.txt"), project_config.func, output_dir, project_config) - + modify_bit_code_cpp_file = os.path.join(smt_solver_dir, "modify_bitcode.cpp") + modify_bit_code_exec_file = os.path.join(smt_solver_dir, "modify_bitcode") + modified_klee_file_bc = compile_and_run_cplusplus( + modify_bit_code_cpp_file, + modify_bit_code_exec_file, + klee_file_path, + additional_files_path, + c_file + "_klee_format", + labels_file, + os.path.join(project_config.location_temp_dir, "labels_0.txt"), + project_config.func, + output_dir, + project_config, + ) + # run klee run_klee(modified_klee_file_bc) # extract klee input return find_and_run_test(output_dir, output_dir) - - diff --git a/src/smt_solver/to_klee_format.py b/src/smt_solver/to_klee_format.py index 4b66c5da..338d693b 100644 --- a/src/smt_solver/to_klee_format.py +++ b/src/smt_solver/to_klee_format.py @@ -2,9 +2,11 @@ import os -def format_for_klee(c_file, c_file_path, c_file_gt_dir, n, total_number_of_labels, func_name): +def format_for_klee( + c_file, c_file_path, c_file_gt_dir, n, total_number_of_labels, func_name +): # Read the original C file - with open(c_file_path, 'r') as f: + with open(c_file_path, "r") as f: c_code = f.read() # Regular expression pattern to find header includes (excluding commented lines) @@ -14,12 +16,13 @@ def format_for_klee(c_file, c_file_path, c_file_gt_dir, n, total_number_of_label
diff --git a/src/smt_solver/to_klee_format.py b/src/smt_solver/to_klee_format.py
index 4b66c5da..338d693b 100644
--- a/src/smt_solver/to_klee_format.py
+++ b/src/smt_solver/to_klee_format.py
@@ -2,9 +2,11 @@
 import os


-def format_for_klee(c_file, c_file_path, c_file_gt_dir, n, total_number_of_labels, func_name):
+def format_for_klee(
+    c_file, c_file_path, c_file_gt_dir, n, total_number_of_labels, func_name
+):
     # Read the original C file
-    with open(c_file_path, 'r') as f:
+    with open(c_file_path, "r") as f:
         c_code = f.read()

     # Regular expression pattern to find header includes (excluding commented lines)
@@ -14,12 +16,13 @@ def format_for_klee(c_file, c_file_path, c_file_gt_dir, n, total_number_of_label
     header_matches = re.findall(header_pattern, c_code, flags=re.MULTILINE)

     # Separate headers from the rest of the code
-    c_code = re.sub(header_pattern, lambda match: '',
-                    c_code, flags=re.MULTILINE)
-    c_code = c_code.lstrip('\n')
+    c_code = re.sub(header_pattern, lambda match: "", c_code, flags=re.MULTILINE)
+    c_code = c_code.lstrip("\n")

     # Generate KLEE headers
-    klee_headers = "#include <klee/klee.h>\n#include <assert.h>\n"
+    klee_headers = (
+        "#include <klee/klee.h>\n#include <assert.h>\n"
+    )

     # Generate global boolean variables and initialize them to false/true
     global_booleans = "\n"
@@ -31,17 +34,17 @@ def format_for_klee(c_file, c_file_path, c_file_gt_dir, n, total_number_of_label

     # Generate main function
     main_function = "int main() {\n"
-    function_pattern = rf'^[^\n\S]*\w+\s+{func_name}\s*\(([^)]*)\)\s*\{{'
+    function_pattern = rf"^[^\n\S]*\w+\s+{func_name}\s*\(([^)]*)\)\s*\{{"
     match = re.search(function_pattern, c_code, flags=re.MULTILINE)
     if match:
         function_declaration = match.group()
         args_str = match.group(1)
         # Split arguments by comma, handle possible extra spaces
-        arguments = [arg.strip() for arg in args_str.split(',') if arg.strip()]
+        arguments = [arg.strip() for arg in args_str.split(",") if arg.strip()]
         arg_names = []
         for arg in arguments:
             # Improved regex: captures type, name, and array dimensions (if any)
-            m = re.match(r'([\w\s\*]+?)\s+(\w+)(\s*\[[^\]]*\])?', arg)
+            m = re.match(r"([\w\s\*]+?)\s+(\w+)(\s*\[[^\]]*\])?", arg)
             if m:
                 arg_type = m.group(1).strip()
                 arg_name = m.group(2).strip()
@@ -50,25 +53,25 @@ def format_for_klee(c_file, c_file_path, c_file_gt_dir, n, total_number_of_label
                 main_function += f"    {arg_type} {arg_name}{array_dim};\n"
                 # Symbolic initialization
                 if array_dim:
-                    main_function += f"    klee_make_symbolic({arg_name}, sizeof({arg_name}), \"{arg_name}\");\n"
+                    main_function += f'    klee_make_symbolic({arg_name}, sizeof({arg_name}), "{arg_name}");\n'
                 else:
-                    main_function += f"    klee_make_symbolic(&{arg_name}, sizeof({arg_name}), \"{arg_name}\");\n"
+                    main_function += f'    klee_make_symbolic(&{arg_name}, sizeof({arg_name}), "{arg_name}");\n'
                 # For function call, just use the variable name (no brackets or array_dim)
                 arg_names.append(arg_name)

         # Call the original function with symbolic variables
         main_function += f"    {func_name}("
-        main_function += ', '.join(arg_names) + ");\n"
+        main_function += ", ".join(arg_names) + ");\n"

         # Add KLEE assertions for global variables
         for i in range(total_number_of_labels):
             main_function += f"    klee_assert(conditional_var_{i});\n"
         main_function += "    return 0;\n}"

         # Write the formatted code to the output file
         klee_file = os.path.join(c_file_gt_dir, c_file + "_klee_format.c")
-        with open(klee_file, 'w') as f:
-            f.write(klee_headers + '\n')
+        with open(klee_file, "w") as f:
+            f.write(klee_headers + "\n")
             for header in header_matches:
                 f.write(f"#include {header}\n")  # Write header includes
-            f.write(global_booleans + '\n' + c_code + '\n' + main_function)
+            f.write(global_booleans + "\n" + c_code + "\n" + main_function)
         return klee_file
     else:
         print("No function found in the input file.")
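
# A hedged illustration (not part of the patch) of the harness format_for_klee
# emits. The klee_headers hunk above lost its include targets during extraction;
# <klee/klee.h> is implied by the klee_make_symbolic/klee_assert calls, and
# <assert.h> is an assumption. Paths below are hypothetical; the function
# signature is taken from this diff.
from src.smt_solver.to_klee_format import format_for_klee

klee_file = format_for_klee(
    "test.c",                      # c_file: base name used for the output file
    "test/if_statements/test.c",   # c_file_path: source to transform
    "/tmp/gametime-out",           # c_file_gt_dir: hypothetical output dir
    2,                             # n: labels on this path
    2,                             # total_number_of_labels
    "test",                        # func_name: the analysis function
)
# Expected shape of the generated test.c_klee_format.c (sketch; the
# conditional_var_* globals come from a section outside the hunks shown here):
#     #include <klee/klee.h>
#     ...original #includes, re-emitted...
#     ...global conditional_var_i flags...
#     int test(int x){ ... }        (original code, includes stripped)
#     int main() {
#         int x;
#         klee_make_symbolic(&x, sizeof(x), "x");
#         test(x);
#         klee_assert(conditional_var_0);
#         klee_assert(conditional_var_1);
#         return 0;
#     }
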
diff --git a/src/unroller.py b/src/unroller.py
index 386cc559..5a49d1ba 100644
--- a/src/unroller.py
+++ b/src/unroller.py
@@ -3,6 +3,7 @@
 import sys
 import re

+
 def run_command(command, description):
     """
     Run a shell command and handle errors.
@@ -15,6 +16,7 @@ def run_command(command, description):
         print(f"Error: {description} failed with exit code {e.returncode}.")
         sys.exit(1)

+
 def compile_to_bitcode(c_file, output_bc):
     """
     Compile the C file to LLVM bitcode (.bc file) using clang.
@@ -22,6 +24,7 @@ def compile_to_bitcode(c_file, output_bc):
     command = f"clang -emit-llvm -Xclang -disable-O0-optnone -c {c_file} -o {output_bc}"
     run_command(command, f"Compiling {c_file} to LLVM bitcode")

+
 def generate_llvm_ir(output_bc, output_ir):
     """
     Generate LLVM Intermediate Representation (.ll file) from LLVM bitcode.
@@ -29,6 +32,7 @@ def generate_llvm_ir(output_bc, output_ir):
     command = f"llvm-dis {output_bc} -o {output_ir}"
     run_command(command, f"Generating LLVM IR from bitcode {output_bc}")

+
 def unroll_llvm_ir(input_ir, output_ir):
     """
     Unroll loops in an LLVM IR (.ll) file using opt.
@@ -36,6 +40,7 @@ def unroll_llvm_ir(input_ir, output_ir):
     command = f"opt -passes=loop-unroll -S {input_ir} -o {output_ir}"
     run_command(command, f"Unrolling loops in {input_ir}")

+
 def generate_llvm_dag(output_bc):
     """
     Generate LLVM DAG (.dot file) using opt.
@@ -43,6 +48,7 @@ def generate_llvm_dag(output_bc):
     command = f"opt -passes=dot-cfg -S -disable-output {output_bc}"
     run_command(command, f"Generating LLVM DAG from bitcode {output_bc}")

+
 def modify_loop_branches_to_next_block(input_file_path, output_file_path):
     """
     Modifies an LLVM IR file to replace branches with `!llvm.loop` annotations
@@ -53,20 +59,22 @@ def modify_loop_branches_to_next_block(input_file_path, output_file_path):
         input_file_path (str): Path to the input LLVM IR file.
         output_file_path (str): Path to save the modified LLVM IR file.
     """
-    with open(input_file_path, 'r') as file:
+    with open(input_file_path, "r") as file:
         llvm_ir_code = file.read()

     # Define regex patterns for identifying branch instructions with !llvm.loop and block labels
-    block_pattern = re.compile(r'^(\d+):', re.MULTILINE)  # Matches lines with block labels (e.g., "3:")
-    branch_with_loop_pattern = re.compile(r'br label %(\d+), !llvm.loop')
+    block_pattern = re.compile(
+        r"^(\d+):", re.MULTILINE
+    )  # Matches lines with block labels (e.g., "3:")
+    branch_with_loop_pattern = re.compile(r"br label %(\d+), !llvm.loop")

     # Find all blocks in the order they appear
     blocks = [int(match.group(1)) for match in block_pattern.finditer(llvm_ir_code)]
-    
+
     # Split the code into lines for processing
     lines = llvm_ir_code.splitlines()
     new_lines = []
-    
+
     # Iterate through each line, looking for `!llvm.loop` branches
     current_block = None
     for i, line in enumerate(lines):
@@ -85,8 +93,11 @@ def modify_loop_branches_to_next_block(input_file_path, output_file_path):
                 # Get the label of the next block
                 next_block_num = blocks[current_block_index + 1]
                 # Replace the branch target to point to this next block
-                line = line.replace(f'br label %{loop_branch_match.group(1)}', f'br label %{next_block_num}')
-            
+                line = line.replace(
+                    f"br label %{loop_branch_match.group(1)}",
+                    f"br label %{next_block_num}",
+                )
+
         # Append the modified or unmodified line to the result
         new_lines.append(line)
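
# A self-contained sketch of the rewrite modify_loop_branches_to_next_block
# performs, applying the same two regexes to an invented IR fragment: a
# back-edge branch annotated with !llvm.loop is redirected to the next
# sequential block, so the (already unrolled) loop falls through instead of
# looping again. Here the back-edge targets its own block, so indexing by the
# branch target matches the function's "current block" bookkeeping.
import re

ir = "5:\n  br label %5, !llvm.loop !6\n8:\n  ret i32 0"
blocks = [int(m.group(1)) for m in re.finditer(r"^(\d+):", ir, re.MULTILINE)]  # [5, 8]
loop_branch = re.search(r"br label %(\d+), !llvm.loop", ir)
target = blocks[blocks.index(int(loop_branch.group(1))) + 1]  # next block: 8
print(ir.replace(f"br label %{loop_branch.group(1)}", f"br label %{target}"))
# -> the branch in block 5 now reads "br label %8, !llvm.loop !6"
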
run_command(f"llvm-as {input_file} -o {output_file}", "Assemble LLVM IR after unrolling") + run_command( + f"llvm-as {input_file} -o {output_file}", "Assemble LLVM IR after unrolling" + ) def unroll(bc_filepath: str, output_file_folder: str, output_name: str): @@ -108,14 +122,14 @@ def unroll(bc_filepath: str, output_file_folder: str, output_name: str): output_ir = f"{bc_filepath[:-3]}.ll" unrolled_output_ir = f"{bc_filepath[:-3]}_unrolled.ll" unrolled_mod_output_ir = f"{bc_filepath[:-3]}_unrolled_mod.ll" - unrolled_mod_output_bc = f"{bc_filepath[:-3]}_unrolled_mod.bc" + unrolled_mod_output_bc = f"{bc_filepath[:-3]}_unrolled_mod.bc" generate_llvm_ir(bc_filepath, output_ir) - + unroll_llvm_ir(output_ir, unrolled_output_ir) - + modify_loop_branches_to_next_block(unrolled_output_ir, unrolled_mod_output_ir) - + assemble_bitcode(unrolled_mod_output_ir, unrolled_mod_output_bc) - - return unrolled_mod_output_bc \ No newline at end of file + + return unrolled_mod_output_bc diff --git a/test/bsort/bsort.c b/test/bsort/bsort.c index 499ac129..0d9f0ff2 100644 --- a/test/bsort/bsort.c +++ b/test/bsort/bsort.c @@ -28,17 +28,14 @@ Declaration of global variables */ -#define bsort_SIZE 3 - - - +#define bsort_SIZE 2 int bsort_return( int Array[] ) { int Sorted = 1; int Index; - #pragma unroll 3 + #pragma unroll bsort_SIZE for ( Index = 0; Index < bsort_SIZE - 1; Index ++ ) Sorted = Sorted && ( Array[ Index ] < Array[ Index + 1 ] ); @@ -57,13 +54,13 @@ int bsort_BubbleSort( int Array[] ) int Sorted = 0; int Temp, Index, i; - #pragma unroll 3 + #pragma unroll bsort_SIZE for ( i = 0; i < bsort_SIZE - 1; i ++ ) { Sorted = 1; - #pragma unroll 3 + #pragma unroll bsort_SIZE for ( Index = 0; Index < bsort_SIZE - 1; Index ++ ) { - if ( Index > bsort_SIZE - i ) - break; + // if ( Index > bsort_SIZE - i ) // This does not seem necessary with the for loop. 
diff --git a/test/bsort/bsort.c b/test/bsort/bsort.c
index 499ac129..0d9f0ff2 100644
--- a/test/bsort/bsort.c
+++ b/test/bsort/bsort.c
@@ -28,17 +28,14 @@
    Declaration of global variables
 */

-#define bsort_SIZE 3
-
-
-
+#define bsort_SIZE 2

 int bsort_return( int Array[] )
 {
     int Sorted = 1;
     int Index;

-    #pragma unroll 3
+    #pragma unroll bsort_SIZE
     for ( Index = 0; Index < bsort_SIZE - 1; Index ++ )
         Sorted = Sorted && ( Array[ Index ] < Array[ Index + 1 ] );

@@ -57,13 +54,13 @@ int bsort_BubbleSort( int Array[] )
     int Sorted = 0;
     int Temp, Index, i;

-    #pragma unroll 3
+    #pragma unroll bsort_SIZE
     for ( i = 0; i < bsort_SIZE - 1; i ++ )
     {
         Sorted = 1;
-        #pragma unroll 3
+        #pragma unroll bsort_SIZE
         for ( Index = 0; Index < bsort_SIZE - 1; Index ++ )
         {
-            if ( Index > bsort_SIZE - i )
-                break;
+            // if ( Index > bsort_SIZE - i ) // This does not seem necessary with the for loop.
+            //     break;
             if ( Array[ Index ] > Array[Index + 1] )
             {
                 Temp = Array[ Index ];
                 Array[ Index ] = Array[ Index + 1 ];
diff --git a/test/if_elif_else/helper.h b/test/if_elif_else/helper.h
deleted file mode 100644
index 1233dafd..00000000
--- a/test/if_elif_else/helper.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef HELPER_H
-#define HELPER_H
-
-// __attribute__((always_inline));
-
-int abs(int x);
-
-#endif
diff --git a/test/if_elif_else/if_elif_else.c b/test/if_elif_else/if_elif_else.c
deleted file mode 100644
index 1f26f4e8..00000000
--- a/test/if_elif_else/if_elif_else.c
+++ /dev/null
@@ -1,23 +0,0 @@
-#include
-#include
-#include
-
-int abs(int x);
-
-int test(int x){
-    if (x == 4) {
-        int a = 1;
-        int b = a * 2;
-        int c = a * b;
-    }
-    if (x == 4) {
-        int a = 1;
-        int b = a * 2;
-        int c = a * b;
-        return c;
-    }
-    return 0;
-}
-
-
-
diff --git a/test/if_elif_else/config.yaml b/test/if_statements/config.yaml
similarity index 94%
rename from test/if_elif_else/config.yaml
rename to test/if_statements/config.yaml
index b28c09aa..c4653253 100644
--- a/test/if_elif_else/config.yaml
+++ b/test/if_statements/config.yaml
@@ -1,13 +1,12 @@
 ---
 gametime-project:
   file:
-    location: if_elif_else.c
+    location: test.c
     analysis-function: test
     additional-files: [helper.c]
     start-label: null
     end-label: null
-
   preprocess:
     include: null
     merge: null
diff --git a/test/if_elif_else/helper.c b/test/if_statements/helper.c
similarity index 100%
rename from test/if_elif_else/helper.c
rename to test/if_statements/helper.c
diff --git a/test/if_statements/test.c b/test/if_statements/test.c
new file mode 100644
index 00000000..10c879c5
--- /dev/null
+++ b/test/if_statements/test.c
@@ -0,0 +1,21 @@
+// FIXME: Support header files and move the abs declaration to a helper.h.
+int abs(int x); // Defined in helper.c
+
+// Function under analysis
+int test(int x){
+    if (abs(x) == 42) {
+        int a = 1;
+        int b = a * 2;
+        int c = a * b;
+        return c;
+    }
+    else if (abs(x) == 128) {
+        int a = 1;
+        int b = a * 2;
+        return b;
+    }
+    return 0;
+}
+
+