"""Generate a YAML file of default configuration values from C/C++ sources.

The script scans a directory tree for ``.get<T>("a:b:c", default)`` calls,
builds a nested dictionary keyed by the colon-separated path, and writes the
defaults to a YAML file followed by a commented-out section listing the
``T`` type hint of every option.
"""
import os
import re
from io import StringIO

import tree_sitter_cpp as tscpp
import yaml
from tree_sitter import Language, Parser

CPP_LANGUAGE = Language(tscpp.language())


def remove_comments_cpp(source_code):
    """Return ``source_code`` with every C/C++ comment node removed.

    The source is parsed with tree-sitter and the text of all leaf nodes that
    are not inside a ``comment`` node is concatenated. Only text covered by
    leaf nodes is kept, so any source text lying between leaves (typically
    whitespace) is dropped as well.

    Args:
        source_code: The C/C++ source text.

    Returns:
        The concatenated text of all non-comment leaf nodes.
    """
    parser = Parser(CPP_LANGUAGE)
    # tree-sitter reports node positions as byte offsets, so all slicing must
    # be done on the encoded bytes; slicing the str would go wrong as soon as
    # the source contains a multi-byte character.
    source_bytes = source_code.encode()
    tree = parser.parse(source_bytes)

    pieces = []
    # Explicit stack instead of recursion so that deeply nested syntax trees
    # cannot exhaust the interpreter's recursion limit.
    pending = [tree.root_node]
    while pending:
        node = pending.pop()
        if node.type == 'comment':
            continue
        children = node.children
        if children:
            # Reversed so that children are popped in source order.
            pending.extend(reversed(children))
        else:
            pieces.append(source_bytes[node.start_byte:node.end_byte])
    return b''.join(pieces).decode(errors='replace')


# Regular expression to match .get<T>("a:b:c", x)
# Groups: (1) the type T, (2) the colon-separated key path, (3) the default x.
pattern = re.compile(r'\.get<([^>]+)>\("([^"]+)",\s*([^);]+)\)')


def _parse_float(text):
    """Convert a C/C++ floating-point literal to ``float``.

    Accepts a trailing ``f``/``F``/``l``/``L`` literal suffix (e.g. ``1.0f``).

    Raises:
        ValueError: If ``text`` is not a floating-point literal.
    """
    try:
        return float(text)
    except ValueError:
        if text[-1:] in {"f", "F", "l", "L"}:
            return float(text[:-1])
        raise


def parse_value(value, type_hint):
    """Convert the textual default ``value`` according to ``type_hint``.

    Args:
        value: The default value exactly as captured from the source code.
        type_hint: The template argument of the ``.get<T>`` call.

    Returns:
        A ``(converted_value, type_hint)`` tuple. The tuple shape is required
        by ``split_dict_recursive``. Values that cannot be converted are
        returned as their stripped source text.
    """
    value = value.strip()

    if type_hint in {"int", "long"}:
        try:
            return (int(value), type_hint)
        except ValueError:
            # Not a plain decimal literal (identifier, expression, suffix...).
            return (value, type_hint)
    if type_hint in {"float", "double"}:
        try:
            return (_parse_float(value), type_hint)
        except ValueError:
            return (value, type_hint)
    if value == "defaultDataDir":
        return ("data", type_hint)  # special case for defaultDataDir
    if type_hint == "bool":
        return (value.lower() in {"true", "1"}, type_hint)
    if value.startswith('"') and value.endswith('"'):
        return (value.strip('"'), type_hint)  # Remove quotes for string literals
    if value.startswith("'") and value.endswith("'"):
        return (value.strip("'"), type_hint)  # Remove single quotes
    return (value, type_hint)  # Return as-is if unsure


def insert_into_dict(hierarchy_dict, keys, value, type_hint):
    """Insert a parsed value into a nested dictionary.

    Args:
        hierarchy_dict: The dictionary to update in place.
        keys: Non-empty sequence of path segments; all but the last name
            nested dictionaries (created on demand), the last names the entry.
            The sequence itself is not modified.
        value: The textual default value, passed to ``parse_value``.
        type_hint: The type hint, passed to ``parse_value``.

    Raises:
        TypeError: If a non-final path segment already holds a value instead
            of a nested dictionary.
    """
    *parents, leaf = keys
    node = hierarchy_dict
    for key in parents:
        child = node.setdefault(key, {})
        if not isinstance(child, dict):
            raise TypeError(
                f"Cannot nest under key '{key}': it already holds a value")
        node = child
    node[leaf] = parse_value(value, type_hint)


def scan_files(directory):
    """Scans all .cpp, .h, .c, and .hpp files in a directory recursively.

    Every match of ``pattern`` in the comment-free source is printed and
    inserted into the result; a later match for the same key path replaces an
    earlier one. Key paths starting with ``Data:Dir`` are skipped.

    Args:
        directory: Root directory of the scan.

    Returns:
        A nested dictionary whose leaves are ``(value, type_hint)`` tuples.
    """
    hierarchy_dict = {}
    for root, dirs, files in os.walk(directory):
        # os.walk yields entries in arbitrary order; sorting makes both the
        # generated file and the winner among duplicate keys reproducible.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(('.cpp', '.h', '.c', '.hpp')):
                continue
            file_path = os.path.join(root, file)
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                noCommentSourceCode = remove_comments_cpp(f.read())
            for match in pattern.finditer(noCommentSourceCode):
                print(f"Match: {match.group()}")
                type_hint, hierarchy, value = match.groups()
                keys = hierarchy.split(":")
                # Slice comparison is safe for single-segment keys.
                if keys[:2] == ["Data", "Dir"]:
                    continue  # Skip Data:Dir as it is a special case
                insert_into_dict(hierarchy_dict, keys, value, type_hint)
    return hierarchy_dict


class QuotedString(str):
    """Marker type for strings that are dumped to YAML in double quotes."""


def represent_quoted_string(dumper, data):
    """YAML representer emitting ``data`` as a double-quoted string scalar."""
    return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='"')


def split_dict_recursive(originalDict):
    """Split a nested dict of ``(value, type_hint)`` leaves into two dicts.

    Args:
        originalDict: Nested dictionary whose leaves are 2-tuples.

    Returns:
        A ``(dataDict, typeDict)`` pair with the same nesting as the input,
        holding the first and second tuple elements respectively.

    Raises:
        ValueError: If a leaf is neither a dictionary nor a 2-tuple.
    """
    dataDict = {}
    typeDict = {}
    for key, value in originalDict.items():
        if isinstance(value, dict):
            # If the value is another dictionary, recurse
            dataDict[key], typeDict[key] = split_dict_recursive(value)
        elif isinstance(value, tuple) and len(value) == 2:
            dataDict[key], typeDict[key] = value
        else:
            raise ValueError(f"Unexpected structure in dictionary at key '{key}': {value}")
    return dataDict, typeDict


def save_yaml(data, output_file):
    """Saves the nested dictionary to a YAML file.

    The option values are written first, with string values double-quoted,
    followed by a notice and the type hints as commented-out YAML.

    Args:
        data: Nested dictionary with ``(value, type_hint)`` leaves, as
            returned by ``scan_files``.
        output_file: Path of the file to (over)write.

    Raises:
        ValueError: If ``data`` contains a leaf that is not a 2-tuple.
    """
    yaml.add_representer(QuotedString, represent_quoted_string)

    def quote_strings(item):
        if isinstance(item, dict):
            return {k: quote_strings(v) for k, v in item.items()}
        if isinstance(item, list):
            return [quote_strings(element) for element in item]
        if isinstance(item, str):
            return QuotedString(item)
        return item

    # Split first: the leaves of ``data`` are tuples, which quote_strings
    # does not descend into, so quoting must be applied to the plain values.
    options, types = split_dict_recursive(data)
    options = quote_strings(options)

    type_file = StringIO()
    yaml.dump(types, type_file, default_flow_style=False, sort_keys=False, indent=4)

    with open(output_file, 'w', encoding='utf-8') as f:
        yaml.dump(options, f, default_flow_style=False, sort_keys=False, indent=4)
        f.write("\n")
        f.write("# This file was auto-generated by generateDefaultConfig.py\n")
        f.write("# Do not modify this file directly.\n")
        f.write('\n#')
        f.write('='*35 + " TYPE HINTS " + '='*35 + '\n')
        f.write('\n')
        # write types yaml output with # in front of each line
        for line in type_file.getvalue().splitlines():
            f.write(f"# {line}\n")


def main():
    """Parse the command line, scan the sources and write the YAML file."""
    import argparse

    parser = argparse.ArgumentParser(description="Generate a YAML file with default configuration values.")
    parser.add_argument("directory", help="Directory to scan for .cpp, .h, .c, and .hpp files.")
    parser.add_argument("-o", "--output", help="Output file name.", default="defaults.yaml")
    args = parser.parse_args()

    extracted_data = scan_files(args.directory)
    print(extracted_data)
    save_yaml(extracted_data, args.output)
    print(f"✅ YAML file generated: {args.output}")


if __name__ == "__main__":
    main()