研究下osu!的pp算法~(Part_3:并不完美的代码实现难度评价代码,但是至少完成了大部分)

下面是一段用 Python 写的代码,用于评估一段 C++/Python 代码的实现难度(而不是算法设计的难度),对应 Part_2 中"算法实现"部分的难度。这只是一个不完美的版本,实际的难度评估结果仍需人工验证。
(写这段代码的过程太烧CPU了,再加上放假,有的时候一连咕了好几天...,最后还不能完美实现想要的功能,需要人工验证,大家只要看看就可以)
(后续会逐步添加注释)

import re
import math
import argparse

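# The CodeParser class below wraps all of the feature-detection logic
# (one detect_* method per scored feature) together with the scoring routine.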

class CodeParser:
    def __init__(self, language):
        self.language = language
        self.language_pp = {
            "python": {
                "int_declaration": 0.09,
                "integer_division": 0.24,
                "multiplication": 0.09,
                "subtraction": 0.09,
                "addiction": 0.09,
                "include": 0.03,
                "output_handling": 0.12,
                "input_handling": 0.21,
                "note": 0.075,
                "shift_left": 0.09,
                "shift_right": 0.09,
                "bitwise_operations": 0.21,
                "logical_operations": 0.105,
                "conditionals": 0.165,
                "loops": 0.225,
                "structs_and_classes": 0.165,
                "functions": 0.15,
                "recursive": 0.60,
                "arrays": 0.21,
                "points": 0.12,
            },
            "cpp": {
                "int_declaration": 0.12,
                "integer_division": 0.18,
                "multiplication": 0.12,
                "subtraction": 0.12,
                "addiction": 0.12,
                "include": 0.06,
                "output_handling": 0.26,
                "input_handling": 0.16,
                "note": 0.1,
                "shift_left": 0.12,
                "shift_right": 0.12,
                "bitwise_operations": 0.28,
                "logical_operations": 0.14,
                "conditionals": 0.22,
                "loops": 0.3,
                "structs_and_classes": 0.32,
                "functions": 0.26,
                "recursive": 0.88,
                "pointers": 0.34,
                "arrays": 0.28,
                "points": 0.16,
                "arrows": 0.16,
            }
        } #奖励的PP值
        self.property = ["int_declaration", "integer_division", "arrows", "include", "output_handling", "input_handling", "addiction", "note", "shift_left", "shift_right", "bitwise_operations", "logical_operations", "conditionals", "loops", "structs_and_classes","functions", "recursive", "arrays", "points", "pointers", "subtraction", "multiplication"]
        self.func_name = set()
        self.struct_name = set()
        self.array_variables = set()
        self.pointer_variables = {}
        self.actual_arrow_count = 0
        self.approximate_ptr_count = 0
        self.keyword_dict = {
            "int": r'\bint\b',
            "division_cpp": r'(?<!/)/(?!/)',
            "division_py": r'//|/',
            "multiplication": r'\*',
            "subtraction": r'-',
            "include": r'import|include',
            "output": r'print|System\.out\.println|cout',
            "note_cpp": r'\/\/',
            "note_py": r'#',
            "add": r'\+',
            "shift_left": r'<<',
            "shift_right": r'>>',
            "bitwise_and_cpp": r'(?<!&)&(?!&)',
            "bitwise_and_py": r'(?<!&)&(?!&)',
            "bitwise_or_cpp": r'(?<!\|)\|(?!\|)',
            "bitwise_or_py": r'(?<!\|)\|(?!\|)',
            "bitwise_xor_cpp": r'\^',
            "bitwise_xor_py": r'\^',
            "bitwise_not_cpp": r'~',
            "bitwise_not_py": r'~',
            "logical_and_cpp": r'&&',
            "logical_and_py": r'\band\b',
            "logical_or_cpp": r'\|\|',
            "logical_or_py": r'\bor\b',
            "logical_not_cpp": r'!',
            "logical_not_py": r'\bnot\b',
            "if_cpp": r'\bif\b',
            "else_cpp": r'\belse\b',
            "switch_cpp": r'\bswitch\b',
            "case_cpp": r'\bcase\b',
            "default_cpp": r'\bdefault:\b',
            "if_py": r'\bif\b',
            "else_py": r'\belse\b',
            "while_loop": r'\bwhile\b',
            "for_loop_cpp": r'\bfor\s*\([^)]*\)\s*{',
            "for_loop_py": r'\bfor\s+\w+\s+in\b',
            "break_keyword": r'\bbreak\b',
            "continue_keyword": r'\bcontinue\b',
            "enumerate_py": r'\benumerate\b',
            "range_py": r'\brange\b',
            "function_cpp": r'\b(?:%s)\s+(\w+)\s*\([^)]*\)\s*(?:{|;)', #%s表示某种类型
            "function_py": r'\bdef\s+(\w+)\s*\([^)]*\):',
            "struct_cpp": r'\bstruct\s+([a-zA-Z_]\w*)\s*\{',
            "class_cpp": r'\bclass\s+([a-zA-Z_]\w*)\s*[\{:]',
            "class_py": r'\bclass\s+([a-zA-Z_]\w*)\s*:',
            #"pointer_declaration_cpp": r'\b(\w+(?:\s*\*\s*\w+\s*)*\**)\s*;',
            #"pointer_declaration_cpp": r'\b(?:%s)\s+(\w+)\s*(?:(?:\[\s*\])+|[*&])?\s*(?:\w+\s*::\s*)?(\w+)\s*\([^)]*\)\s*(?:{|;)' % '|'.join(self.struct_name),
            "array_declaration_cpp": r'\b(\w+)\s*\[.*?\]\s*(?:\[.*?\]\s*)*;',
            "vector_declaration_cpp": r'\bvector<[^>]*>\s+(\w+)\s*;',
            "array_use_cpp": r'\b(\w+)\s*\[.*?\]\s*(?:\[.*?\]\s*)*;',
            "array_use_py": r'\b(\w+)\s*\[.*?\]\s*;',
            
        } #关键词字典

    def detect_int_declaration(self, code): #int声明
        pt = re.findall(self.keyword_dict["int"], code)
        #print(f"int appears:{pt}")
        if pt is None:
            return []
        return pt

    def detect_integer_division(self, code): #除法
        if self.language == "cpp":
            pattern = self.keyword_dict["division_cpp"]  # 对于C++,匹配单斜杠,但排除双斜杠
        elif self.language == "python":
            pattern = self.keyword_dict["division_py"]  # 对于Python,匹配'//'或'/'
        else:
            return None  # 处理未知语言的情况

        pt = re.findall(pattern, code)
        #print(f"division appears: {pt}")
        if pt is None:
            return []
        return pt
    
    def detect_note(self,code): #注释的个数(需要人工确认,仅供参考)
        if self.language == "cpp":
            pattern = self.keyword_dict["note_cpp"]  # 对于C++,匹配单斜杠,但排除双斜杠
        elif self.language == "python":
            pattern = self.keyword_dict["note_py"]  # 对于Python,匹配'//'或'/'
        else:
            return None  # 处理未知语言的情况

        pt = re.findall(pattern, code)
        #print(f"Note appears: {pt}")
        if pt is None:
            return []
        return pt


    def detect_multiplication(self, code): #乘法
        pt = re.findall(self.keyword_dict["multiplication"], code)
        #print(f"Multiplication appears:{pt}")
        if pt is None:
            return []
        return ['*' for _ in range (len(pt) - self.approximate_ptr_count)]

    def detect_subtraction(self, code): #减法
        pt = re.findall(self.keyword_dict["subtraction"], code)
        #print(f"Negative and subtraction appears:{pt}")
        if pt is None:
            return []
        return ['-' for _ in range (len(pt) - self.actual_arrow_count)]
    
    def detect_addiction(self, code): #加法
        pt = re.findall(self.keyword_dict["add"], code)
        #print(f"Addiction appears:{pt}")
        if pt is None:
            return []
        return pt

    def detect_include(self, code): #include/import
        pt = re.findall(self.keyword_dict["include"], code)
        #print(f"Include found:{pt}")
        if pt is None:
            return []
        return pt

    def detect_output_handling(self, code): #输出
        # 使用正则表达式匹配代码中的关键词
        matches = re.findall(self.keyword_dict["output"], code)

        # 如果没有匹配结果,返回空列表
        if matches is None:
            return []
        #print(f"Output found:{matches}")

        return matches
    
    def detect_input_handling(self, code): #输入
        # 定义关键词列表
        input_keywords = ["scanf", "cin", "input", "Scanner"]

        # 初始化计数器
        count = []

        # 遍历关键词列表,使用正则表达式进行匹配
        for keyword in input_keywords:
            # 构造正则表达式,不区分大小写
            pattern = re.compile(rf'\b{re.escape(keyword)}\b', re.IGNORECASE)

            # 使用正则表达式匹配代码中的关键词
            matches = re.findall(pattern, code)

            # 更新计数器
            count += matches

        #print(f"Input found:{count}")
        return count
    
    def detect_shift_left(self, code):
        if self.language == "cpp":
            lines = code.split('\n')
            for line in lines:
                if 'cout' in line:
                    continue  # Skip lines containing 'cout'
                
                matches = re.findall(self.keyword_dict["shift_left"], line)
                if matches:
                    return matches  # Found bit left shift in this line
            
        elif self.language == "python":
            pattern = self.keyword_dict["shift_left"]
            matches = re.findall(pattern, code)
            return matches if matches is not None else []
            
        return []  # Handling unknown language

    def detect_shift_right(self, code):
        if self.language == "cpp":
            lines = code.split('\n')
            for line in lines:
                if 'cin' in line: 
                    continue  # Skip lines containing 'cin'

                matches = re.findall(self.keyword_dict["shift_right"], line)
                if matches:
                    return matches  # Found bit right shift in this line

        elif self.language == "python":
            pattern = self.keyword_dict["shift_right"]
            matches = re.findall(pattern, code)
            return matches if matches is not None else []

        return []  # Handling unknown language
    
    def detect_bitwise_operations(self, code):
        if self.language == "cpp":
            pattern_and = self.keyword_dict["bitwise_and_cpp"]
            pattern_or = self.keyword_dict["bitwise_or_cpp"]
            pattern_xor = self.keyword_dict["bitwise_xor_cpp"]
            pattern_not = self.keyword_dict["bitwise_not_cpp"]

            matches_and = re.findall(pattern_and, code)
            matches_or = re.findall(pattern_or, code)
            matches_xor = re.findall(pattern_xor, code)
            matches_not = re.findall(pattern_not, code)

            return matches_and + matches_or + matches_xor + matches_not
        elif self.language == "python":
            pattern_and = self.keyword_dict["bitwise_and_py"]
            pattern_or = self.keyword_dict["bitwise_or_py"]
            pattern_xor = self.keyword_dict["bitwise_xor_py"]
            pattern_not = self.keyword_dict["bitwise_not_py"]

            matches_and = re.findall(pattern_and, code)
            matches_or = re.findall(pattern_or, code)
            matches_xor = re.findall(pattern_xor, code)
            matches_not = re.findall(pattern_not, code)

            return matches_and + matches_or + matches_xor + matches_not

        return None  # Handling unknown language

    def detect_logical_operations(self, code):
        if self.language == "cpp":
            pattern_and = self.keyword_dict["logical_and_cpp"]
            pattern_or = self.keyword_dict["logical_or_cpp"]
            pattern_not = self.keyword_dict["logical_not_cpp"]

            matches_and = re.findall(pattern_and, code)
            matches_or = re.findall(pattern_or, code)
            matches_not = re.findall(pattern_not, code)

            return matches_and + matches_or + matches_not
        elif self.language == "python":
            pattern_and = self.keyword_dict["logical_and_py"]
            pattern_or = self.keyword_dict["logical_or_py"]
            pattern_not = self.keyword_dict["logical_not_py"]

            matches_and = re.findall(pattern_and, code)
            matches_or = re.findall(pattern_or, code)
            matches_not = re.findall(pattern_not, code)

            return matches_and + matches_or + matches_not

        return None  # Handling unknown language
    
    def detect_conditionals(self, code):
        if self.language == "cpp":
            pattern_if = self.keyword_dict["if_cpp"]
            pattern_else = self.keyword_dict["else_cpp"]
            pattern_switch = self.keyword_dict["switch_cpp"]
            pattern_case = self.keyword_dict["case_cpp"]
            pattern_default = self.keyword_dict["default_cpp"]

            matches_if = re.findall(pattern_if, code)
            matches_else = re.findall(pattern_else, code)
            matches_switch = re.findall(pattern_switch, code)
            matches_case = re.findall(pattern_case, code)
            matches_default = re.findall(pattern_default, code)

            return matches_if + matches_else + matches_switch + matches_case + matches_default

        elif self.language == "python":
            pattern_if = self.keyword_dict["if_py"]
            pattern_else = self.keyword_dict["else_py"]

            matches_if = re.findall(pattern_if, code)
            matches_else = re.findall(pattern_else, code)

            return matches_if + matches_else

        return None  # Handling unknown language
    
    def detect_loops(self, code):
        if self.language == "cpp":
            pattern_while = self.keyword_dict["while_loop"]
            pattern_for = self.keyword_dict["for_loop_cpp"]
            pattern_break = self.keyword_dict["break_keyword"]
            pattern_continue = self.keyword_dict["continue_keyword"]

            matches_while = re.findall(pattern_while, code)
            matches_for = re.findall(pattern_for, code)
            matches_break = re.findall(pattern_break, code)
            matches_continue = re.findall(pattern_continue, code)

            return matches_while + matches_for + matches_break + matches_continue

        elif self.language == "python":
            pattern_while = self.keyword_dict["while_loop"]
            pattern_for = self.keyword_dict["for_loop_py"]
            pattern_break = self.keyword_dict["break_keyword"]
            pattern_continue = self.keyword_dict["continue_keyword"]
            pattern_enumerate = self.keyword_dict["enumerate_py"]
            pattern_range = self.keyword_dict["range_py"]

            matches_while = re.findall(pattern_while, code)
            matches_for = re.findall(pattern_for, code)
            matches_break = re.findall(pattern_break, code)
            matches_continue = re.findall(pattern_continue, code)
            matches_enumerate = re.findall(pattern_enumerate, code)
            matches_range = re.findall(pattern_range, code)

            return (
                matches_while + matches_for + matches_break +
                matches_continue + matches_enumerate + matches_range
            )

        return None  # Handling unknown language
    
    def detect_structs_and_classes(self, code):
        if self.language == "cpp":
            patterns = [self.keyword_dict["struct_cpp"], self.keyword_dict["class_cpp"]]
            for pattern in patterns:
                matches = re.findall(pattern, code)
                self.struct_name.update(matches)

        elif self.language == "python":
            pattern = self.keyword_dict["class_py"]
            matches = re.findall(pattern, code)
            self.struct_name.update(matches)

        return list(self.struct_name) if self.struct_name is not None else []

    def detect_functions(self, code):
        self.detect_structs_and_classes(code)

        if self.language == "cpp":
            for struct_name in self.struct_name:
                pattern_function = self.keyword_dict["function_cpp"] % re.escape(struct_name)
                matches_function = re.findall(pattern_function, code)
                self.func_name.update(matches_function)

            return self.func_name

        elif self.language == "python":
            pattern_function = self.keyword_dict["function_py"]
            matches_function = re.findall(pattern_function, code)
            self.func_name.update(matches_function)

            return list(self.func_name)
    
    def detect_recursive(self, code):
        recursive_functions = set()

        if self.language == "cpp":
            for func_name in self.func_name:
                pattern_cpp = fr'\b{re.escape(func_name)}\s*{{.*{re.escape(func_name)}.*}}'
                matches_cpp = re.search(pattern_cpp, code)
                if matches_cpp:
                    recursive_functions.add(func_name)

        elif self.language == "python":
            lines = code.split('\n')
            #print(lines)
            for func_name in self.func_name:
                func_line_number = None

                # Find the line number where the function is defined
                for i, line in enumerate(lines):
                    if re.search(fr'\bdef {re.escape(func_name)}\s*\(.*\):', line):
                        func_line_number = i
                        break
                #print(f"Function {func_name} is at row {func_line_number}")

                if func_line_number is not None:
                    # Count the number of leading spaces in the line where the function is defined
                    space_count = len(lines[func_line_number]) - len(lines[func_line_number].lstrip(' '))
                    #print(f"It has {space_count} spaces!")

                    # Search for the function name in lines from the function definition to the end of the code
                    for i in range(func_line_number + 1, len(lines)):
                        # Check if the current line has fewer than space_count leading spaces
                        #print(lines[i], end='\t')
                        #print(len(lines[i]) - len(lines[i].lstrip(' ')))
                        if len(lines[i]) - len(lines[i].lstrip(' ')) <= space_count:
                            break

                        # Check for the function name in lines with space_count + 1 leading spaces
                        if len(lines[i]) - len(lines[i].lstrip(' ')) > space_count:
                            #print("#In function")
                            if re.search(fr'\b{re.escape(func_name)}\b', lines[i]):
                                recursive_functions.add(func_name)
                                break

        return list(recursive_functions)
    
    def detect_arrays(self, code):

        def scan(code):
                detailed_use_list = []
                used_list = set()
                stack = []
                start = False  # Whether the target string has appeared

                i = 0
                while i < len(code):
                    if code[i] == '[' and not start:
                        # Start scanning valid identifier
                        identifier = ""
                        j = i - 1
                        while j >= 0 and (code[j].isalnum() or code[j] == '_'):
                            identifier = code[j] + identifier
                            j -= 1

                        if identifier.isidentifier():
                            start = True
                            i = j + 1  # Move the index to the beginning of the identifier

                    if start:
                        if code[i] == '[':
                            stack.append('[')
                        elif code[i] == ']':
                            if stack:
                                stack.pop()
                            else:
                                # Invalid string, reset and continue scanning
                                start = False
                                stack = []
                                i += 1
                                continue

                            if not stack:
                                # Valid string found, add to the list
                                detailed_use_list.append(code[j+1:i+1])  # Include the whole sequence in square brackets
                                used_list.add(identifier)
                                start = False

                    i += 1

                return detailed_use_list,used_list
        
        def process_arrays(arrays_list, used_list):
                final_array_list = used_list.union(set(array[0] for array in arrays_list))

                # Find dimensions for arrays in final_array_list
                dimensions_dict = []
                for array_name, array_dim in arrays_list:
                    if array_name not in used_list:
                        # If the array was used, initialize its dimension to (0,)
                        dimensions_dict.append((array_name,0))
                    else:
                        # If the array was not used, assign a default dimension (e.g., (1,))
                        index = next(i for i, (name, _) in enumerate(arrays_list) if name == array_name)
                        dimensions_dict.append((array_name,arrays_list[index][1]))

                return dimensions_dict
        
        if self.language == "cpp":
            pattern_array_declaration = self.keyword_dict["array_declaration_cpp"]
            matches_array = re.findall(pattern_array_declaration, code)
            arrays = []

            for array_declaration in matches_array:
                self.array_variables.add(array_declaration)
                arrays.append((array_declaration,0))

            pattern_vector_declaration = self.keyword_dict["vector_declaration_cpp"]
            matches_vector = re.findall(pattern_vector_declaration, code)

            for vector_declaration in matches_vector:
                # Count the number of '<' occurrences to determine the maximum dimension
                dimension = vector_declaration.count('<')
                self.array_variables.add(vector_declaration)
                arrays.append((vector_declaration,dimension))


            array_usages,used_list = scan(code)

            arrays = process_arrays(arrays,used_list)
            #print("After Processing:")
            #print(arrays)

            for array_name, array_init_dim in arrays:
                if array_init_dim == 0:
                    continue
                for usage in array_usages:
                    if array_name in usage:
                        usage_dim = self.get_array_dimensions(usage)
                        arrays = [(name, max(dim, usage_dim)) if name == array_name else (name, dim) for name, dim in arrays]
                #print(arrays)
            result = [var[0] for var in arrays for _ in range ((var[1] * (var[1] + 1)) // 2)]

            return result



        # Other logic...
        elif self.language == "python":
            pattern_array_declaration1 = r'\w+\s*=\s*\[[^\]]*\]'
            pattern_array_declaration2 = r'\w+\s*=\s*\([^)]*\)'
            pattern_array_declaration3 = r'\w+\s*=\s*\{[^\}]*\}'
            #pattern_array_declaration3 = r'\w+\s*=\s*\{(?:[^{}"\':]+|\'[^\']*\'|"[^"]*")*:[^\}]*\}'
            pattern_array_declaration4 = r'\w+\s*=\s*\w+\.append\([^)]*\)'
            
            matches_array1 = re.findall(pattern_array_declaration1, code)
            matches_array2 = re.findall(pattern_array_declaration2, code)
            matches_array3 = re.findall(pattern_array_declaration3, code)
            matches_array4 = re.findall(pattern_array_declaration4, code)
            #print(matches_array1,matches_array2,matches_array3,matches_array4)

            arrays = []
            for array_declaration in matches_array1 + matches_array2 + matches_array3 + matches_array4:
                array_name, array_content = re.split(r'\s*=\s*', array_declaration)
                array_dim = self.get_init_dimensions(array_content)
                arrays.append((array_name, array_dim))

            # Check array usage to update dimensions
            
            array_usages,used_list = scan(code)
            #print(used_list)
            #通过used_list删除第一个阶段没有用到的元素,而used_list中的所有元素都会进行保留
            #删除属于arrays但不属于used_list中的元素,即final_array_list = used_list 和(arrays_list - used_list)的并集,并为其中只在used_list出现过的元素维度赋初值0.
            #如何做到这一点呢?注意到之前初始化的arrays是一个二元组...

            #print(array_usages)
            #print("Before Processing:")
            #print(arrays)

            
            
            arrays = process_arrays(arrays,used_list)
            #print("After Processing:")
            #print(arrays)

            for array_name, array_init_dim in arrays:
                if array_init_dim == 0:
                    continue
                for usage in array_usages:
                    if array_name in usage:
                        usage_dim = self.get_array_dimensions(usage)
                        arrays = [(name, max(dim, usage_dim)) if name == array_name else (name, dim) for name, dim in arrays]
                #print(arrays)
            result = [var[0] for var in arrays for _ in range ((var[1] * (var[1] + 1)) // 2)]

        return result
    
    def get_init_dimensions(self, array_content):
        """Estimate a literal's dimension as half of its bracket characters.

        Square, curly and round brackets (opening and closing) are all
        counted.
        """
        bracket_total = sum(array_content.count(bracket) for bracket in "[]{}()")
        return bracket_total // 2

    def get_array_dimensions(self, array_content):
        """Estimate a subscript expression's dimension as half of its square brackets."""
        opening = array_content.count('[')
        closing = array_content.count(']')
        return (opening + closing) // 2
    

    def detect_pointers(self, code): #由于rf正则表达式构建出来以后会出现错误,所以这里暂时没法检测到变量,只能人工检测..> <所以这一部分只是一个基本的思路,实际上检测不出来,然后就会导致乘法符号检测的结果也是错的
        # Initialize variables
        type_list = ['int','float','double','enum'] + list(self.struct_name)
        if self.language != "cpp":
            return []

        # Step 1: Detect pointer variable definitions
        # Pattern: type_list (0~x spaces) (0~y * symbols) (0~z spaces) [valid identifier]
        pointer_variable_pattern = re.compile(r'\b({"|".join(type_list)})\s*({"\s*".join(["(\*+)"])}\s*)\b(\w+)\b')
        matches_step1 = pointer_variable_pattern.findall(code)

        # Collect unique identifiers and calculate approximate and actual pointer counts
        pointer_identifiers = set()
        total_approx_pointer_count = 0
        total_actual_pointer_count = 0

        for match in matches_step1:
            _, _, asterisks, identifier = match
            asterisk_count = len(asterisks)
            total_approx_pointer_count += (asterisk_count * (asterisk_count + 1)) // 2
            total_actual_pointer_count += asterisk_count
            pointer_identifiers.add(identifier)

        # Step 2: Detect pointer dereferences
        # Pattern: (0~y * symbols) (0~z spaces) [valid identifier]
        pointer_dereference_pattern = re.compile(r'\b({"|".join(pointer_identifiers)})\s*({"\s*".join(["(\*+)"])}\s*)\b(\w+)\b')
        matches_step2 = pointer_dereference_pattern.findall(code)

        # Calculate the total actual pointer count for dereferences
        total_actual_pointer_count_dereference = sum(len(asterisks) for _, asterisks, _ in matches_step2)

        # Return the results
        self.approx_pointer_count = total_approx_pointer_count
        self.actual_pointer_count = total_actual_pointer_count + total_actual_pointer_count_dereference

        return ['*' for _ in range(total_actual_pointer_count + total_actual_pointer_count_dereference)]


    
    #考虑到一些数据结构的复杂度,我们设计了这两个函数:
    def detect_points(self, code):
        points_count = 0
        combo = 0

        lines = code.split('\n')

        for line in lines:
            # Iterate through each character in the line
            #print(points_count)
            #print(line, end=' ')
            for char in line:
                if char == '.':
                    combo += 1
                elif not (char.isspace() or char.isalpha() or char.isdigit() or char == '_'):
                    # Combo interrupted in Python, reset combo and add to points_count
                    points_count += (combo * (combo + 1)) // 2
                    combo = 0

        if self.language == "cpp":
            # Count occurrences of '...'
            cpp_ellipsis_count = code.count('...')
            # Subtract three times the count of '...' from the total count
            points_count -= 3 * cpp_ellipsis_count

        return ['.' for _ in range(points_count)]

    def detect_arrows(self, code):
        # Initialize variables
        if self.language != "cpp":
            return []
        arrows_count = 0
        actual_arrows_count = 0
        combo = 0

        # Split the code into lines
        lines = code.split('\n')

        for line in lines:
            #print(arrows_count, end=' ')
            #print(line)
            check_arrow = False
            # Iterate through each character in the line
            for char in line:
                if char == '-' and not check_arrow:
                    # Arrow starts
                    check_arrow = True
                elif char == '>' and check_arrow:
                    # Arrow ends
                    combo += 1
                    check_arrow = False
                    actual_arrows_count += 1
                elif combo and not (char.isspace() or char.isalpha() or char.isdigit() or char == '_'):
                    # Combo interrupted, reset combo and add to arrows_count
                    arrows_count += (combo * (combo + 1)) // 2
                    combo = 0
        #如何补充计数actual_arrows_count的代码呢?

        self.actual_arrow_count = actual_arrows_count

        return ['->' for _ in range (arrows_count)]
        

    def calculate_algorithm_implementation_pp(self, code):
        total_pp = 0.0

        for function_name in self.property:
            print(f"Check {function_name}--",end = " ")
            detect_function = getattr(self, f'detect_{function_name}')
            matches = detect_function(code)
            n = len(matches)
            print(f"{function_name} appears list: {matches}, appear times: {n}",end = " ")
            if n != 0:
                additional_pp = self.language_pp[self.language][function_name] * math.sqrt(n)
            else:
                additional_pp = 0
            print(f"+{additional_pp}pp")
            total_pp += additional_pp

        return total_pp
    
    

# 读取代码文件
def read_code_file(file_path):
    """Return the full text of ``file_path``, decoded as UTF-8."""
    with open(file_path, mode='r', encoding='utf-8') as source:
        return source.read()

# 根据代码特征,自动识别代码语言
def detect_language(file_path):
    """Guess the source language from the extension of ``file_path``.

    Only a real extension is considered, so an extension-less file that
    happens to be called ``py`` or ``cpp`` is not treated as source code,
    and dots in directory names are ignored. Matching is case-insensitive.

    Args:
        file_path: Path of the source file.

    Returns:
        ``"python"``, ``"cpp"`` or ``"java"``, or ``None`` when the
        extension is missing or not recognised.
    """
    import os  # local import: this is the only place that needs it

    extension = os.path.splitext(file_path)[1].lower().lstrip('.')
    languages_by_extension = {
        "py": "python",
        "cpp": "cpp",
        "cxx": "cpp",
        "cc": "cpp",
        "c++": "cpp",
        "hpp": "cpp",
        "hxx": "cpp",
        "java": "java",
    }
    return languages_by_extension.get(extension)

# 通过argparse定义命令行参数
def parse_args():
    """Parse the command line and return the resulting namespace.

    The only argument is the positional ``file_path`` of the code file.
    """
    cli = argparse.ArgumentParser(description="Calculate algorithm implementation PP.")
    cli.add_argument("file_path", help="Path to the code file")
    namespace = cli.parse_args()
    return namespace

# 测试
def main():
    """Command-line entry point: score the code file named on the command line.

    The language is inferred from the file extension. If the parser has no
    weight table for it, the program exits with an explanatory message
    instead of failing part-way through the analysis.
    """
    args = parse_args()
    language = detect_language(args.file_path)
    # Create the CodeParser instance.
    code_parser = CodeParser(language=language)
    if language not in code_parser.language_pp:
        supported = ", ".join(sorted(code_parser.language_pp))
        raise SystemExit(
            f"Cannot score {args.file_path!r}: unsupported or unrecognised "
            f"language {language!r} (supported: {supported})"
        )

    code = read_code_file(args.file_path)
    print(f"Your code is: \n\n{code}\n\n")
    # Compute the algorithm implementation PP.
    implementation_pp = code_parser.calculate_algorithm_implementation_pp(code)
    print(f"Algorithm Implementation PP(or Coding Star Rating): {implementation_pp}")
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

posted @ 2024-01-30 01:32  Yuzu_OvO(喵露露版)  阅读(43)  评论(0)    收藏  举报