#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Regression test runner.

Submits compile and test jobs to LSF (bsub), tracks their status, keeps a
per-test duration history, and generates reports.
"""

import argparse
import copy
import csv
import json
import os
import random
import re
import shlex
import signal
import subprocess
import sys
import threading
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import stat_transaction_count  # reuse transaction/cycle count helpers
class Colors:
    """ANSI escape sequences for colorized terminal output.

    `END` resets the terminal back to its default attributes.
    """
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    MAGENTA = '\033[95m'
    CYAN = '\033[96m'
    WHITE = '\033[97m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
    END = '\033[0m'
class TestResult:
    """Lifecycle state and outcome of a single test run."""

    def __init__(self, name: str, config: str = "default"):
        self.name = name
        self.config = config
        # One of: PENDING, RUNNING, PASS, FAIL, TIMEOUT, ERROR, RERUN PASS, RERUN FAIL
        self.status = "PENDING"
        self.start_time = None
        self.end_time = None
        self.duration = 0
        self.log_file = ""
        self.job_id = None
        self.retry_count = 0
        self.retry_started = False  # Track if retry has been started
        self.error_msg = ""
        self.coverage_db = ""
        self.estimated_duration = 0  # Estimated duration (seconds)
        self.seed = None
        self.opts = []
        self.is_retry = False  # Track if this is a retry case

    def start(self):
        """Mark the test as running and record its start timestamp."""
        self.start_time = time.time()
        self.status = "RUNNING"

    def finish(self, status: str, error_msg: str = ""):
        """Record completion.

        Computes the elapsed duration (0 if the test was never started) and
        maps PASS/FAIL to their "RERUN" variants when this is a retry run.
        """
        self.end_time = time.time()
        self.duration = (self.end_time - self.start_time) if self.start_time else 0

        retrying = self.is_retry or self.retry_count > 0
        if retrying and status in ("PASS", "FAIL"):
            self.status = f"RERUN {status}"
        else:
            self.status = status

        self.error_msg = error_msg

    def get_duration_str(self) -> str:
        """Format the duration as e.g. '1h2m5s', '2m5s' or '7s'; 'N/A' when unset."""
        if self.duration == 0:
            return "N/A"

        total = int(self.duration)
        hours, rem = divmod(total, 3600)
        minutes, seconds = divmod(rem, 60)

        if hours:
            return f"{hours}h{minutes}m{seconds}s"
        if minutes:
            return f"{minutes}m{seconds}s"
        return f"{seconds}s"
class RegressionRunner:
    """Regression test runner.

    Owns the per-run directory tree, the duration-history database, LSF job
    bookkeeping, and the monitoring/auto-restart configuration.
    """

    def __init__(self, args):
        self.args = args
        self.tests = []
        self.results = {}
        self.start_time = time.time()

        # Timestamp uniquely identifying this regression run
        self.regression_timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        # Derive a suffix from the current simulation directory (sim_xxx -> xxx)
        current_dir = os.path.basename(os.getcwd())
        dir_suffix = current_dir[4:] if current_dir.startswith('sim_') else ''
        self.regression_dir_name = f"regression_{dir_suffix}_{self.regression_timestamp}"

        # Compute directory paths (based on the optional output path)
        self._setup_directories()

        self.history_db_file = Path("test_history.json")  # Historical data file
        self.test_history = self.load_test_history()  # Load historical test data
        self.job_ids = []
        self.submitted_jobs = []  # Track submitted job IDs
        self.submitted_results = []  # Track submitted test results
        # Map job_id -> full opcode used for submission, to allow direct resubmission without lookups
        self.job_meta: Dict[str, Dict] = {}
        self.running_jobs = 0  # Number of jobs in RUN state
        self.pending_jobs = 0  # Number of jobs in PEND state
        self.lock = threading.Lock()
        # Delay first status summary until the print interval elapses (default: 30 minutes)
        self.last_status_print = time.time()
        self.status_print_interval = args.status_interval * 60  # minutes -> seconds
        self._stop_status_thread = False  # Control status print thread stop

        # Create necessary directories immediately
        self._create_directories()

        # Real-time report generation
        self.report_update_interval = 30  # Update report every 30 seconds
        self.last_report_update = time.time()
        self.real_time_report_path = self.report_dir / "zregress_report.log"

        # Load error monitor state if it exists
        self.load_error_monitor_state()

        # Error monitoring
        self.error_monitor_interval = args.error_monitor_interval * 60  # minutes -> seconds
        self.last_error_monitor_time = time.time()
        self.log_read_positions = {}  # Last read position for each log file
        # Last time each log file produced new content; used to detect hung simulations
        self.log_last_update_times: Dict[str, float] = {}

        # Hang timeout in seconds (no new log lines); defaults to 30 minutes
        hang_minutes = getattr(args, 'hang_timeout_minutes', None)
        self.hang_timeout_seconds = (hang_minutes if hang_minutes is not None else 30) * 60

        # PEND timeout in seconds (jobs waiting for resources); None disables the check
        pend_minutes = getattr(args, 'pend_timeout_minutes', None)
        self.pend_timeout_seconds = pend_minutes * 60 if pend_minutes is not None else None

        self.error_keywords = ['UVM_ERROR', 'UVM_FATAL', 'Solver failed', 'FATAL', 'Error', 'Offending']

        # Signal handling for graceful shutdown
        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)

        # Seed the RNG used for test-seed generation
        random.seed(time.time())

        # Validation frequency control
        self._validation_count = 0
        self._last_validation_time = 0

        # Auto-restart configuration
        self.auto_restart = getattr(args, 'auto_restart', False)
        self.restart_interval_hours = getattr(args, 'restart_interval_hours', None)
        self.restart_count = 0  # Track number of restarts
        self.max_restarts = getattr(args, 'max_restarts', None)  # None = unlimited
        self.first_run_start_time = time.time()  # For interval-based restart

    def _setup_directories(self):
        """Compute the regression directory and its sub-directory paths.

        Fix: the sub-directory attributes (log_dir, report_dir, coverage_dir,
        wave_dir) are assigned unconditionally; previously they sat under the
        no-output_dir branch, leaving them undefined when an output_dir was
        given.
        """
        if hasattr(self.args, 'output_dir') and self.args.output_dir:
            self.regression_dir = Path(self.args.output_dir) / self.regression_dir_name
        else:
            self.regression_dir = Path(self.regression_dir_name)

        # Sub-directories live under the regression dir regardless of its base
        self.log_dir = self.regression_dir / "logs"
        self.report_dir = self.regression_dir / "report_log"
        self.coverage_dir = self.regression_dir / "coverage"
        self.wave_dir = self.regression_dir / "waves"

    def _create_directories(self):
        """Create the whole regression directory tree (idempotent)."""
        for d in (self.regression_dir, self.log_dir, self.report_dir,
                  self.coverage_dir, self.wave_dir):
            d.mkdir(parents=True, exist_ok=True)

    def _get_output_dir_path(self):
        """Return the configured output directory, or '.' when not set."""
        if hasattr(self.args, 'output_dir'):
            return self.args.output_dir
        return "."
def load_test_history(self) -> Dict:        """Load historical test data"""        if not self.history_db_file.exists():            return {}                    try:            with open(self.history_db_file, 'r') as f:                return json.load(f)        except Exception as e:            print(f"{Colors.YELLOW}Warning: Unable to load historical test data: {e}{Colors.END}")            return {}                def save_test_history(self):        """Save historical test data"""        # Update historical data        for result_key, result in self.results.items():            if result.status in ["PASS", "RERUN PASS"] and result.duration > 0:                test_config_key = f"{result.name}:{result.config}"                                # If it's a new test, initialize history record                if test_config_key not in self.test_history:                    self.test_history[test_config_key] = {                        'durations': [],                        'avg_duration': 0,                        'last_duration': 0,                        'count': 0                    }                                # Update historical data                history = self.test_history[test_config_key]                history['durations'].append(result.duration)                # Only keep the last 10 runs                if len(history['durations']) > 10:                    history['durations'] = history['durations'][-10:]                history['avg_duration'] = sum(history['durations']) / len(history['durations'])                history['last_duration'] = result.duration                history['count'] += 1                # Save to file        try:            with open(self.history_db_file, 'w') as f:                json.dump(self.test_history, f, indent=2)        except Exception as e:            print(f"{Colors.YELLOW}Warning: Unable to save historical test data: {e}{Colors.END}")                def estimate_test_duration(self, test_name: str, config: str) -> float:        
"""Estimate test duration (seconds)"""        test_config_key = f"{test_name}:{config}"                # If historical data exists, use average duration        if test_config_key in self.test_history:            return self.test_history[test_config_key]['avg_duration']                # If no specific test history, try to use average of tests with same config        config_tests = [k for k in self.test_history.keys() if k.endswith(f":{config}")]        if config_tests:            avg_duration = sum(self.test_history[k]['avg_duration'] for k in config_tests) / len(config_tests)            return avg_duration                    # If no historical data at all, use default value (5 minutes)        return 300            def signal_handler(self, signum, frame):        """Signal handler"""        print(f"\n{Colors.YELLOW}Received signal {signum}, cleaning up...{Colors.END}")        self.cleanup()        sys.exit(1)            def cleanup(self):        """Clean up resources"""        # Save error monitor state        self.save_error_monitor_state()                if self.args.mode == "lsf" and self.job_ids:            print(f"{Colors.YELLOW}Cancelling LSF jobs...{Colors.END}")            for job_id in self.job_ids:                try:                    subprocess.run(["bkill", str(job_id)],                                  stdout=subprocess.PIPE,                                 stderr=subprocess.PIPE,                                 timeout=10)                except:                    pass                        def load_test_list(self, test_file: str, default_config: str = None) -> List[Tuple[str, str]]:        """Load test list, returns list of (test_name, config)"""        tests = []        try:            with open(test_file, 'r') as f:                for line in f:                    line = line.strip()                    if line and not line.startswith('#'):                        # Check if line contains config info (format: test_name:config)                        
if ':' in line:                            test_name, config = line.split(':', 1)                            tests.append((test_name.strip(), config.strip()))                        else:                            # Use default config                            tests.append((line, default_config))        except FileNotFoundError:            print(f"{Colors.RED}Error: Test list file not found {test_file}{Colors.END}")            sys.exit(1)                    return tests        def load_test_cases(self, test_files: List[str]) -> List[Dict]:        """Load the test define"""        cases = []        for file in test_files:            with open(file, "r") as f:                cases.extend(json.load(f))        return cases        def load_failed_regression_cases(self, failed_regression_file: str) -> List[Dict]:        """Load failed test cases from failed regression JSON file"""        try:            with open(failed_regression_file, "r") as f:                failed_cases = json.load(f)                        print(f"Loaded {len(failed_cases)} failed test cases from {failed_regression_file}")                        # Convert failed regression format back to standard test case format            standard_cases = []            for failed_case in failed_cases:                # Extract the original test case data, removing the failure-specific fields                standard_case = {}                for key, value in failed_case.items():                    if not key.startswith('actual_') and key not in ['log_file', 'retry_count', 'failure_timestamp', 'original_repeat']:                        standard_case[key] = value                                # Restore original repeat count if it was modified                if 'original_repeat' in failed_case:                    standard_case['repeat'] = failed_case['original_repeat']                                standard_cases.append(standard_case)                        print(f"Converted {len(standard_cases)} failed test cases 
to standard format")            return standard_cases                    except Exception as e:            print(f"Error loading failed regression file {failed_regression_file}: {e}")            return []        def load_regression_list_cases(self, regression_list_file: str) -> List[Dict]:        """Load test cases from regression list JSON file"""        try:            with open(regression_list_file, "r") as f:                test_cases = json.load(f)                        print(f"Loaded {len(test_cases)} test cases from regression list: {regression_list_file}")                        # Validate that each test case has required fields            valid_cases = []            for i, case in enumerate(test_cases):                if not isinstance(case, dict):                    print(f"Warning: Test case {i} is not a dictionary, skipping")                    continue                                if 'name' not in case:                    print(f"Warning: Test case {i} missing 'name' field, skipping")                    continue                                # Set default values for optional fields                if 'config' not in case:                    case['config'] = 'default'                if 'repeat' not in case:                    case['repeat'] = 1                if 'timeout' not in case:                    case['timeout'] = 60                if 'opts' not in case:                    case['opts'] = []                if 'group' not in case:                    case['group'] = ['default']                                valid_cases.append(case)                        print(f"Validated {len(valid_cases)} test cases from regression list")            return valid_cases                    except Exception as e:            print(f"Error loading regression list file {regression_list_file}: {e}")            return []        def filter_cases(self, cases: List[Dict], groups: List[str]) -> List[Dict]:        """Select the group by tag"""        return [case for case 
in cases if set(groups).issubset(set(case["group"]))]  # get test cases by group        def submit_compile(self, que: str, dienum: str, rtl_ver: str, mode: str, define: str = None) -> Dict:        """Submit the elab and compile"""        result = {            "name": "compile",            "status": "PENDING",        }                try:            # Build output directory path: self.args.output_dir            output_dir = self.args.output_dir                        # Using output directory for compile                        # Construct bsub command            cmd = ["bsub"]            # Add LSF parameters            cmd.extend(["-q", que])                        # Add resource reservation for compile job            resource_requests = []                        # Add memory reservation if specified            if hasattr(self.args, 'memory') and self.args.memory is not None:                memory_mb = self.args.memory * 1024  # Convert GB to MB                resource_requests.append(f"rusage[mem={memory_mb}]")                # Memory reservation configured for compile job                        # Add CPU selection for compile job (always request at least 1 CPU)            cpu_cores = getattr(self.args, 'cpu_cores', 1)                        # Build resource request string with select and rusage            resource_string = f"select[ncpus>={cpu_cores}]"            if resource_requests:                resource_string += f" rusage[{','.join(resource_requests)}]"                        cmd.extend(["-R", resource_string])            # Resource request configured for compile job                        # Set job name and output            cmd.extend([                "-J", f"pre_jobs",                "make",                f'pre_full_run',                #f'DUT_VER={rtl_ver}',                #f'die_num={dienum}',                #f'WORK_DIR={output_dir}',                f'mode={mode}'                #f'p2_mode={mode}'            ])                        if define is not 
None:                cmd.extend([f'def+={define}'])                        # Compile command prepared                        # Submit job            output = subprocess.check_output(cmd, shell=False)            job_id = self.parse_job_id(output)            result["job_id"] = job_id            result["status"] = "SUBMITTED"                    except subprocess.CalledProcessError as e:            result["status"] = "SUBMIT_FAIL"            result["error"] = str(e)        except Exception as e:            result["status"] = f"ERROR: {str(e)}"                    return result        def gen_test_case(self, case: Dict, w_dir: str, log_dir: str, que: str, specified_seed: str = None) -> List[Dict]:        """Generate test case commands"""        opcodes = []                for repeat in range(case["repeat"]):            # Construct bsub command            cmd = ["bsub"]            # Add LSF parameters            cmd.extend(["-q", que])                        # Memory reservation handling            memory_gb = None                        # Priority 1: Check if memory is specified in the test case JSON            if "memory" in case and case["memory"]:                try:                    memory_gb = int(case["memory"])                    # Using memory from JSON configuration                except (ValueError, TypeError):                    print(f"  Warning: Invalid memory value in JSON: {case['memory']}")                        # Priority 2: Use command line argument if JSON doesn't have memory            if memory_gb is None and hasattr(self.args, 'memory') and self.args.memory is not None:                memory_gb = self.args.memory                # Using memory from command line                        # Add resource reservation to bsub command            resource_requests = []                        # Add memory reservation if specified            if memory_gb:                memory_mb = memory_gb * 1024  # Convert GB to MB                
resource_requests.append(f"rusage[mem={memory_mb}]")                # Memory reservation configured                        # Add CPU selection (always request at least 1 CPU)            cpu_cores = getattr(self.args, 'cpu_cores', 1)                        # Build resource request string with select and rusage            resource_string = f"select[ncpus>={cpu_cores}]"            if resource_requests:                resource_string += f" rusage[{','.join(resource_requests)}]"                        cmd.extend(["-R", resource_string])            # Resource request configured                        # Legacy memory handling (commented out in original)            # if "memory" in case:            #     if case["memory"] != "":            #         cmd.extend(["-M", str(self.parse_memory(case["memory"]))])                        # Use specified seed if provided, otherwise generate unique seed            if specified_seed is not None:                seed = int(specified_seed)                # Using specified seed for repeat            else:                # Generate unique seed for each test case, opts, and repeat                # Include opts in seed generation to ensure different opts get different seeds                opts_str = "_".join(case["opts"]) if case["opts"] else "no_opts"                unique_seed_base = hash(case["name"] + opts_str + str(repeat) + str(int(time.time() * 1000)))                seed = abs(unique_seed_base) % 10000                # Generated seed for repeat with opts            if "lmn" in case:                lmn = case["lmn"]            else:                lmn = ""                        # Set job name and output            cmd.extend([                "-J", f"TEST_{case['name']}_{repeat}",                "make",                f'batch_run',                f'tc={case["name"]}',                f'pl=UVM_LOW',                f'timestamp=N',                f'timeout={case["timeout"]}',                f'WORK_DIR={w_dir}',                
f'LOGDIR={str(self.log_dir)}',  # Point to the logs directory                f'WAVEDIR={str(self.wave_dir)}',  # Add wave directory parameter                f'wave={"fsdb" if self.args.wave else "null"}',                f'seed={seed}',                f'lmn={lmn}'            ])                        # Add coverage parameter if specified            if hasattr(self.args, 'cov') and self.args.cov:                cmd.extend([f'cov={self.args.cov}'])                # Coverage parameter configured                        # Debug: print timeout value            # Test case timeout configured                        # Add VCS optimization options            if hasattr(self.args, 'vcs_optimize') and self.args.vcs_optimize:                vcs_cores = getattr(self.args, 'vcs_cores', 4)                cmd.extend([f'opts+=+VCS_PARALLEL={vcs_cores}'])                cmd.extend([f'opts+=+VCS_OPTIMIZE=1'])                # VCS optimization configured                    if hasattr(self.args, 'vcs_xa') and self.args.vcs_xa:                cmd.extend([f'opts+=+VCS_XA=1'])                # VCS-XA acceleration configured                        # Add optional parameters            # Adding opts configuration            for opt in case["opts"]:                if opt:  # Only add non-empty opts                    cmd.extend([f'opts+=+{opt}'])                    # Added opt configuration                        # Submit job - use the regression-specific log directory            # Create test-specific log directory under logs/            # Build log file name robustly to avoid extra underscores when fields are empty            opts_str = "_".join([o for o in (case.get("opts") or []) if o])            test_log_dir = self.log_dir / case['name']            test_log_dir.mkdir(parents=True, exist_ok=True)            name_parts = [case['name'], str(seed)]            if opts_str:                name_parts.append(opts_str)            else:                name_parts.append("no_opts")            if lmn: 
               name_parts.append(lmn)            safe_name = "_".join(name_parts)            log_file = str(test_log_dir / f"{safe_name}.log")            # Log file configured                        opcodes.append({                "cmd": cmd,                "case": case,                "id": repeat,                "log_path": log_file,                "seed": str(seed)            })                return opcodes        def submit_test_case(self, opcode: Dict) -> Dict:        """Submit one test to LSF"""        result = {            "name": opcode["case"].get("name", "unknown"),            "status": "PENDING",            "seed": opcode["seed"],            "id": opcode["id"]        }                try:            # Submit cmd with detailed error capture            timestamp = datetime.now().strftime('%m-%d %H:%M:%S')            print(f"INFO: {timestamp} Submitting job: {opcode['case'].get('name', 'unknown')} seed={opcode['seed']}")            print(f"INFO: {timestamp} Command: {' '.join(opcode['cmd'])}")                        # Use subprocess.run to capture both stdout and stderr            process = subprocess.run(opcode["cmd"], shell=False,                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,                                   universal_newlines=True, timeout=120)                        # Check return code            if process.returncode != 0:                result["status"] = "SUBMIT_FAIL"                result["error"] = f"Command failed with return code {process.returncode}"                result["stdout"] = process.stdout                result["stderr"] = process.stderr                timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                print(f"INFO: {timestamp} [jobid UNKNOWN] {opcode['case'].get('name', 'unknown')} seed={opcode['seed']} SUBMIT_FAIL")                print(f"INFO: {timestamp} Return code: {process.returncode}")                print(f"INFO: {timestamp} STDOUT: {process.stdout}")                
print(f"INFO: {timestamp} STDERR: {process.stderr}")                return result                        # Parse job ID from output            job_id = self.parse_job_id(process.stdout.encode())                        if job_id == "UNKNOWN":                result["status"] = "SUBMIT_FAIL"                result["error"] = "Failed to parse job ID from LSF output"                result["stdout"] = process.stdout                result["stderr"] = process.stderr                timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                print(f"INFO: {timestamp} [jobid {job_id}] {opcode['case'].get('name', 'unknown')} seed={opcode['seed']} SUBMIT_FAIL")                print(f"INFO: {timestamp} LSF Output: {process.stdout}")                print(f"INFO: {timestamp} LSF Error: {process.stderr}")            else:                timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                opts_str = "_".join(opcode['case'].get('opts', [])) if opcode['case'].get('opts') else "no_opts"                print(f"INFO: {timestamp} [jobid {job_id}] {opcode['case'].get('name', 'unknown')} seed={opcode['seed']} SUBMITTED")                result["job_id"] = job_id                result["status"] = "SUBMITTED"                result["error"] = ''                result["stdout"] = process.stdout                result["stderr"] = process.stderr                if job_id not in self.submitted_jobs:                    self.submitted_jobs.append(job_id)                # Store additional test case info for later reference                result["case_name"] = opcode['case'].get('name', 'unknown')                result["case_seed"] = opcode['seed']                # Persist the full opcode for this job so FAIL handling can resubmit directly                try:                    self.job_meta[str(job_id)] = copy.deepcopy(opcode)                    result["opcode"] = self.job_meta[str(job_id)]                except Exception:                    # Best-effort; do not block on 
deepcopy issues                    self.job_meta[str(job_id)] = opcode                                # Also update the corresponding TestResult object                # Use the full unique key to find the correct TestResult                test_name = opcode['case'].get('name', 'unknown')                config = opcode['case'].get('config', 'default')                seed = opcode['seed']                opts_str = "_".join(opcode['case'].get('opts', [])) if opcode['case'].get('opts') else "no_opts"                unique_key = f"{test_name}:{config}:{seed}:{opts_str}"                # Store unique_key alongside opcode for direct updates later                try:                    self.job_meta[str(job_id)]["unique_key"] = unique_key                except Exception:                    pass                result["unique_key"] = unique_key                                if unique_key in self.results:                    self.results[unique_key].job_id = job_id                    self.results[unique_key].seed = seed                    if opcode.get('log_path'):                        self.results[unique_key].log_file = opcode['log_path']                    print(f"DEBUG: Updated TestResult {unique_key} with job_id {job_id}")                else:                    print(f"Warning: TestResult not found for key: {unique_key}")                    except subprocess.TimeoutExpired as e:            result["status"] = "SUBMIT_FAIL"            result["error"] = f"Command timeout: {str(e)}"            timestamp = datetime.now().strftime('%m-%d %H:%M:%S')            print(f"INFO: {timestamp} [jobid UNKNOWN] {opcode['case'].get('name', 'unknown')} seed={opcode['seed']} SUBMIT_FAIL: Timeout")        except subprocess.CalledProcessError as e:            result["status"] = "SUBMIT_FAIL"            result["error"] = str(e)            timestamp = datetime.now().strftime('%m-%d %H:%M:%S')            print(f"INFO: {timestamp} [jobid UNKNOWN] {opcode['case'].get('name', 'unknown')} 
seed={opcode['seed']} SUBMIT_FAIL: {str(e)}")        except Exception as e:            result["status"] = f"ERROR: {str(e)}"            timestamp = datetime.now().strftime('%m-%d %H:%M:%S')            print(f"INFO: {timestamp} [jobid UNKNOWN] {opcode['case'].get('name', 'unknown')} seed={opcode['seed']} ERROR: {str(e)}")                    return result        def get_test_info_by_job_id(self, job_id: str) -> Optional[Dict]:        """Get test case information by job ID"""        # First search through submitted results to find matching job_id        for result in getattr(self, 'submitted_results', []):            if result.get('job_id') == job_id:                # Try multiple field names to find the test name                test_name = result.get('case_name') or result.get('name')                test_seed = result.get('case_seed') or result.get('seed')                                if test_name and test_seed:                return {                        'name': test_name,                        'seed': test_seed,                    'id': result.get('id', 'unknown')                }                # If not found in submitted_results, search through self.results        for result_key, result in self.results.items():            if hasattr(result, 'job_id') and result.job_id == job_id:                return {                    'name': result.name,                    'seed': getattr(result, 'seed', 'unknown'),                    'id': getattr(result, 'id', 'unknown')                }                # Debug info (muted)        # print(f"DEBUG: Could not find test info for job_id {job_id}")        # print(f"DEBUG: submitted_results count: {len(getattr(self, 'submitted_results', []))}")        # print(f"DEBUG: self.results count: {len(self.results)}")        # for i, result in enumerate(getattr(self, 'submitted_results', [])[:3]):        #     print(f"DEBUG: submitted_results[{i}]: {result}")        # for i, (key, result) in enumerate(list(self.results.items())[:3]): 
       #     print(f"DEBUG: results[{i}] {key}: job_id={getattr(result, 'job_id', 'None')}")                return None        def _resubmit_from_stored_opcode(self, job_id: str):        """Directly resubmit a failed job using the stored opcode, avoiding any name/seed lookup."""        stored = self.job_meta.get(str(job_id))        if not stored:            timestamp = datetime.now().strftime('%m-%d %H:%M:%S')            print(f"INFO: {timestamp} [jobid {job_id}] No stored opcode found; cannot direct-resubmit")            return                # CRITICAL FIX: Check retry limit before proceeding        current_retry_attempt = stored.get('retry_attempt', 0)        max_retries = getattr(self.args, 'retry', 0)                if max_retries <= 0:            timestamp = datetime.now().strftime('%m-%d %H:%M:%S')            print(f"INFO: {timestamp} [jobid {job_id}] Retry disabled (max_retries={max_retries})")            return                    if current_retry_attempt >= max_retries:            timestamp = datetime.now().strftime('%m-%d %H:%M:%S')            print(f"INFO: {timestamp} [jobid {job_id}] Max retries reached ({current_retry_attempt}/{max_retries}), stopping retry")            return                try:            # Clone and mark as retry while keeping same seed/opts/config            opcode = copy.deepcopy(stored)            opcode['retry_attempt'] = current_retry_attempt + 1            opcode['retry_seed'] = opcode.get('seed', 'unknown')        except Exception:            opcode = stored            opcode['retry_attempt'] = current_retry_attempt + 1            opcode['retry_seed'] = opcode.get('seed', 'unknown')
        # Backup the original log before resubmitting to avoid it being overwritten by the retry        try:            original_log_path = self.get_test_log_path_by_job_id(job_id)            if original_log_path and os.path.exists(original_log_path):                p = Path(original_log_path)                backup_path = p.with_name(p.stem + '_bak.log') if p.suffix == '.log' else Path(str(p) + '_bak.log')                if not backup_path.exists():                    os.rename(str(p), str(backup_path))                    print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} Backed up log: {p} -> {backup_path}")        except Exception as _e:            print(f"WARNING: Failed to backup log for job {job_id}: {_e}")
        # Force wave dump on retry: ensure wave=fsdb in command        try:            cmd_list = opcode.get('cmd', [])            if isinstance(cmd_list, list):                replaced = False                for i, token in enumerate(cmd_list):                    if isinstance(token, str) and token.startswith('wave='):                        if token != 'wave=fsdb':                            cmd_list[i] = 'wave=fsdb'                        replaced = True                        break                if not replaced:                    cmd_list.append('wave=fsdb')                # Keep rerun tag consistent where applicable for standard resubmits (no harm if already present)                if not any(isinstance(t, str) and t.startswith('lmn=') for t in cmd_list):                    cmd_list.append('lmn=rerun')                opcode['cmd'] = cmd_list        except Exception:            pass
        # Ensure corresponding TestResult exists/updated based on unique_key (bypass name/seed mapping)        unique_key = stored.get('unique_key')        if not unique_key:            # Fallback: compute from opcode            test_name = opcode.get('case', {}).get('name', 'unknown')            config = opcode.get('case', {}).get('config', 'default')            seed_val = opcode.get('seed', 'unknown')            opts_str = "_".join(opcode.get('case', {}).get('opts', [])) if opcode.get('case', {}).get('opts') else "no_opts"            unique_key = f"{test_name}:{config}:{seed_val}:{opts_str}"        else:            # Parse parts just for constructing missing TestResult if needed            try:                name_part, config_part, seed_part, _ = unique_key.split(':', 3)            except ValueError:                name_part = opcode.get('case', {}).get('name', 'unknown')                config_part = opcode.get('case', {}).get('config', 'default')                seed_part = opcode.get('seed', 'unknown')
        # Create or update TestResult entry directly by unique_key        try:            if unique_key not in self.results:                # Build a new TestResult with available meta                new_name = locals().get('name_part', opcode.get('case', {}).get('name', 'unknown'))                new_cfg = locals().get('config_part', opcode.get('case', {}).get('config', 'default'))                self.results[unique_key] = TestResult(new_name, new_cfg)                self.results[unique_key].seed = locals().get('seed_part', opcode.get('seed', 'unknown'))                self.results[unique_key].opts = opcode.get('case', {}).get('opts', [])                try:                    self.results[unique_key].estimated_duration = self.estimate_test_duration(new_name, new_cfg)                except Exception:                    pass            else:                # Update existing TestResult for retry                existing_result = self.results[unique_key]                existing_result.retry_count = opcode.get('retry_attempt', 1)                existing_result.is_retry = True                existing_result.retry_started = True                        # Create a new unique key for retry cases to track them separately            retry_unique_key = f"{unique_key}_retry_{opcode.get('retry_attempt', 1)}"            if retry_unique_key not in self.results:                retry_name = locals().get('name_part', opcode.get('case', {}).get('name', 'unknown'))                retry_cfg = locals().get('config_part', opcode.get('case', {}).get('config', 'default'))                retry_result = TestResult(retry_name, retry_cfg)                retry_result.seed = locals().get('seed_part', opcode.get('seed', 'unknown'))                retry_result.opts = opcode.get('case', {}).get('opts', [])                retry_result.retry_count = opcode.get('retry_attempt', 1)                retry_result.is_retry = True                retry_result.retry_started = True                try:            
        retry_result.estimated_duration = self.estimate_test_duration(retry_name, retry_cfg)                except Exception:                    pass                self.results[retry_unique_key] = retry_result        except Exception:            pass
        timestamp = datetime.now().strftime('%m-%d %H:%M:%S')        case_name = opcode.get('case', {}).get('name', 'unknown')        case_seed = opcode.get('seed', 'unknown')        print(f"INFO: {timestamp} [jobid {job_id}] Directly resubmitting FAIL case {case_name} seed={case_seed} (retry {opcode.get('retry_attempt', 1)}/{max_retries})")                # Submit the retry job        result = self.submit_test_case(opcode)        if result["status"] == "SUBMITTED":            # Store the retry job metadata            retry_job_id = result["job_id"]            opcode['unique_key'] = retry_unique_key            self.job_meta[str(retry_job_id)] = opcode                        # Update the retry TestResult with the new job ID            if retry_unique_key in self.results:                self.results[retry_unique_key].job_id = retry_job_id                self.results[retry_unique_key].status = "PENDING"                        # Add to submitted jobs list            if retry_job_id not in self.submitted_jobs:                self.submitted_jobs.append(retry_job_id)                        timestamp = datetime.now().strftime('%m-%d %H:%M:%S')            print(f"INFO: {timestamp} [jobid {retry_job_id}] {case_name} seed={case_seed} Submitted (retry {opcode.get('retry_attempt', 1)}/{max_retries})")        else:            timestamp = datetime.now().strftime('%m-%d %H:%M:%S')            print(f"INFO: {timestamp} [jobid {job_id}] Retry submission failed: {result.get('error', 'Unknown error')}")
    def update_test_result_with_job_info(self, test_name: str, job_id: str, seed: str, log_file: str = None):        """Update TestResult object with job information"""        for result_key, result in self.results.items():            if result.name == test_name:                result.job_id = job_id                # Store seed information                if not hasattr(result, 'seed'):                    result.seed = seed                # Store log file path                if log_file:                    result.log_file = log_file                # Update status to RUNNING when job is submitted                if result.status == "PENDING":                    result.start()  # This will set status to RUNNING and start_time                break        def update_test_result_status(self, job_id: str, status: str):        """Update test result status in self.results"""        # Find the test result by job_id        for result_key, result in self.results.items():            if hasattr(result, 'job_id') and result.job_id == job_id:                result.finish(status, "")                break                # Also check in submitted_results        for result in getattr(self, 'submitted_results', []):            if result.get('job_id') == job_id:                result['status'] = status                break        def _update_test_result_status(self, unique_key: str, job_id: str, seed: str, status: str = "PENDING"):        """Helper method to update TestResult status consistently"""        if unique_key in self.results:            self.results[unique_key].job_id = job_id            self.results[unique_key].seed = seed            self.results[unique_key].status = status            return True        else:            print(f"Warning: TestResult not found for key: {unique_key}")            return False        def get_test_status_display(self, job_id: str, status: str) -> str:        """Get formatted test status display string"""        test_info = 
self.get_test_info_by_job_id(job_id)        if test_info:            return f"[jobid {job_id}] {test_info['name']} seed={test_info['seed']}"        else:            # Unknown mapping happens when TestResult hasn't been recorded yet; mute noisy label            return f"[jobid {job_id}]"        def parse_job_id(self, output: bytes) -> str:        """Parse job ID from LSF output"""        try:            output_str = output.decode('utf-8')            # Look for pattern like "Job <12345> is submitted to queue <queue_name>"            match = re.search(r'Job <(\d+)>', output_str)            if match:                return match.group(1)            else:                return "UNKNOWN"        except Exception:            return "UNKNOWN"        def parse_memory(self, memory_str: str) -> int:        """Parse memory string to MB"""        try:            if memory_str.endswith('GB'):                return int(float(memory_str[:-2]) * 1024)            elif memory_str.endswith('MB'):                return int(float(memory_str[:-2]))            else:                return int(memory_str)        except Exception:            return 4000  # Default to 4GB        def run_compile_and_regression(self, dienum: str, rtl_ver: str, mode: str, define: str = None) -> bool:        """Run complete compile and regression flow, returns True if successful, False if failed"""        print(f"{Colors.BLUE}=== Starting Compile and Regression Flow ==={Colors.END}")                # Build output directory path: self.args.output_dir        output_dir = self.args.output_dir                print(f"Using compile output directory: {output_dir}")        print(f"Using regression directory: {self.regression_dir}")                # Check if compile should be skipped        skip_compile = self.should_skip_compile()                if skip_compile:            print(f"{Colors.YELLOW}Compile step is set to be bypassed{Colors.END}")                        # Check if compile files already exist            if 
self.check_compile_files_exist(output_dir, dienum, rtl_ver, mode):                print(f"{Colors.GREEN}Existing compile files found, skipping compile step{Colors.END}")                compile_required = False            else:                print(f"{Colors.YELLOW}No existing compile files found, compile step is required{Colors.END}")                compile_required = True        else:            compile_required = True                # Step 1: Submit compile job (if required)        if compile_required:            print(f"Step 1: Submitting compile job...")            compile_result = self.submit_compile(self.args.queue, dienum, rtl_ver, mode, define)                        if compile_result["status"] == "SUBMITTED":                timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                print(f"INFO: {timestamp} [jobid {compile_result['job_id']}] compile_job SUBMITTED")                                # Wait for compile job to complete                print(f"Waiting for compile job to complete...")                compile_success = self.wait_for_job_completion(compile_result["job_id"])                                # If compile failed, exit immediately                if not compile_success:                    print(f"{Colors.RED}Compilation failed! Exiting without running regression tests.{Colors.END}")                    return False                                # Verify compile was successful by checking for output files                if not self.check_compile_files_exist(output_dir, dienum, rtl_ver, mode):                    print(f"{Colors.RED}Error: Compile job completed but no output files found{Colors.END}")                    print(f"{Colors.RED}Compilation verification failed! 
Exiting without running regression tests.{Colors.END}")                    return False                                    print(f"{Colors.GREEN}Compile job completed successfully{Colors.END}")            else:                timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                print(f"INFO: {timestamp} [jobid UNKNOWN] compile_job SUBMIT_FAIL: {compile_result.get('error', 'Unknown error')}")                return False        else:            print(f"Step 1: Compile step skipped (bypass enabled and files exist)")                # Step 2: Generate and submit test cases        print(f"Step 2: Generating and submitting test cases...")                # Load test cases        if hasattr(self.args, 'failed_regression') and self.args.failed_regression:            # Load from failed regression file            print(f"Loading failed test cases from: {self.args.failed_regression}")            test_cases = self.load_failed_regression_cases(self.args.failed_regression)            if not test_cases:                print(f"{Colors.RED}Error: No test cases loaded from failed regression file{Colors.END}")                return False            print(f"Loaded {len(test_cases)} failed test cases for re-run")        elif hasattr(self.args, 'list') and self.args.list:            # Load from regression list file in ../def/case_def/ directory            regression_list_path = os.path.join(os.getcwd(), "..", "def", "case_def", self.args.list)            print(f"Loading test cases from regression list: {regression_list_path}")            test_cases = self.load_regression_list_cases(regression_list_path)            if not test_cases:                print(f"{Colors.RED}Error: No test cases loaded from regression list file{Colors.END}")                return False            print(f"Loaded {len(test_cases)} test cases from regression list")        else:            # Load from normal test files            cur_path = os.getcwd()            test_file_list_name = cur_path + 
"/../def/json_list"            test_file_list = []                        with open(test_file_list_name, 'r') as f:                for line in f:                    if line != '\n':                        file_path = cur_path + "/../def" + line                        test_file_list.append(file_path.replace('\n', ''))                        test_cases = self.load_test_cases(test_file_list)            print(f"Loaded {len(test_cases)} test cases from files")                        # Filter test cases by group (only if groups are specified)            if self.args.groups:                test_cases = self.filter_cases(test_cases, self.args.groups)                print(f"Filtered to {len(test_cases)} test cases for groups: {self.args.groups}")            else:                print(f"No group filter applied, using all {len(test_cases)} test cases")                # Generate test case commands        all_opcodes = []        for case in test_cases:            print(f"Processing test case: {case['name']} with repeat={case.get('repeat', 1)}")            # Use regression-specific log directory for simulation output            sim_output_dir = str(self.regression_dir)            print(f"  Using simulation output directory: {sim_output_dir}")            opcodes = self.gen_test_case(case, output_dir, sim_output_dir, self.args.queue)            all_opcodes.extend(opcodes)                # Remove duplicate opcodes based on unique identifier        print(f"Generated {len(all_opcodes)} test case commands")        print("Removing duplicate opcodes...")                # Create a set to track unique identifiers        seen_identifiers = set()        unique_opcodes = []                for opcode in all_opcodes:            # Create a unique identifier for each opcode            # Combine test name, seed, and repeat ID to ensure uniqueness            unique_id = f"{opcode['case']['name']}_{opcode['seed']}_{opcode['id']}"                        if unique_id not in seen_identifiers:          
      seen_identifiers.add(unique_id)                unique_opcodes.append(opcode)            else:                # print(f"  Skipping duplicate: {unique_id}")                pass                all_opcodes = unique_opcodes        print(f"After removing duplicates: {len(all_opcodes)} unique test case commands")                # Initialize test results for all test cases - ensure no duplicates        print(f"Initializing test results for {len(all_opcodes)} opcodes...")        unique_test_keys = set()                for opcode in all_opcodes:            test_name = opcode["case"].get("name", "unknown")            config = opcode["case"].get("config", "default")            seed = opcode["seed"]            opts_str = "_".join(opcode["case"].get("opts", [])) if opcode["case"].get("opts") else "no_opts"                        # Create a unique key that combines test name, config, seed, and opts            unique_key = f"{test_name}:{config}:{seed}:{opts_str}"                        if unique_key not in unique_test_keys:                unique_test_keys.add(unique_key)                                if unique_key not in self.results:                    self.results[unique_key] = TestResult(test_name, config)                    self.results[unique_key].seed = seed                    self.results[unique_key].opts = opcode["case"].get("opts", [])                    self.results[unique_key].estimated_duration = self.estimate_test_duration(test_name, config)                    # print(f"  Created TestResult for: {unique_key}")                else:                    # print(f"  TestResult already exists for: {unique_key}")                    pass            else:                # print(f"  Skipping duplicate opcode for: {unique_key}")                pass                print(f"Initialized {len(unique_test_keys)} unique test results")                timestamp = datetime.now().strftime('%m-%d %H:%M:%S')        print(f"INFO: {timestamp} Submitting test cases with max concurrent 
limit: {self.args.max_concurrent}")                # Initial job submission up to max_concurrent        initial_submit_count = min(self.args.max_concurrent, len(all_opcodes))        sim_cases_num = len(all_opcodes)        print(f"Initial submission: will submit {initial_submit_count} jobs")                for _ in range(initial_submit_count):            if all_opcodes:                opcode = all_opcodes.pop(0)                result = self.submit_test_case(opcode)                self.submitted_results.append(result)                                if result["status"] == "SUBMITTED":                    # Don't increment running_jobs yet - wait for actual RUN status                    if result["job_id"] not in self.submitted_jobs:                        self.submitted_jobs.append(result["job_id"])                                        # Update corresponding TestResult object                    test_name = opcode["case"].get("name", "unknown")                    config = opcode["case"].get("config", "default")                    seed = opcode["seed"]                    opts_str = "_".join(opcode["case"].get("opts", [])) if opcode["case"].get("opts") else "no_opts"                    unique_key = f"{test_name}:{config}:{seed}:{opts_str}"                                        if self._update_test_result_status(unique_key, result["job_id"], seed, "PENDING"):                        print(f"DEBUG: Updated TestResult {unique_key} with job_id {result['job_id']} in initial submission")                                        timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                    print(f"INFO: {timestamp} [jobid {result['job_id']}] {result['name']} seed={result.get('seed', 'unknown')} PENDING")                else:                    timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                    print(f"INFO: {timestamp} [jobid UNKNOWN] {result['name']} seed={result.get('seed', 'unknown')} SUBMIT_FAIL: {result.get('error', 'Unknown error')}")       
                         # Add delay between submissions to prevent server overload                time.sleep(2)                print(f"Initial submission complete. Submitted jobs: {len(self.submitted_jobs)}, Pending opcodes: {len(all_opcodes)}")                # Clean any duplicate job IDs that may have been added        self._clean_submitted_jobs()                # CRITICAL FIX: Initialize job status counts after initial submission        if self.submitted_jobs:            print(f"Initializing job status counts for {len(self.submitted_jobs)} submitted jobs...")            # Check current status of all submitted jobs            initial_status_changes = {}            for job_id in self.submitted_jobs:                status = self.check_lsf_job_status(int(job_id))                initial_status_changes[int(job_id)] = status                print(f"DEBUG: Job {job_id} initial status: {status}")                        # Update counts based on initial status            self._update_job_status_counts(initial_status_changes)            print(f"DEBUG: After initialization - running_jobs: {self.running_jobs}, pending_jobs: {self.pending_jobs}")                    # Main loop: monitor jobs and submit new ones as slots become available        last_status_print_time = time.time()  # Track last status print time        last_status_log_time = time.time()  # Track last RERUN status log time (60s)                # Start status monitoring thread for LSF regression        self.status_thread = threading.Thread(target=self._status_print_thread, daemon=True)        self.status_thread.start()        print(f"{Colors.BLUE}Started status monitoring thread for real-time report updates{Colors.END}")                # 提前启动作业监控线程,避免主循环阻塞导致后续 monitor 阶段无法及时覆盖        try:            if not hasattr(self, 'monitor_thread') or not getattr(self, 'monitor_thread', None) or not self.monitor_thread.is_alive():                # Do not pass the shared list reference; let the function pick up the live list  
              self.monitor_thread = threading.Thread(target=self.monitor_all_jobs, daemon=True)                self.monitor_thread.start()                print(f"{Colors.BLUE}Started early monitor_all_jobs thread{Colors.END}")        except Exception:            pass
        # DEBUG: Print loop condition values        print(f"DEBUG: Loop condition check - all_opcodes: {len(all_opcodes)}, running_jobs: {self.running_jobs}, pending_jobs: {self.pending_jobs}")        print(f"DEBUG: Loop condition result: {bool(all_opcodes or (self.running_jobs > 0 or self.pending_jobs > 0))}")                while all_opcodes or (self.running_jobs > 0 or self.pending_jobs > 0):            # Check for completed jobs and update status counts            monitor_alive = False            try:                monitor_alive = hasattr(self, 'monitor_thread') and self.monitor_thread and self.monitor_thread.is_alive()                                except Exception:                monitor_alive = False                        if self.submitted_jobs and not monitor_alive:                # Lightweight accounting only; lifecycle handled by monitor thread                status_changes = {}                for job_id in self.submitted_jobs[:]:                    status_changes[job_id] = self.check_lsf_job_status(int(job_id))                self._update_job_status_counts(status_changes)                try:                    self.update_real_time_report()                            except Exception:                                pass
                # Print status summary similar to the image format                    try:                        total_reruns = sum(getattr(res, 'retry_count', 0) for _, res in self.results.items())                    pass_count = sum(1 for _, res in self.results.items() if getattr(res, 'status', '') in ("PASS", "RERUN PASS"))                    fail_count = sum(1 for _, res in self.results.items() if getattr(res, 'status', '') in ("FAIL", "RERUN FAIL"))                    rerun_pass_count = sum(1 for _, res in self.results.items() if getattr(res, 'status', '') == "RERUN PASS")                    rerun_fail_count = sum(1 for _, res in self.results.items() if getattr(res, 'status', '') == "RERUN FAIL")                    total_test_cases = self.get_total_test_cases_count()                    timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                    print(f"INFO: {timestamp} Status: Running={self.running_jobs}, Pending={self.pending_jobs}, Had Rerun={total_reruns}, Pass={pass_count}, Fail={fail_count}, RerunPass={rerun_pass_count}, RerunFail={rerun_fail_count}, Total={total_test_cases}")                    except Exception:                    pass            elif monitor_alive:                # When monitor thread owns job lifecycle, perform a lightweight status update only                    try:                    self._update_lsf_job_statuses()                    self.update_real_time_report()                    except Exception:                    pass                        # Submit new jobs if we have capacity and pending opcodes            while all_opcodes and self.running_jobs < self.args.max_concurrent:                opcode = all_opcodes.pop(0)                result = self.submit_test_case(opcode)                self.submitted_results.append(result)                                if result["status"] == "SUBMITTED":                    # Don't increment running_jobs yet - wait for actual RUN status                    if result["job_id"] not 
in self.submitted_jobs:                        self.submitted_jobs.append(result["job_id"])                                        # Update corresponding TestResult object                    test_name = opcode["case"].get("name", "unknown")                    config = opcode["case"].get("config", "default")                    seed = opcode["seed"]                    opts_str = "_".join(opcode["case"].get("opts", [])) if opcode["case"].get("opts") else "no_opts"                    unique_key = f"{test_name}:{config}:{seed}:{opts_str}"                                        if self._update_test_result_status(unique_key, result["job_id"], seed, "PENDING"):                        print(f"DEBUG: Updated TestResult {unique_key} with job_id {result['job_id']} in main loop")                                        timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                    print(f"INFO: {timestamp} [jobid {result['job_id']}] {result['name']} seed={result.get('seed', 'unknown')} PENDING")                                        # Show regression status after each submission                    total_test_cases = self.get_total_test_cases_count()                    self.show_regression_status(self.running_jobs, self.pending_jobs, total_test_cases)                else:                    timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                    print(f"INFO: {timestamp} [jobid UNKNOWN] {result['name']} seed={result.get('seed', 'unknown')} SUBMIT_FAIL: {result.get('error', 'Unknown error')}")                                # Add delay between submissions to prevent server overload                time.sleep(2)                        # If we're waiting for jobs to complete, add a small delay and show status every 30 seconds            if (self.running_jobs > 0 or self.pending_jobs > 0) and not all_opcodes:                current_time = time.time()                # Show regression status every 30 seconds                if current_time - 
last_status_print_time >= 30:                    # Update LSF job statuses before showing status                    self._update_lsf_job_statuses()                    total_test_cases = self.get_total_test_cases_count()                    self.show_regression_status(self.running_jobs, self.pending_jobs, total_test_cases)                    last_status_print_time = current_time                                        # Update real-time report every 30 seconds                    self.update_real_time_report()                                # Log detailed status with RERUNS every 60 seconds                if current_time - last_status_log_time >= 60:                    timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                    try:                        total_reruns = sum(getattr(res, 'retry_count', 0) for _, res in self.results.items())                    except Exception:                        total_reruns = 0                    try:                        pass_count = sum(1 for _, res in self.results.items() if getattr(res, 'status', '') in ("PASS", "RERUN PASS"))                        fail_count = sum(1 for _, res in self.results.items() if getattr(res, 'status', '') in ("FAIL", "RERUN FAIL"))                        rerun_pass_count = sum(1 for _, res in self.results.items() if getattr(res, 'status', '') == "RERUN PASS")                        rerun_fail_count = sum(1 for _, res in self.results.items() if getattr(res, 'status', '') == "RERUN FAIL")                    except Exception:                        pass_count = fail_count = rerun_pass_count = rerun_fail_count = 0                    print(f"INFO: {timestamp} Status: RUNNING={self.running_jobs}, PENDING={self.pending_jobs}, Total={sim_cases_num}, RERUNS={total_reruns}, Pass={pass_count}, Fail={fail_count}, RerunPass={rerun_pass_count}, RerunFail={rerun_fail_count}")                    last_status_log_time = current_time                time.sleep(3)                # Step 3: Monitor all jobs   
     print(f"Step 3: Monitoring all jobs...")                # Show final submission summary        successful_submissions = len([r for r in self.submitted_results if r["status"] == "SUBMITTED"])        failed_submissions = len([r for r in self.submitted_results if r["status"] != "SUBMITTED"])        timestamp = datetime.now().strftime('%m-%d %H:%M:%S')        print(f"INFO: {timestamp} Submission Summary: {successful_submissions} successful, {failed_submissions} failed")                # Clean any duplicate job IDs before final status check        self._clean_submitted_jobs()                # Final status check: ensure all submitted jobs have their final status        print(f"Performing final status check for all submitted jobs...")        for job_id in self.submitted_jobs[:]:  # Use slice copy            status = self.check_lsf_job_status(int(job_id))            if status in ["DONE", "EXIT", "TERM", "KILL"]:                # Update corresponding TestResult object                test_info = self.get_test_info_by_job_id(job_id)                if test_info:                    test_name = test_info['name']                    seed = test_info['seed']                    # Find the correct TestResult object by searching through all results                    found_result = None                    for result_key, result_obj in self.results.items():                        if result_obj.name == test_name and getattr(result_obj, 'seed', '') == seed:                            found_result = result_obj                            break                                        if found_result:                        if status == "DONE":                            # CRITICAL FIX: Even for DONE status, check log file for errors                            log_file_path = self.get_test_log_path_by_job_id(job_id)                            if log_file_path and os.path.exists(log_file_path):                                test_passed = self.check_test_result(log_file_path)              
                  has_runtime_errors = self._check_for_runtime_errors(log_file_path)                                                                if test_passed and not has_runtime_errors:                                    found_result.finish("PASS", "")                                    print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} [jobid {job_id}] {test_name} seed={seed} PASS")                                elif test_passed and has_runtime_errors:                                    # Test passed but had runtime errors - mark as FAIL with error info                                    error_msg = "Test passed but had runtime errors (running but had error)"                                    found_result.finish("FAIL", error_msg)                                    found_result.error_detected = True                                    print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} [jobid {job_id}] {test_name} seed={seed} FAIL (running but had error)")                                else:                                    found_result.finish("FAIL", "Test failed (from log file)")                                    print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} [jobid {job_id}] {test_name} seed={seed} FAIL")                            else:                                # No log file available, assume PASS for DONE status                                found_result.finish("PASS", "")                                print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} [jobid {job_id}] {test_name} seed={seed} PASS (no log file)")                        else:                            # LSF显示EXIT/TERM/KILL时,先看仿真log是否已PASS                            log_file_path = self.get_test_log_path_by_job_id(job_id)                            if log_file_path and os.path.exists(log_file_path) and self.check_test_result(log_file_path):                                found_result.finish("PASS", "")                                
print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} [jobid {job_id}] {test_name} seed={seed} PASS")                        else:                            found_result.finish("FAIL", f"Job status: {status}")                                print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} [jobid {job_id}] {test_name} seed={seed} FAIL")                    else:                        print(f"Warning: TestResult not found for {test_name} seed={seed}")                                self.submitted_jobs.remove(job_id)                # Additional check: ensure all TestResult objects have correct status        # print(f"Performing additional status validation for all test results...")        for result_key, result_obj in self.results.items():            if result_obj.status == "RUNNING" and hasattr(result_obj, 'job_id') and result_obj.job_id:                # Check if this job is actually completed                try:                    status = self.check_lsf_job_status(int(result_obj.job_id))                    if status in ["DONE", "EXIT", "TERM", "KILL"]:                        if status == "DONE":                            # CRITICAL FIX: Even for DONE status, check log file for errors                            log_file_path = self.get_test_log_path_by_job_id(result_obj.job_id)                            if log_file_path and os.path.exists(log_file_path):                                test_passed = self.check_test_result(log_file_path)                                has_runtime_errors = self._check_for_runtime_errors(log_file_path)                                                                if test_passed and not has_runtime_errors:                                    result_obj.finish("PASS", "")                                    print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} Status corrected: {result_obj.name} -> PASS")                                elif test_passed and has_runtime_errors:                                    # Test 
passed but had runtime errors - mark as FAIL                                    error_msg = "Test passed but had runtime errors (running but had error)"                                    result_obj.finish("FAIL", error_msg)                                    result_obj.error_detected = True                                    print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} Status corrected: {result_obj.name} -> FAIL (running but had error)")                                else:                                    result_obj.finish("FAIL", "Test failed (from log file)")                                    print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} Status corrected: {result_obj.name} -> FAIL")                            else:                                # No log file available, assume PASS for DONE status                                result_obj.finish("PASS", "")                                print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} Status corrected: {result_obj.name} -> PASS (no log file)")                        else:                            # 再次以日志为准                            log_file_path = self.get_test_log_path_by_job_id(result_obj.job_id)                            if log_file_path and os.path.exists(log_file_path) and self.check_test_result(log_file_path):                                result_obj.finish("PASS", "")                                print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} Status corrected: {result_obj.name} -> PASS (from log)")                        else:                            result_obj.finish("FAIL", f"Job status: {status}")                            print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} Status corrected: {result_obj.name} -> FAIL")                except Exception as e:                    print(f"Warning: Could not check status for job {result_obj.job_id}: {e}")                # Final validation: ensure no RUNNING status remains if all jobs are done  
      if len(self.submitted_jobs) == 0:            print(f"All jobs completed, ensuring no RUNNING status remains...")            for result_key, result_obj in self.results.items():                if result_obj.status == "RUNNING":                    # If job is not in submitted_jobs but status is RUNNING,                     # it means the job completed but status wasn't updated                    if hasattr(result_obj, 'job_id') and result_obj.job_id:                        try:                            status = self.check_lsf_job_status(int(result_obj.job_id))                            if status == "DONE":                                # CRITICAL FIX: Even for DONE status, check log file for errors                                log_file_path = self.get_test_log_path_by_job_id(result_obj.job_id)                                if log_file_path and os.path.exists(log_file_path):                                    test_passed = self.check_test_result(log_file_path)                                    has_runtime_errors = self._check_for_runtime_errors(log_file_path)                                                                        if test_passed and not has_runtime_errors:                                        result_obj.finish("PASS", "")                                        print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} Final status correction: {result_obj.name} -> PASS")                                    elif test_passed and has_runtime_errors:                                        # Test passed but had runtime errors - mark as FAIL                                        error_msg = "Test passed but had runtime errors (running but had error)"                                        result_obj.finish("FAIL", error_msg)                                        result_obj.error_detected = True                                        print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} Final status correction: {result_obj.name} -> FAIL 
(running but had error)")                                    else:                                        result_obj.finish("FAIL", "Test failed (from log file)")                                        print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} Final status correction: {result_obj.name} -> FAIL")                                else:                                    # No log file available, assume PASS for DONE status                                    result_obj.finish("PASS", "")                                    print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} Final status correction: {result_obj.name} -> PASS (no log file)")                            elif status in ["EXIT", "TERM", "KILL"]:                                log_file_path = self.get_test_log_path_by_job_id(result_obj.job_id)                                if log_file_path and os.path.exists(log_file_path) and self.check_test_result(log_file_path):                                    result_obj.finish("PASS", "")                                    print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} Final status correction: {result_obj.name} -> PASS (from log)")                                else:                                    result_obj.finish("FAIL", f"Job status: {status}")                                    print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} Final status correction: {result_obj.name} -> FAIL")                        except Exception as e:                            print(f"Warning: Could not check final status for job {result_obj.job_id}: {e}")                    else:                        # No job_id, mark as PENDING                        result_obj.status = "PENDING"                        print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} Final status correction: {result_obj.name} -> PENDING (no job_id)")                # ENHANCED: Final status refresh - recheck all test results based on log files        
print(f"{Colors.BLUE}Performing final status refresh based on log files...{Colors.END}")        self._final_status_refresh()                        self.monitor_all_jobs(self.submitted_jobs)                # Stop all monitoring threads after all jobs are completed        print(f"{Colors.BLUE}Stopping all monitoring threads...{Colors.END}")        self._stop_status_thread = True        if hasattr(self, 'status_thread') and self.status_thread.is_alive():            self.status_thread.join(timeout=5)            print(f"{Colors.BLUE}Status monitoring thread stopped{Colors.END}")                # Return True to indicate successful completion        return True        def wait_for_job_completion(self, job_id: str) -> bool:        """Wait for a specific job to complete, returns True if successful, False if failed"""        print(f"Waiting for job {job_id} to complete...")        unknown_count = 0        max_unknown_threshold = 20  # Allow more UNKNOWN status for compile jobs                while True:            status = self.check_lsf_job_status(int(job_id))                        if status == "DONE":                timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                print(f"INFO: {timestamp} [jobid {job_id}] compile_job PASS :)")                return True  # Compilation successful            elif status in ["EXIT", "TERM", "KILL"]:                timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                print(f"INFO: {timestamp} [jobid {job_id}] compile_job FAIL :(")                return False  # Compilation failed            elif status == "UNKNOWN":                unknown_count += 1                if unknown_count >= max_unknown_threshold:                    timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                    print(f"INFO: {timestamp} [jobid {job_id}] compile_job FAIL: unknown_lsf_status")                    return False  # Compilation failed due to unknown status                print(f"Job {job_id} status: {status} 
(unknown count: {unknown_count})")            else:                # Reset unknown counter for other statuses                unknown_count = 0                print(f"Job {job_id} status: {status}")                        time.sleep(10)                # Add a small delay after job completion to ensure all files are written        print(f"Job {job_id} completed with status: {status}, waiting 5 seconds for file system sync...")        time.sleep(5)        def monitor_all_jobs(self, job_list=None):        if job_list is None:            job_list = self.submitted_jobs            self._clean_submitted_jobs()                    print(f"Monitoring {len(job_list)} submitted jobs...")                max_unknown_count = {}        max_unknown_threshold = 10                while job_list:            completed_jobs = []            job_ids = [int(job_id) for job_id in job_list]            status_map = self.batch_check_job_status(job_ids)            timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                        # Show lightweight progress for RUN/PEND            for job_id, status in status_map.items():                if status in ["RUN", "PEND"]:                    print(f"INFO: {timestamp} {self.get_test_status_display(job_id, status)} {status}")                        for job_id in list(job_list):                int_job_id = int(job_id)                status = status_map.get(int_job_id, "UNKNOWN")                                if status == "DONE":                    # Decide by log contents                    try:                        log_file_path = self.get_test_log_path_by_job_id(job_id)                    except Exception:                        log_file_path = None                    test_passed = False                    has_runtime_errors = False                    if log_file_path and os.path.exists(log_file_path):                        test_passed = self.check_test_result(log_file_path)                        has_runtime_errors = 
self._check_for_runtime_errors(log_file_path)                                        test_info = self.get_test_info_by_job_id(job_id)                    found_result = None                    if test_info:                        test_name = test_info['name']                        seed = test_info['seed']                        for _, result_obj in self.results.items():                            if result_obj.name == test_name and getattr(result_obj, 'seed', '') == seed:                                found_result = result_obj                                break                                        if test_passed and not has_runtime_errors:                        # Update status considering retry context                        if found_result and found_result.is_retry:                            found_result.finish("RERUN PASS", "")                        else:                            found_result.finish("PASS", "")                        self.update_test_result_status(job_id, "PASS")                        print(f"INFO: {timestamp} {self.get_test_status_display(job_id, 'DONE')} PASS")                        completed_jobs.append(job_id)                    elif test_passed and has_runtime_errors:                        # Test passed but had runtime errors - mark as FAIL                        error_msg = "Test passed but had runtime errors (running but had error)"                        if found_result and found_result.is_retry:                            found_result.finish("RERUN FAIL", error_msg)                        else:                            found_result.finish("FAIL", error_msg)                        found_result.error_detected = True                        self.update_test_result_status(job_id, "FAIL")                        print(f"INFO: {timestamp} {self.get_test_status_display(job_id, 'DONE')} FAIL (running but had error)")                        # Centralized retry on FAIL - let _resubmit_from_stored_opcode handle retry limits             
           if getattr(self.args, 'retry', 0) > 0:                            self._resubmit_from_stored_opcode(job_id)                        completed_jobs.append(job_id)                    else:                        # Test failed - mark as FAIL                        if found_result and found_result.is_retry:                            found_result.finish("RERUN FAIL", "DONE but log indicates failure")                        else:                            found_result.finish("FAIL", "DONE but log indicates failure")                        self.update_test_result_status(job_id, "FAIL")                        print(f"INFO: {timestamp} {self.get_test_status_display(job_id, 'DONE')} FAIL")                        # Centralized retry on FAIL - let _resubmit_from_stored_opcode handle retry limits                        if getattr(self.args, 'retry', 0) > 0:                            self._resubmit_from_stored_opcode(job_id)                        completed_jobs.append(job_id)                                        elif status in ["EXIT", "TERM", "KILL"]:                    # Prefer log PASS override if available                    try:                        log_file_path = self.get_test_log_path_by_job_id(job_id)                    except Exception:                        log_file_path = None                    if log_file_path and os.path.exists(log_file_path) and self.check_test_result(log_file_path):                        # Update status considering retry context                        if found_result and found_result.is_retry:                            found_result.finish("PASS", "")                        else:                            self.update_test_result_status(job_id, "PASS")                        print(f"INFO: {timestamp} {self.get_test_status_display(job_id, status)} PASS")                        completed_jobs.append(job_id)                    else:                        # Update status considering retry context                        if 
found_result and found_result.is_retry:                            found_result.finish("FAIL", f"Job status: {status}")                        else:                            self.update_test_result_status(job_id, "FAIL")                        print(f"INFO: {timestamp} {self.get_test_status_display(job_id, status)} FAIL")                        if getattr(self.args, 'retry', 0) > 0:                            # Let _resubmit_from_stored_opcode handle retry limits                            self._resubmit_from_stored_opcode(job_id)                    completed_jobs.append(job_id)                                        elif status == "UNKNOWN":                    # Use log to decide if possible; else threshold-based retry                    try:                        log_file_path = self.get_test_log_path_by_job_id(job_id)                    except Exception:                        log_file_path = None                    if log_file_path and os.path.exists(log_file_path):                        if self.check_test_result(log_file_path):                            self.update_test_result_status(job_id, "PASS")                            print(f"INFO: {timestamp} {self.get_test_status_display(job_id, status)} PASS (from log)")                            completed_jobs.append(job_id)                        else:                            self.update_test_result_status(job_id, "FAIL")                            print(f"INFO: {timestamp} {self.get_test_status_display(job_id, status)} FAIL (from log)")                            if getattr(self.args, 'retry', 0) > 0:                                # Retry directly from stored opcode                                self._resubmit_from_stored_opcode(job_id)                            completed_jobs.append(job_id)                    else:                        # Track UNKNOWN streak                        if job_id not in max_unknown_count:                            max_unknown_count[job_id] = 0                        
max_unknown_count[job_id] += 1                        if max_unknown_count[job_id] >= max_unknown_threshold:                            print(f"INFO: {timestamp} {self.get_test_status_display(job_id, status)} FAIL: unknown_lsf_status")                            if getattr(self.args, 'retry', 0) > 0:                                self._resubmit_from_stored_opcode(job_id)                            completed_jobs.append(job_id)                        else:                            # Reset UNKNOWN counter for stable statuses                            if job_id in max_unknown_count:                                max_unknown_count[job_id] = 0                        # Remove completed jobs from monitoring list            for job_id in completed_jobs:                if job_id in job_list:                    job_list.remove(job_id)                if job_id in max_unknown_count:                    del max_unknown_count[job_id]                        # Recalculate running/pending counts from current statuses            try:                remaining_ids = [int(j) for j in job_list]                # Build a fresh status map limited to remaining jobs                remaining_statuses = {jid: status_map.get(jid, "UNKNOWN") for jid in remaining_ids}                self.running_jobs = sum(1 for s in remaining_statuses.values() if s == "RUN")                self.pending_jobs = sum(1 for s in remaining_statuses.values() if s == "PEND")            except Exception:                pass                        # Print status summary after job completion            if completed_jobs:                try:                    total_reruns = sum(getattr(res, 'retry_count', 0) for _, res in self.results.items())                    pass_count = sum(1 for _, res in self.results.items() if getattr(res, 'status', '') in ("PASS", "RERUN PASS"))                    fail_count = sum(1 for _, res in self.results.items() if getattr(res, 'status', '') in ("FAIL", "RERUN FAIL"))                    
rerun_pass_count = sum(1 for _, res in self.results.items() if getattr(res, 'status', '') == "RERUN PASS")                    rerun_fail_count = sum(1 for _, res in self.results.items() if getattr(res, 'status', '') == "RERUN FAIL")                    total_test_cases = self.get_total_test_cases_count()                timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                    print(f"INFO: {timestamp} Completed {len(completed_jobs)} jobs. Status: Running={self.running_jobs}, Pending={self.pending_jobs}, Had Rerun={total_reruns}, Pass={pass_count}, Fail={fail_count}, RerunPass={rerun_pass_count}, RerunFail={rerun_fail_count}, Total={total_test_cases}")                except Exception:                    pass                        if job_list:                time.sleep(5)                # Grace period: if new jobs appear (e.g., retries) after list became empty, resume monitoring        try:            for _ in range(3):                if len(self.submitted_jobs) > 0:                    print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')} New jobs detected after completion, resuming monitoring...")                    return self.monitor_all_jobs(self.submitted_jobs)                time.sleep(5)        except Exception:            pass
        try:            self.running_jobs = 0            self.pending_jobs = 0        except Exception:            pass        print("All jobs completed!")        self._stop_status_thread = True        if hasattr(self, 'status_thread') and self.status_thread.is_alive():            self.status_thread.join(timeout=5)            print(f"{Colors.BLUE}Stopped status monitoring thread{Colors.END}")                # Also stop monitor thread if it exists        if hasattr(self, 'monitor_thread') and self.monitor_thread.is_alive():            print(f"{Colors.BLUE}Stopping monitor thread{Colors.END}")            # Note: monitor_thread is a daemon thread, it will stop when main thread exits    
        
            def check_lsf_job_status(self, job_id: int) -> str:        """Check LSF job status"""        try:            result = subprocess.run(                ["bjobs", "-noheader", str(job_id)],                stdout=subprocess.PIPE,                stderr=subprocess.PIPE,                universal_newlines=True,  # 使用universal_newlines代替text参数                timeout=30            )                        if result.returncode == 0:                lines = result.stdout.strip().split('\n')                if lines and lines[0]:                    parts = lines[0].split()                    if len(parts) >= 3:                        status = parts[2]  # Status column                        # Map LSF status to our status                        if status in ["RUN", "PEND", "WAIT", "SUSP"]:                            return status                        elif status in ["DONE", "EXIT", "TERM", "KILL"]:                            return status                        else:                            return status                else:                    # Job not found in queue, might have completed                    print(f"INFO: Job {job_id} not found in queue, checking if completed")                    return "UNKNOWN"            else:                # Command failed, print error details                print(f"Warning: bjobs command failed for job {job_id}")                print(f"Return code: {result.returncode}")                print(f"STDOUT: {result.stdout}")                print(f"STDERR: {result.stderr}")                                except subprocess.TimeoutExpired:            print(f"Warning: bjobs command timeout for job {job_id}")        except FileNotFoundError:            print(f"Warning: bjobs command not found, LSF may not be available")        except Exception as e:            print(f"Warning: Error checking job {job_id} status: {e}")                    return "UNKNOWN"        def get_job_details(self, job_id: int) -> Dict:        """Get detailed job 
information including failure reasons"""        try:            result = subprocess.run(                ["bjobs", "-l", str(job_id)],                stdout=subprocess.PIPE,                stderr=subprocess.PIPE,                universal_newlines=True,                timeout=60            )                        details = {                "job_id": job_id,                "status": "UNKNOWN",                "exit_code": None,                "exit_reason": None,                "submission_time": None,                "start_time": None,                "finish_time": None,                "cpu_time": None,                "max_memory": None,                "output_file": None,                "error_file": None            }                        if result.returncode == 0:                content = result.stdout                lines = content.split('\n')                                for line in lines:                    line = line.strip()                    if "Job Status" in line:                        details["status"] = line.split()[-1]                    elif "Exit Status" in line:                        details["exit_code"] = line.split()[-1]                    elif "Exit Reason" in line:                        details["exit_reason"] = line.split(":", 1)[-1].strip()                    elif "Submitted" in line:                        details["submission_time"] = line.split(":", 1)[-1].strip()                    elif "Started" in line:                        details["start_time"] = line.split(":", 1)[-1].strip()                    elif "Finished" in line:                        details["finish_time"] = line.split(":", 1)[-1].strip()                    elif "CPU time used" in line:                        details["cpu_time"] = line.split(":", 1)[-1].strip()                    elif "MAX MEM" in line:                        details["max_memory"] = line.split(":", 1)[-1].strip()                    elif "Output file" in line:                        details["output_file"] 
= line.split(":", 1)[-1].strip()                    elif "Error file" in line:                        details["error_file"] = line.split(":", 1)[-1].strip()                        return details                    except Exception as e:            print(f"Warning: Error getting job details for {job_id}: {e}")            return {"job_id": job_id, "status": "UNKNOWN", "error": str(e)}        def batch_check_job_status(self, job_ids: List[int]) -> Dict[int, str]:        """Batch check multiple job statuses to reduce LSF calls"""        if not job_ids:            return {}                try:            # Use bjobs with multiple job IDs to reduce calls            job_id_str = " ".join(map(str, job_ids))            result = subprocess.run(                ["bjobs", "-noheader"] + list(map(str, job_ids)),                stdout=subprocess.PIPE,                stderr=subprocess.PIPE,                universal_newlines=True,  # 使用universal_newlines代替text参数                timeout=45            )                        status_map = {}            if result.returncode == 0:                lines = result.stdout.strip().split('\n')                for line in lines:                    if line.strip():                        parts = line.split()                        if len(parts) >= 3:                            job_id = int(parts[0])                            status = parts[2]                            status_map[job_id] = status                        # Fill in UNKNOWN for jobs not found            for job_id in job_ids:                if job_id not in status_map:                    status_map[job_id] = "UNKNOWN"                                return status_map                    except Exception as e:            print(f"Warning: Batch job status check failed: {e}")            # Fall back to individual checks            return {job_id: self.check_lsf_job_status(job_id) for job_id in job_ids}        

        

                                def check_test_result(self, log_file: str) -> bool:        """Check test result - Enhanced version that prioritizes UVM Report Summary"""        if not os.path.exists(log_file):            return False                    try:            with open(log_file, 'r') as f:                content = f.read()                        # PRIORITY 1: Check UVM Report Summary first - this is the most reliable indicator            try:                severity_block_match = re.search(r"\*\*\s*Report counts by severity[\s\S]*?(?:\n\*\*|\Z)", content, re.IGNORECASE)                if severity_block_match:                    severity_block = severity_block_match.group(0)                    def _extract_count(label: str) -> int:                        m = re.search(rf"{label}\s*:\s*(\d+)", severity_block, re.IGNORECASE)                        return int(m.group(1)) if m else 0                    summary_error_count = _extract_count('UVM_ERROR')                    summary_fatal_count = _extract_count('UVM_FATAL')                                        # If UVM Report Summary shows 0 errors and 0 fatals, check for TEST CASE PASSED                    if summary_error_count == 0 and summary_fatal_count == 0:                        # Check for TEST CASE PASSED - this is the definitive indicator                        if re.search(r'TEST CASE PASSED', content, re.IGNORECASE):                            print(f"DEBUG: UVM Report Summary shows 0 errors/0 fatals + TEST CASE PASSED found -> PASS")                            return True                        else:                            print(f"DEBUG: UVM Report Summary shows 0 errors/0 fatals but no TEST CASE PASSED -> FAIL")                            return False                    else:                        # UVM Report Summary shows errors/fatals - definitely FAIL                        print(f"DEBUG: UVM Report Summary indicates FAIL (UVM_ERROR={summary_error_count}, UVM_FATAL={summary_fatal_count})")  
                      return False            except Exception:                # Non-fatal; fall back to detailed checks below                pass                        # Also check for "TEST CASE FAILED" pattern            if re.search(r'TEST CASE FAILED', content, re.IGNORECASE):                print(f"DEBUG: 'TEST CASE FAILED' found in log -> FAIL")                return False
            # CRITICAL FIX: First check for errors before summary section            # Locate the boundary where summary starts (errors after this should be ignored)            summary_markers = [                r'UVM Report catcher Summary',                r'UVM Report Summary'            ]            summary_idx = len(content)            for marker in summary_markers:                m = re.search(marker, content)                if m:                    summary_idx = min(summary_idx, m.start())
            # Region to search for real errors (before summary)            error_region = content[:summary_idx]
            # Error detection (only before summary)            error_patterns = [r'UVM_ERROR', r'UVM_FATAL', r'SCOREBOARD_MISMATCH', r'Solver failed', r'Error', r'Offending']            first_error_match = None            last_error_idx = -1            for pat in error_patterns:                for m in re.finditer(pat, error_region):                    if first_error_match is None:                        first_error_match = m                    last_error_idx = max(last_error_idx, m.start())
            # Non-recoverable rule: any UVM_FATAL before summary is immediate FAIL            if re.search(r'UVM_FATAL', error_region):                print("DEBUG: UVM_FATAL found before summary -> immediate FAIL")                return False
            # PASS detection (stricter to avoid accidental matches)            # Accept common canonical PASS lines only, anchored to line start            pass_match = None            pass_patterns = [                r'^\s*TEST CASE PASSED\b',                r'^\s*UVM_.*?TEST PASSED\b',                r'^\s*SIMULATION PASSED\b',            ]            for _pat in pass_patterns:                _m = re.search(_pat, content, re.MULTILINE)                if _m:                    pass_match = _m                    break
            # CRITICAL FIX: New logic to handle "running but had error" cases            # If there are errors before summary, check if PASS comes after the last error            if first_error_match is not None:                if pass_match:                    pass_idx = pass_match.start()                    # Only PASS if PASS comes AFTER the last error (indicating recovery)                    if pass_idx > last_error_idx:                        # Before returning PASS, ensure no tail errors after summary                        tail_region = content[summary_idx:]                        if re.search(r'(UVM_ERROR|UVM_FATAL|SCOREBOARD_MISMATCH|Solver failed|Error|Offending)', tail_region, re.IGNORECASE):                            print("DEBUG: Errors found after summary (tail region) -> treat as FAIL")                            return False                        print(f"DEBUG: Test PASSED after errors - PASS at {pass_idx}, last error at {last_error_idx}")                        return True                    else:                        # PASS came before or at the same time as error - this is "running but had error"                        error_pos = first_error_match.start()                        start_pos = max(0, error_pos - 100)                        end_pos = min(len(error_region), error_pos + 200)                        error_context = error_region[start_pos:end_pos].strip()                                                error_log_file = Path(log_file).with_suffix('.error.log')                        with open(error_log_file, 'w') as ef:                            ef.write(f"Original Log File: {log_file}\n")                            ef.write(f"Error Type: {first_error_match.group(0)}\n")                            ef.write(f"Error Context:\n{error_context}\n")                            ef.write(f"PASS position: {pass_idx}, Last error position: {last_error_idx}\n")                            ef.write(f"Decision: FAIL - PASS came before/at same time as 
error\n")                                                print(f"DEBUG: Test FAILED - PASS at {pass_idx}, last error at {last_error_idx} (running but had error)")                        return False                else:                    # No PASS found, but there are errors - definitely FAIL                    error_pos = first_error_match.start()                    start_pos = max(0, error_pos - 100)                    end_pos = min(len(error_region), error_pos + 200)                    error_context = error_region[start_pos:end_pos].strip()                                        error_log_file = Path(log_file).with_suffix('.error.log')                    with open(error_log_file, 'w') as ef:                        ef.write(f"Original Log File: {log_file}\n")                        ef.write(f"Error Type: {first_error_match.group(0)}\n")                        ef.write(f"Error Context:\n{error_context}\n")                        ef.write(f"Decision: FAIL - No PASS found, but errors exist\n")                                        print(f"DEBUG: Test FAILED - No PASS found, but errors exist")                    return False
            # No errors found before summary            if pass_match:                # Additional guard: if tail (after summary) contains errors due to log stitching, FAIL                tail_region = content[summary_idx:]                if re.search(r'(UVM_ERROR|UVM_FATAL|SCOREBOARD_MISMATCH|Solver failed|Error|Offending)', tail_region, re.IGNORECASE):                    print("DEBUG: Tail errors detected after summary despite PASS -> FAIL")                    return False                # No errors and PASS found - definitely PASS                print(f"DEBUG: Test PASSED - No errors found, PASS exists")                return True            else:                # No errors and no PASS - treat as not passed                print(f"DEBUG: Test not passed - No errors found, but no PASS either")                return False                        except Exception as e:            print(f"{Colors.RED}Error checking log file: {e}{Colors.END}")                    return False        def _final_status_refresh(self):        """Final status refresh - recheck all test results based on log files"""        timestamp = datetime.now().strftime('%m-%d %H:%M:%S')        refreshed_count = 0                for result_key, result_obj in self.results.items():            # Skip if no job_id or already in final state            if not hasattr(result_obj, 'job_id') or not result_obj.job_id:                continue                            # Skip if already in final PASS/FAIL state            if result_obj.status in ["PASS", "FAIL", "RERUN PASS", "RERUN FAIL", "ERROR", "TIMEOUT"]:                continue                        try:                # Get log file path                log_file_path = self.get_test_log_path_by_job_id(result_obj.job_id)                if not log_file_path or not os.path.exists(log_file_path):                    continue                                # Check log file for TEST CASE PASSED                test_passed = self.check_test_result(log_file_path)      
                          if test_passed:                    # Test passed - update status based on retry context                    if result_obj.is_retry:                        old_status = result_obj.status                        result_obj.finish("RERUN PASS", "")                        print(f"INFO: {timestamp} Final refresh: {result_obj.name} {old_status} -> RERUN PASS (log shows TEST CASE PASSED)")                    else:                        old_status = result_obj.status                        result_obj.finish("PASS", "")                        print(f"INFO: {timestamp} Final refresh: {result_obj.name} {old_status} -> PASS (log shows TEST CASE PASSED)")                    refreshed_count += 1                else:                    # Test failed - update status based on retry context                    if result_obj.is_retry:                        old_status = result_obj.status                        result_obj.finish("RERUN FAIL", "Final refresh: log indicates failure")                        print(f"INFO: {timestamp} Final refresh: {result_obj.name} {old_status} -> RERUN FAIL (log indicates failure)")                    else:                        old_status = result_obj.status                        result_obj.finish("FAIL", "Final refresh: log indicates failure")                        print(f"INFO: {timestamp} Final refresh: {result_obj.name} {old_status} -> FAIL (log indicates failure)")                    refreshed_count += 1                                except Exception as e:                print(f"Warning: Could not refresh status for {result_obj.name}: {e}")                if refreshed_count > 0:            print(f"INFO: {timestamp} Final status refresh completed: {refreshed_count} tests updated")        else:            print(f"INFO: {timestamp} Final status refresh completed: no tests needed updating")        def _check_for_runtime_errors(self, log_file: str) -> bool:        """Check for runtime errors in log file that indicate test 
should be retried"""        if not os.path.exists(log_file):            return False                    try:            with open(log_file, 'r') as f:                content = f.read()                        # Check for runtime error patterns that indicate test should be retried            runtime_error_patterns = [                r'UVM_ERROR',                r'UVM_FATAL',                 r'SCOREBOARD_MISMATCH',                r'Solver failed',                r'\bError\b',                r'Offending',                r'ERROR.*runtime',                r'FATAL.*runtime',                r'Exception.*occurred',                r'Assertion.*failed',                r'Timeout.*occurred',                r'Memory.*leak',                r'Resource.*exhausted',                r'Connection.*failed',                r'Protocol.*violation',                r'Deadlock.*detected',                r'Livelock.*detected'            ]                        # Search for runtime errors in the entire log            for pattern in runtime_error_patterns:                if re.search(pattern, content, re.IGNORECASE):                    print(f"DEBUG: Runtime error detected in {log_file}: {pattern}")                    return True                        return False                        except Exception as e:            print(f"{Colors.RED}Error checking for runtime errors in log file: {e}{Colors.END}")            return False            def retry_failed_tests(self):        """Retry failed tests with parallel retry mechanism"""        failed_tests = [(name, result) for name, result in self.results.items()                        if result.status in ["FAIL", "ERROR", "TIMEOUT"]]                               if not failed_tests:            print(f"{Colors.GREEN}No tests need retry{Colors.END}")            return                    print(f"\n{Colors.YELLOW}=== Retry Failed Tests ==={Colors.END}")        print(f"Failed test count: {len(failed_tests)}")        print(f"Max retry count: 
{self.args.retry}")        print(f"Retry strategy: Parallel retry - ALL retries must pass for test to pass")                for test_key, result in failed_tests:            # Extract test name from the key (test_key format is "test_name:config:seed")            test_name = result.name  # Use the actual test name from TestResult object            original_seed = getattr(result, 'seed', None)                        print(f"\n{Colors.CYAN}Starting parallel retry for {test_name} (original seed: {original_seed}){Colors.END}")                            # LSF mode parallel retry                retry_results = self._run_parallel_lsf_retry(test_name, result, original_seed)                        # Process retry results            self._process_parallel_retry_results(test_name, result, retry_results)    
    
        def _run_parallel_lsf_retry(self, test_name: str, result, original_seed: str) -> List[Dict]:        """Run parallel LSF retry tests"""        retry_results = []                # Find the original test case data        print(f"  Searching for original test case: {test_name}")        original_case = self.find_original_test_case(test_name)                if not original_case:            print(f"  Test case not found in JSON files, checking self.tests...")            for test_tuple in self.tests:                if test_tuple[0] == test_name:                    print(f"  Found test case '{test_name}' in self.tests")                    original_case = {                        'name': test_name,                        'config': test_tuple[1],                        'repeat': 1,                        'timeout': 60,                        'opts': []                    }                    break                if not original_case:            print(f"{Colors.RED}Warning: Could not find original test case for {test_name}, skipping retry{Colors.END}")            return []                # Generate retry opcodes        output_dir = self.args.output_dir        sim_output_dir = getattr(self.args, 'dir', output_dir)                retry_opcodes = []        for retry in range(1, self.args.retry + 1):            if retry == 1 and original_seed:                # First retry: use original seed                print(f"  Retry {retry}: Using original seed: {original_seed}")                opcodes = self.gen_test_case(original_case, output_dir, sim_output_dir, self.args.queue, specified_seed=original_seed)            else:                # Other retries: use random seed                print(f"  Retry {retry}: Using random seed")                opcodes = self.gen_test_case(original_case, output_dir, sim_output_dir, self.args.queue)                        if opcodes:                opcode = opcodes[0]                opcode['retry_attempt'] = retry                
opcode['retry_seed'] = original_seed if retry == 1 else 'random'                retry_opcodes.append(opcode)                # Submit all retry jobs in parallel        print(f"  Submitting {len(retry_opcodes)} parallel retry jobs...")        submitted_jobs = []                for opcode in retry_opcodes:            retry_result = self.submit_test_case(opcode)            if retry_result["status"] == "SUBMITTED":                retry_result['retry_attempt'] = opcode['retry_attempt']                retry_result['retry_seed'] = opcode['retry_seed']                submitted_jobs.append(retry_result)                print(f"  Retry {opcode['retry_attempt']} submitted: jobid {retry_result['job_id']}")            else:                print(f"  Retry {opcode['retry_attempt']} submission failed: {retry_result.get('error', 'Unknown error')}")                # Wait for all jobs to complete        if submitted_jobs:            print(f"  Waiting for {len(submitted_jobs)} retry jobs to complete...")            retry_results = self._wait_for_parallel_jobs(submitted_jobs)                return retry_results        def _wait_for_parallel_jobs(self, submitted_jobs: List[Dict]) -> List[Dict]:        """Wait for multiple jobs to complete in parallel"""        completed_jobs = []        job_ids = [job['job_id'] for job in submitted_jobs]                while job_ids:            completed_jobs_batch = []            jobs_to_remove = []                        for job_id in job_ids:                status = self.check_lsf_job_status(int(job_id))                if status in ["DONE", "EXIT", "TERM", "KILL"]:                    # Find the corresponding job info                    job_info = next((job for job in submitted_jobs if job['job_id'] == job_id), None)                    if job_info:                        if status == "DONE":                            job_info["status"] = "PASS"                        else:                            job_info["status"] = "FAIL"                        
completed_jobs_batch.append(job_info)                    jobs_to_remove.append(job_id)                        # Remove completed jobs from monitoring list            for job_id in jobs_to_remove:                job_ids.remove(job_id)                        if completed_jobs_batch:                completed_jobs.extend(completed_jobs_batch)                for job in completed_jobs_batch:                    if job["status"] == "PASS":                        status_icon = f"{Colors.GREEN}✓{Colors.END}"                    elif job["status"] == "RERUN PASS":                        status_icon = f"{Colors.CYAN}✓{Colors.END}"                    else:                        status_icon = f"{Colors.RED}✗{Colors.END}"                    print(f"  Retry {job['retry_attempt']} completed: {status_icon} {job['retry_seed']}")                        if job_ids:                time.sleep(10)  # Wait before next check                return completed_jobs        def _start_immediate_retry(self, result):        """Start immediate retry for a failed test"""        test_name = result.name        original_seed = getattr(result, 'seed', None)                # Rate limit concurrent retry threads        if not hasattr(self, 'active_retry_threads'):            self.active_retry_threads = 0        if self.active_retry_threads >= 20:            print(f"  Retry queue full (20). 
Delaying retry for {test_name}...")            # Busy-wait with sleep until slot available            while self.active_retry_threads >= 20:                time.sleep(5)                # Check if we've already started retries for this test        if hasattr(result, 'retry_started') and result.retry_started:            print(f"  Retry already started for {test_name}, skipping")            return                # Mark that retry has been started        if not hasattr(result, 'retry_started'):            result.retry_started = False        result.retry_started = True        result.retry_count = 0                print(f"\n{Colors.CYAN}🚀 Starting immediate retry for {test_name} (original seed: {original_seed}){Colors.END}")                # Start retry in a separate thread to avoid blocking the main monitoring loop        self.active_retry_threads += 1        retry_thread = threading.Thread(            target=self._run_immediate_retry,            args=(test_name, result, original_seed),            daemon=True        )        retry_thread.start()        def _run_immediate_retry(self, test_name: str, result, original_seed: str):        """Run immediate retry in a separate thread"""        try:                # LSF mode immediate retry                self._run_immediate_lsf_retry(test_name, result, original_seed)        except Exception as e:            print(f"{Colors.RED}Error in immediate retry for {test_name}: {e}{Colors.END}")        finally:            # Decrease active retry counter when thread finishes            if hasattr(self, 'active_retry_threads') and self.active_retry_threads > 0:                self.active_retry_threads -= 1            def _run_immediate_lsf_retry(self, test_name: str, result, original_seed: str):        """Run immediate LSF retry - stop on first success"""        # Find the original test case data with proper opts handling        original_case = self.find_original_test_case_with_opts(test_name, result)                if not original_case:  
          print(f"{Colors.RED}Warning: Could not find original test case for {test_name}, trying fallback method{Colors.END}")            # Fallback to original method            original_case = self.find_original_test_case(test_name)                    if not original_case:            print(f"{Colors.RED}Warning: Could not find original test case for {test_name}, skipping immediate retry{Colors.END}")            return                # Start only one retry attempt - the main loop will handle subsequent retries if needed        retry = 1        if original_seed:            # First retry: use original seed            print(f"  Immediate retry {retry}: Using original seed: {original_seed}")            opcodes = self.gen_test_case(original_case, self.args.output_dir, str(self.regression_dir), self.args.queue, specified_seed=original_seed)        else:            # Use random seed            print(f"  Immediate retry {retry}: Using random seed")            opcodes = self.gen_test_case(original_case, self.args.output_dir, str(self.regression_dir), self.args.queue)
        # Force wave dump for retry-generated opcodes BEFORE selecting and submitting        try:            for oc in opcodes or []:                cmd_list = oc.get('cmd', [])                if isinstance(cmd_list, list):                    replaced = False                    for i, token in enumerate(cmd_list):                        if isinstance(token, str) and token.startswith('wave='):                            if token != 'wave=fsdb':                                cmd_list[i] = 'wave=fsdb'                            replaced = True                            break                    if not replaced:                        cmd_list.append('wave=fsdb')                    # Tag immediate retry                    if not any(isinstance(t, str) and t.startswith('lmn=') for t in cmd_list):                        cmd_list.append('lmn=rerun')                    oc['cmd'] = cmd_list        except Exception:            pass                        if not opcodes:            print(f"  Failed to generate retry opcodes for {test_name}")            return                opcode = opcodes[0]        # Ensure the selected opcode itself carries wave=fsdb (explicit)        try:            cmd_list = opcode.get('cmd', [])            if isinstance(cmd_list, list):                replaced = False                for i, token in enumerate(cmd_list):                    if isinstance(token, str) and token.startswith('wave='):                        if token != 'wave=fsdb':                            cmd_list[i] = 'wave=fsdb'                        replaced = True                        break                if not replaced:                    cmd_list.append('wave=fsdb')                # Tag immediate retry                if not any(isinstance(t, str) and t.startswith('lmn=') for t in cmd_list):                    cmd_list.append('lmn=rerun')                opcode['cmd'] = cmd_list        except Exception:            pass        retry_result = self.submit_test_case(opcode)             
   if retry_result["status"] == "SUBMITTED":            result.retry_count = retry            print(f"  Immediate retry {retry} submitted: jobid {retry_result['job_id']}")                        # CRITICAL FIX: Don't wait here - let the main loop handle retry job completion            # The retry job is now tracked in the main loop and will be processed there            print(f"  Immediate retry {retry} submitted and tracked in main loop: jobid {retry_result['job_id']}")                        # Return immediately - the main loop will handle completion            return        else:            print(f"  {test_name} retry {retry} submission failed: {retry_result.get('error', 'Unknown error')}")            # If submission failed, we can't retry further            print(f"{Colors.RED}✗{Colors.END} {test_name} retry submission failed - Original test remains FAIL")        def _wait_for_retry_job_completion(self, job_id: str, test_name: str, retry_num: int) -> bool:        """Wait for a specific retry job to complete"""        print(f"  Waiting for retry {retry_num} job {job_id} to complete...")                while True:            status = self.check_lsf_job_status(int(job_id))                        if status == "DONE":                print(f"  Retry {retry_num} job {job_id} PASSED")                return True            elif status in ["EXIT", "TERM", "KILL"]:                print(f"  Retry {retry_num} job {job_id} FAILED")                return False            elif status == "UNKNOWN":                # Job may have completed and been removed from queue                print(f"  Retry {retry_num} job {job_id} status UNKNOWN, checking log file...")                # Try to determine result from log file                test_info = self.get_test_info_by_job_id(job_id)                if test_info:                    log_file_path = self.get_test_log_path_by_job_id(job_id)                    if log_file_path and os.path.exists(log_file_path):                        # 
CRITICAL FIX: Enhanced status determination for retry jobs                        test_passed = self.check_test_result(log_file_path)                        has_runtime_errors = self._check_for_runtime_errors(log_file_path)                                                if test_passed and not has_runtime_errors:                            print(f"  Retry {retry_num} job {job_id} PASSED (from log file)")                            return True                        elif test_passed and has_runtime_errors:                            print(f"  Retry {retry_num} job {job_id} FAILED (running but had error, from log file)")                            return False                        else:                            print(f"  Retry {retry_num} job {job_id} FAILED (from log file)")                            return False                                # If we can't determine, assume failed                print(f"  Retry {retry_num} job {job_id} assumed FAILED")                return False            else:                # Still running                time.sleep(10)        def get_test_log_path_by_job_id(self, job_id: str) -> str:        """Get log file path by job ID"""        # 1) Prefer the opcode we stored at submission time (survives after job DONE)        try:            stored = getattr(self, 'job_meta', {}).get(str(job_id))            if stored:                log_path = stored.get('log_path') or stored.get('log_file')                if log_path:                    return log_path        except Exception:            pass
        # 2) Fallback: search through submitted_results snapshot        try:            for result in getattr(self, 'submitted_results', []):                if result.get('job_id') == job_id:                    return result.get('log_path', '')        except Exception:            pass
        # 3) Fallback: locate TestResult by job_id, then derive its log path        try:            for _, res in getattr(self, 'results', {}).items():                if hasattr(res, 'job_id') and str(getattr(res, 'job_id', '')) == str(job_id):                    # Prefer explicitly recorded path if present                    if getattr(res, 'log_file', ''):                        return res.log_file                    # Derive from standard layout                    derived = self.get_test_log_path(res)                    if derived:                        return derived        except Exception:            pass
        # 4) Nothing found        return ''        def _process_parallel_retry_results(self, test_name: str, result, retry_results: List):        """Process parallel retry results and update original test result"""        if not retry_results:            print(f"{Colors.RED}No retry results for {test_name}{Colors.END}")            return                # Check if ALL retries passed        passed_retries = [r for r in retry_results if r.status == "PASS" or r.get("status") == "PASS"]        total_retries = len(retry_results)                if len(passed_retries) == total_retries:            # ALL retries passed - mark original test as PASS            passed_retry = passed_retries[0]  # Use the first passed retry for info                        if hasattr(passed_retry, 'status'):  # LSF mode                result.finish("PASS", f"ALL {total_retries} retries successful")                result.retry_count = total_retries                result.seed = passed_retry.seed                result.log_file = passed_retry.log_file                print(f"{Colors.GREEN}✓{Colors.END} {test_name} ALL {total_retries} retries passed - Original test marked as PASS")            else:  # LSF mode                result.finish("PASS", f"ALL {total_retries} retries successful")                result.retry_count = total_retries                result.seed = passed_retry.get('seed', 'unknown')                result.job_id = passed_retry.get('job_id', 'unknown')                print(f"{Colors.GREEN}✓{Colors.END} {test_name} ALL {total_retries} retries passed - Original test marked as PASS")        else:            # Not all retries passed - test remains FAIL            failed_count = total_retries - len(passed_retries)            print(f"{Colors.RED}✗{Colors.END} {test_name} {failed_count}/{total_retries} retries failed - Original test remains FAIL")                        # Update with the last retry attempt info            last_retry = retry_results[-1]            if hasattr(last_retry, 
'retry_attempt'):  # LSF mode                result.retry_count = last_retry.retry_attempt            else:  # LSF mode                result.retry_count = last_retry['retry_attempt']        def merge_coverage(self):        """Merge coverage databases"""        if not self.args.coverage:            return                    print(f"\n{Colors.BLUE}=== Merge Coverage Databases ==={Colors.END}")                # Find all coverage databases        cov_dbs = []        for result in self.results.values():            if result.coverage_db and os.path.exists(result.coverage_db):                cov_dbs.append(result.coverage_db)                        if not cov_dbs:            print(f"{Colors.YELLOW}No coverage databases found{Colors.END}")            return                    # Merge coverage        merged_db = self.coverage_dir / f"merged_{datetime.now().strftime('%Y%m%d_%H%M%S')}.vdb"                try:            cmd = ["urg", "-dir"] + cov_dbs + ["-dbname", str(merged_db)]            subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)            print(f"{Colors.GREEN}Coverage merge completed: {merged_db}{Colors.END}")                        # Generate coverage report            report_dir = self.report_dir / f"coverage_{datetime.now().strftime('%Y%m%d_%H%M%S')}"            cmd = ["urg", "-dir", str(merged_db), "-report", str(report_dir)]            subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)            print(f"{Colors.GREEN}Coverage report generated: {report_dir}{Colors.END}")                    except subprocess.CalledProcessError as e:            print(f"{Colors.RED}Coverage merge failed: {e}{Colors.END}")                def generate_report(self):        """Generate test report"""        print(f"\n{Colors.BLUE}=== Generate Test Report ==={Colors.END}")                # Final status validation before generating report        print(f"Performing final status validation before generating report...")        
self.validate_all_test_statuses()                # Clean up any duplicate test results before counting        self._clean_duplicate_test_results()                # Count results - ensure we count only tests with final statuses        final_status_results = [r for r in self.results.values() if r.status in ["PASS", "RERUN PASS", "FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]]        print(f"Counting results from {len(final_status_results)} completed test cases...")        total = len(final_status_results)        passed = len([r for r in final_status_results if r.status == "PASS"])        rerun_passed = len([r for r in final_status_results if r.status == "RERUN PASS"])        failed = len([r for r in final_status_results if r.status in ["FAIL", "RERUN FAIL"]])        errors = len([r for r in final_status_results if r.status == "ERROR"])        timeouts = len([r for r in final_status_results if r.status == "TIMEOUT"])                print(f"Result counts: Total={total}, Passed={passed}, Rerun Passed={rerun_passed}, Failed={failed}, Errors={errors}, Timeouts={timeouts}")                # Calculate total time        total_time = time.time() - self.start_time                # Generate CSV report        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')        csv_file = self.report_dir / f"regression_{timestamp}.csv"                with open(csv_file, 'w', newline='') as f:            writer = csv.writer(f)            writer.writerow(['Test Name', 'Config', 'Status', 'Duration', 'Estimated Time', 'Retry Count', 'Log File', 'Error Message'])                        for result in final_status_results:                writer.writerow([                    result.name,                    result.config,                    result.status,                    result.get_duration_str(),                    f"{result.estimated_duration:.1f}s",                    result.retry_count,                    result.log_file,                    result.error_msg                ])                     
   # Generate JSON report        json_file = self.report_dir / f"regression_{timestamp}.json"        report_data = {            'summary': {                'total': total,                'passed': passed,                'rerun_passed': rerun_passed,                'failed': failed,                'errors': errors,                'timeouts': timeouts,                'pass_rate': f"{(passed + rerun_passed)/total*100:.1f}%" if total > 0 else "0%",                'total_time': f"{total_time:.1f}s",                'timestamp': timestamp            },            'tests': {                f"{result.name}_{getattr(result, 'seed', 'unknown')}": {                    'config': result.config,                    'status': result.status,                    'duration': result.duration,                    'estimated_duration': result.estimated_duration,                    'retry_count': result.retry_count,                    'log_file': result.log_file,                    'error_msg': result.error_msg                }                for result in final_status_results            }        }                with open(json_file, 'w') as f:            json.dump(report_data, f, indent=2, ensure_ascii=False)                    # Save historical test data for future time estimation        self.save_test_history()                    # Print summary        print(f"\n{Colors.BOLD}=== Regression Test Summary ==={Colors.END}")        print(f"Total Tests: {total}")        print(f"{Colors.GREEN}Passed: {passed}{Colors.END}")        print(f"{Colors.CYAN}Rerun Passed: {rerun_passed}{Colors.END}")        print(f"{Colors.RED}Failed: {failed}{Colors.END}")        print(f"{Colors.RED}Errors: {errors}{Colors.END}")        print(f"{Colors.YELLOW}Timeouts: {timeouts}{Colors.END}")        print(f"Pass Rate: {(passed + rerun_passed)/total*100:.1f}%" if total > 0 else "0%")        print(f"Total Time: {total_time/60:.1f} minutes")                print(f"\nReport Files:")        print(f"  CSV: {csv_file}")     
   print(f"  JSON: {json_file}")                # Show failed tests        failed_results = [r for r in self.results.values() if r.status in ["FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]]        if failed_results:            print(f"\n{Colors.RED}Failed Tests:{Colors.END}")            for result in failed_results:                print(f"  {result.name}: {result.status} - {result.error_msg}")                # Generate detailed regression report (like Image 1)        self.generate_detailed_regression_report()                # Generate error summary report        self.generate_error_summary_report()                # Generate regression summary info (like Image 3)        self.generate_regression_summary_info()                # Generate final real-time report        self.generate_real_time_report()                # Save error monitor state        self.save_error_monitor_state()
        # After all standard reports, also collect and persist transaction/cycle statistics        try:            self.update_transaction_cycle_statistics()        except Exception as e:            print(f"{Colors.YELLOW}Warning: Failed to update transaction/cycle statistics: {e}{Colors.END}")                    def extract_job_statistics(self, result):        """Extract job statistics (CPU time, max memory, processes) from actual data"""        cpu_time = "0 sec"        max_mem = "N/A"        procs = "N/A"                # Try to get CPU time from result duration        if result.duration > 0:            cpu_time = f"{int(result.duration)} sec"                # Try to get job statistics from LSF if job_id is available        if hasattr(result, 'job_id') and result.job_id and result.job_id != 'unknown':            try:                # Use bjobs to get detailed job information                cmd = ["bjobs", "-l", str(result.job_id)]                output = subprocess.check_output(cmd, stderr=subprocess.PIPE, universal_newlines=True, timeout=10)                                # Parse CPU time                cpu_match = re.search(r'CPU time used is (\d+\.?\d*) seconds', output)                if cpu_match:                    cpu_seconds = float(cpu_match.group(1))                    cpu_time = f"{int(cpu_seconds)} sec"                                # Parse memory usage                mem_match = re.search(r'MAX MEM: (\d+\.?\d*) (\w+)', output)                if mem_match:                    mem_value = mem_match.group(1)                    mem_unit = mem_match.group(2)                    max_mem = f"{mem_value} {mem_unit}"                                # Parse number of processes                proc_match = re.search(r'Number of processors: (\d+)', output)                if proc_match:                    procs = proc_match.group(1)                                except (subprocess.TimeoutExpired, subprocess.CalledProcessError, FileNotFoundError):                # 
If bjobs fails, try to extract from log file                pass                # If LSF info not available, try to extract from log file        if max_mem == "N/A" and hasattr(result, 'log_file') and result.log_file:            try:                if os.path.exists(result.log_file):                    with open(result.log_file, 'r', encoding='utf-8', errors='ignore') as f:                        content = f.read()                                                # Look for memory usage patterns in log                        mem_patterns = [                            r'max_memory[:\s]+(\d+\.?\d*)\s*(\w+)',                            r'memory_usage[:\s]+(\d+\.?\d*)\s*(\w+)',                            r'peak_memory[:\s]+(\d+\.?\d*)\s*(\w+)',                            r'MAX_MEM[:\s]+(\d+\.?\d*)\s*(\w+)'                        ]                                                for pattern in mem_patterns:                            mem_match = re.search(pattern, content, re.IGNORECASE)                            if mem_match:                                mem_value = mem_match.group(1)                                mem_unit = mem_match.group(2)                                max_mem = f"{mem_value} {mem_unit}"                                break                                                # Look for process count patterns                        proc_patterns = [                            r'processes[:\s]+(\d+)',                            r'num_procs[:\s]+(\d+)',                            r'process_count[:\s]+(\d+)'                        ]                                                for pattern in proc_patterns:                            proc_match = re.search(pattern, content, re.IGNORECASE)                            if proc_match:                                procs = proc_match.group(1)                                break                                            except Exception:                pass                # If still no data, use reasonable 
defaults based on test type        if max_mem == "N/A":            # Estimate memory based on test name or use default            if "stress" in result.name.lower() or "full" in result.name.lower():                max_mem = "16 GB"            elif "small" in result.name.lower() or "basic" in result.name.lower():                max_mem = "4 GB"            else:                max_mem = "8 GB"                if procs == "N/A":            # Estimate process count based on test type or use default            if "stress" in result.name.lower() or "full" in result.name.lower():                procs = "12"            elif "small" in result.name.lower() or "basic" in result.name.lower():                procs = "4"            else:                procs = "8"                return cpu_time, max_mem, procs        def get_test_log_path(self, result):        """Get the actual log file path for a test result"""        # 1. Use result.log_file if it exists and is a valid path        if result.log_file and os.path.exists(result.log_file):            return os.path.abspath(result.log_file)                # 2. 
Try to construct log file path based on test case structure        sim_output_dir = str(self.regression_dir)        seed = getattr(result, 'seed', 'unknown')        test_name = result.name                # Try different log file naming patterns        possible_log_paths = [            f"{sim_output_dir}/logs/{test_name}/{test_name}_{seed}_*.log",  # Primary path with opts: logs/test_name/test_name_seed_opts_*.log            f"{sim_output_dir}/logs/{test_name}/{test_name}_{seed}.log",  # Fallback: logs/test_name/test_name_seed.log            f"{sim_output_dir}/logs/{test_name}/{test_name}_*.log",  # Wildcard pattern for logs/test_name/            f"{sim_output_dir}/logs/{test_name}.log",  # Fallback: logs/test_name.log            f"{sim_output_dir}/{test_name}/report.log",  # Legacy path: test_name/report.log            f"{sim_output_dir}/{test_name}_{seed}/report.log",  # Legacy path: test_name_seed/report.log        ]                for log_path in possible_log_paths:            if '*' in log_path:                # Handle wildcard patterns                import glob                matching_files = glob.glob(log_path)                if matching_files:                    return os.path.abspath(matching_files[0])  # Return first matching file            elif os.path.exists(log_path):                return os.path.abspath(log_path)                return None        def update_real_time_report(self):        """Update real-time regression report"""        current_time = time.time()        if current_time - self.last_report_update >= self.report_update_interval:            self.last_report_update = current_time            self.generate_real_time_report()        def generate_real_time_report(self):        """Generate real-time regression report"""        try:            with open(self.real_time_report_path, 'w', encoding='utf-8') as f:                # Write header                f.write("=" * 80 + "\n")                f.write(f"REAL-TIME REGRESSION REPORT - Updated: 
{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")                f.write("=" * 80 + "\n\n")                                # Generate test status and log paths section                f.write("=== TEST STATUS AND LOG PATHS ===\n")                f.write(self.generate_test_status_and_log_paths_content())                f.write("\n\n")                                # Generate summary info                f.write("=== REGRESSION SUMMARY ===\n")                f.write(self.generate_regression_summary_info_content())                f.write("\n\n")                                # Write footer                f.write("=" * 80 + "\n")                f.write("REPORT WILL BE UPDATED EVERY 30 SECONDS\n")                f.write("=" * 80 + "\n")                        except Exception as e:            print(f"Warning: Could not update real-time report: {e}")        def generate_progress_bar(self, percentage, width=50):        """Generate progress bar"""        filled_width = int(width * percentage / 100)        bar = '█' * filled_width + '░' * (width - filled_width)        return f"[{bar}]"        def generate_detailed_regression_report(self):        """Generate detailed regression report like Image 1"""        print(f"\n{Colors.BLUE}=== Detailed Regression Report ==={Colors.END}")                # Validate statuses before generating report        self.validate_all_test_statuses()                # Print header        timestamp = datetime.now().strftime('%m-%d %H:%M:%S')        print(f"INFO: {timestamp}: {'+' * 15} REPORT {'+' * 15}")                # Print table header        print(f"INFO: {timestamp}: | status | test_name | seed | jobid | cpu_time | max_mem | procs |")                # Process each test result - now PENDING status should be correctly updated        final_status_results = [r for r in self.results.values() if r.status in ["PASS", "RERUN PASS", "FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]]                for result in final_status_results:            # Get test info 
           test_name = result.name            seed = getattr(result, 'seed', 'unknown')            job_id = getattr(result, 'job_id', 'unknown')                        # Get CPU time and memory info from actual data            cpu_time, max_mem, procs = self.extract_job_statistics(result)                        # If runtime errors were detected, force status to FAIL for reporting and accounting            if hasattr(result, 'error_detected') and result.error_detected and result.status in ["PASS", "RERUN PASS"]:                result.status = "FAIL"
            # Format status with proper colors (after possible override)            status = result.status            if status == "PASS":                status = f"{Colors.GREEN}PASS{Colors.END}"            elif status == "RERUN PASS":                status = f"{Colors.CYAN}RERUN PASS{Colors.END}"            elif status in ["FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]:                status = f"{Colors.RED}FAIL{Colors.END}"            else:                status = f"{Colors.YELLOW}{status}{Colors.END}"                        # Add error detection info to status            error_info = ""            if hasattr(result, 'error_detected') and result.error_detected:                error_info = " (running but had error)"                        # Print test result line            print(f"INFO: {timestamp}: | {status} | {test_name} | {seed} | {job_id} | {cpu_time} | {max_mem} | {procs} |{error_info}")                print(f"INFO: {timestamp}: {'+' * 15} END REPORT {'+' * 15}")        print(f"Total unique tests reported: {len(final_status_results)}")        def generate_error_summary_report(self):        """Generate error summary report with UVM_ERROR and UVM_FATAL details"""        print(f"\n{Colors.BLUE}=== Error Summary Report ==={Colors.END}")                # Collect all error information        error_info = {}        failed_tests = []                for result in self.results.values():            # Treat "running but had error" as FAIL for summary as well            if hasattr(result, 'error_detected') and result.error_detected and result.status in ["PASS", "RERUN PASS"]:                result.status = "FAIL"            if result.status in ["FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]:                failed_tests.append(result)                                # Try to read log file for UVM_ERROR and UVM_FATAL                self.analyze_log_for_errors(result, error_info)                if not failed_tests:            print(f"INFO: {datetime.now().strftime('%m-%d 
%H:%M:%S')}: No failed tests found")            return                if not error_info:            print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')}: Found {len(failed_tests)} failed tests but no UVM_ERROR or UVM_FATAL found in logs")            print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')}: Failed tests with log paths:")            for result in failed_tests:                # Get the actual log file path                log_path = self.get_test_log_path(result)                if log_path:                    print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')}:   [{result.status}] {log_path}")                else:                    print(f"INFO: {datetime.now().strftime('%m-%d %H:%M:%S')}:   {result.name}: {result.status} - {result.error_msg} (No log file found)")            return                # Print error summary        timestamp = datetime.now().strftime('%m-%d %H:%M:%S')        print(f"INFO: {timestamp}: Total {len(failed_tests)} failure syndromes:")                # Group errors by type        error_count = 1        for error_type, error_details in error_info.items():            print(f"INFO: {timestamp}: ({error_count}) ERR ID:{error_details['id']}:")            print(f"INFO: {timestamp}: MSG: \"{error_details['message']}\"")                        # Print error count if available (from log content before UVM Report catcher Summary)            if 'count' in error_details:                print(f"INFO: {timestamp}: Count: {error_details['count']} (from log content before UVM Report catcher Summary)")                        # Print associated test paths            for test_path in error_details['tests']:                print(f"INFO: {timestamp}: {test_path}")                        error_count += 1        def analyze_log_for_errors(self, result, error_info):        """Analyze log file for UVM_ERROR and UVM_FATAL messages ONLY in content before UVM Report catcher Summary"""        # Use the new get_test_log_path method to get the 
actual log file path        log_file_path = self.get_test_log_path(result)                if not log_file_path:            print(f"Warning: No log file found for test {result.name}")            return                # Use the found log file path        log_file_paths = [log_file_path]                # Analyze each log file        for log_file_path in log_file_paths:            try:                with open(log_file_path, 'r', encoding='utf-8', errors='ignore') as f:                    content = f.read()                                # First, look for UVM Report catcher Summary section                uvm_summary_errors = self.analyze_uvm_report_catcher_summary(content, log_file_path)                for error_type, error_details in uvm_summary_errors.items():                    if error_type not in error_info:                        error_info[error_type] = error_details                    else:                        # Merge tests if error type already exists                        for test_path in error_details['tests']:                            if test_path not in error_info[error_type]['tests']:                                error_info[error_type]['tests'].append(test_path)                                # Only use UVM Report catcher Summary analysis, skip direct pattern matching                # to avoid capturing errors from UVM Report catcher Summary section                pass                                        except Exception as e:                print(f"Warning: Could not analyze log file {log_file_path}: {e}")                continue        def analyze_uvm_report_catcher_summary(self, content, log_file_path):        """Analyze log content before 'UVM Report catcher Summary' line for error information"""        error_info = {}                # Split content into lines to find the UVM Report catcher Summary line        lines = content.split('\n')        summary_line_index = -1                # Find the line containing "UVM Report catcher Summary"    
    for i, line in enumerate(lines):            if "UVM Report catcher Summary" in line:                summary_line_index = i                break                if summary_line_index == -1:            # No UVM Report catcher Summary found, return empty error_info            return error_info                # Extract content before the UVM Report catcher Summary line        content_before_summary = '\n'.join(lines[:summary_line_index])                # Look for UVM_ERROR and UVM_FATAL in the content before summary        error_patterns = [            r'UVM_ERROR\s*@\s*[^\n]*\s*:\s*([^\n]+)',            r'UVM_FATAL\s*@\s*[^\n]*\s*:\s*([^\n]+)',            r'UVM_ERROR\s+([^\n]+)',            r'UVM_FATAL\s+([^\n]+)'        ]                # Collect all error messages found before the summary        error_messages = []        fatal_messages = []                for pattern in error_patterns:            matches = re.findall(pattern, content_before_summary, re.IGNORECASE)            for match in matches:                if "UVM_ERROR" in pattern or "UVM_ERROR" in match:                    error_messages.append(match.strip())                elif "UVM_FATAL" in pattern or "UVM_FATAL" in match:                    fatal_messages.append(match.strip())                # Create error info structure        if error_messages:            # Use the first error message as representative            error_message = error_messages[0]            error_id = str(hash(error_message))[-8:]            error_info['UVM_ERROR'] = {                'id': error_id,                'message': f"UVM_ERROR: {error_message}",                'tests': [log_file_path],                'count': len(error_messages)            }                if fatal_messages:            # Use the first fatal message as representative            fatal_message = fatal_messages[0]            fatal_id = str(hash(fatal_message))[-8:]            error_info['UVM_FATAL'] = {                'id': fatal_id,                'message': 
f"UVM_FATAL: {fatal_message}",                'tests': [log_file_path],                'count': len(fatal_messages)            }                return error_info        def generate_regression_summary_info(self):        """Generate regression summary info like Image 3"""        print(f"\n{Colors.BLUE}=== Regression Summary Info ==={Colors.END}")                # Validate statuses before generating summary        self.validate_all_test_statuses()                timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                # Generate regression seed        regress_seed = random.randint(1000000000, 9999999999)        print(f"INFO: {timestamp}: Regress Seed (rseed): {regress_seed}")                # Test list path - use absolute path        cur_path = os.getcwd()        test_list_path = os.path.abspath(os.path.join(cur_path, "../def/json_list"))        print(f"INFO: {timestamp}: Test list: {test_list_path}")                # Failure list path - use absolute path        failure_list_path = os.path.abspath(os.path.join(self.log_dir, "error.lst"))        print(f"INFO: {timestamp}: Failure list: {failure_list_path}")                # Regression report path - use absolute path        regression_report_path = os.path.abspath(os.path.join(self.report_dir, "zregress_report.log"))        print(f"INFO: {timestamp}: Regression report: {regression_report_path}")                # End time        end_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')        print(f"INFO: {timestamp}: End Time: {end_time}")                # Elapsed CPU time        elapsed_time = time.time() - self.start_time        hours = int(elapsed_time // 3600)        minutes = int((elapsed_time % 3600) // 60)        seconds = int(elapsed_time % 60)        print(f"INFO: {timestamp}: Elapsed CPU Time: {hours}:{minutes:02d}:{seconds:02d}")                # Determine regression result - use validated statuses        total_tests = len(self.results)        passed_tests = len([r for r in self.results.values() 
if r.status == "PASS"])        failed_tests = len([r for r in self.results.values() if r.status in ["FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]])        pending_tests = len([r for r in self.results.values() if r.status == "PENDING"])        running_tests = len([r for r in self.results.values() if r.status == "RUNNING"])                # If there are still pending or running tests, consider it incomplete        if pending_tests > 0 or running_tests > 0:            print(f"INFO: {timestamp}: ZREGRESS INCOMPLETE (Pending: {pending_tests}, Running: {running_tests})")        elif failed_tests == 0:            print(f"{Colors.GREEN}INFO: {timestamp}: ZREGRESS PASS{Colors.END}")        else:            print(f"{Colors.RED}INFO: {timestamp}: ZREGRESS FAIL{Colors.END}")                # Print detailed status        print(f"INFO: {timestamp}: Total Tests: {total_tests}")        print(f"INFO: {timestamp}: Passed: {passed_tests}")        print(f"INFO: {timestamp}: Failed: {failed_tests}")        print(f"INFO: {timestamp}: Pending: {pending_tests}")        print(f"INFO: {timestamp}: Running: {running_tests}")                # Save error list to file and get saved paths        saved_paths = self.save_error_list(failure_list_path)                # Print saved file paths within regression summary        if saved_paths:            print(f"INFO: {timestamp}: Error list saved to: {saved_paths['error_lst']}")            print(f"INFO: {timestamp}: Error JSON saved to: {saved_paths['error_json']}")            print(f"INFO: {timestamp}: Failed regression list saved to: {saved_paths['failed_regression']}")                        # Print summary of failed tests if any            failed_count = len([r for r in self.results.values() if r.status in ["FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]])            if failed_count > 0:                print(f"INFO: {timestamp}: Generated failed regression list with {failed_count} failed test cases")                print(f"INFO: {timestamp}: You can re-run 
failed tests using: python3 regress.py --failed-regression {saved_paths['failed_regression']}")                # End markers        print(f"INFO: {timestamp}: {'+' * 30}")        print(f"INFO: {timestamp}: {' ' * 10} ZREGRESS END {' ' * 10}")        print(f"INFO: {timestamp}: {'+' * 30}")                # Generate comprehensive regression report        self.generate_comprehensive_regression_report(regression_report_path)        def save_error_list(self, failure_list_path):        """Save error list to file and return saved paths"""        saved_paths = {}                try:            # Save error.lst (text format) - simple list of failed test names            with open(failure_list_path, 'w', encoding='utf-8') as f:                for result in self.results.values():                    if result.status in ["FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]:                        f.write(f"{result.name}\n")                        saved_paths['error_lst'] = failure_list_path                        # Save error.json (JSON format with full test case details)            error_json_path = failure_list_path.replace('.lst', '.json')            error_cases = []                        for result in self.results.values():                if result.status in ["FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]:                    # Find the original test case JSON data with proper opts handling                    test_case_data = self.find_original_test_case_with_opts(result.name, result)                    if not test_case_data:                        # Fallback to original method                        test_case_data = self.find_original_test_case(result.name)                    if test_case_data:                        error_case = {                            'test_name': result.name,                            'config': result.config,                            'status': result.status,                            'error_message': result.error_msg,                            'duration': 
result.duration,                            'seed': getattr(result, 'seed', 'unknown'),                            'job_id': getattr(result, 'job_id', 'unknown'),                            'original_test_case': test_case_data                        }                        error_cases.append(error_case)                        with open(error_json_path, 'w', encoding='utf-8') as f:                json.dump(error_cases, f, indent=2, ensure_ascii=False)                        saved_paths['error_json'] = error_json_path                        # Save failed_regression.json - regression list format for failed tests only            failed_regression_path = failure_list_path.replace('.lst', '_regression.json')            failed_regression_cases = []                        for result in self.results.values():                if result.status in ["FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]:                    # Find the original test case JSON data with proper opts handling                    test_case_data = self.find_original_test_case_with_opts(result.name, result)                    if not test_case_data:                        # Fallback to original method                        test_case_data = self.find_original_test_case(result.name)                    if test_case_data:                        # Create a regression list entry with the same format as original test cases                        # but with updated information from the failed run                        failed_case = test_case_data.copy()  # Start with original test case data                                                # Update with actual run information                        failed_case.update({                            'actual_status': result.status,                            'actual_error_message': result.error_msg,                            'actual_duration': result.duration,                            'actual_seed': getattr(result, 'seed', 'unknown'),                            'actual_job_id': 
getattr(result, 'job_id', 'unknown'),                            'log_file': getattr(result, 'log_file', ''),                            'retry_count': getattr(result, 'retry_count', 0),                            'failure_timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')                        })                                                # Optionally adjust repeat count for retry (if retry was attempted)                        if hasattr(result, 'retry_count') and result.retry_count > 0:                            # Reduce repeat count by retry attempts to avoid over-running                            original_repeat = failed_case.get('repeat', 1)                            failed_case['repeat'] = max(1, original_repeat - result.retry_count)                            failed_case['original_repeat'] = original_repeat                                                failed_regression_cases.append(failed_case)                        with open(failed_regression_path, 'w', encoding='utf-8') as f:                json.dump(failed_regression_cases, f, indent=2, ensure_ascii=False)                        saved_paths['failed_regression'] = failed_regression_path                    except Exception as e:            print(f"Warning: Could not save error list: {e}")            return None                return saved_paths        def find_original_test_case_with_opts(self, test_name: str, result):        """Find the original test case JSON data by test name using the exact opts array stored on the result.
        Important: Do NOT derive opts by splitting the unique key, because opts values may contain underscores        (e.g., "DELAY_SET_OFF") which would be incorrectly split. Prefer `result.opts` when available.        """        try:            # Prefer opts directly from the provided result object            target_opts = []            if hasattr(result, 'opts') and isinstance(result.opts, list):                target_opts = result.opts            else:                # Fallback: search a matching TestResult and read its opts property                for result_key, result_obj in self.results.items():                    if result_obj.name == test_name and hasattr(result_obj, 'opts'):                        target_opts = result_obj.opts or []                        break
            original_target_opts = list(target_opts) if isinstance(target_opts, list) else []            print(f"Looking for test case: {test_name} with opts: {original_target_opts}")            original_case = self.find_original_test_case_by_name_and_opts(test_name, original_target_opts)            if original_case:                print(f"Found original test case with matching opts: {original_target_opts}")                return original_case
            # Strict mode: fail fast when opts don't match            msg = f"Strict opts match failed for '{test_name}'. target_opts={original_target_opts}"            print(msg)            raise RuntimeError(msg)
        except Exception as e:            print(f"Warning: Could not find original test case with opts for {test_name}: {e}")            return None        def find_original_test_case_by_name_and_opts(self, test_name: str, target_opts: list):        """Find the original test case JSON data by test name and opts.
        STRICT MATCH MODE: Compare opts arrays exactly as listed in JSON.        No token splitting or normalization. "DELAY_SET_OFF" only matches the same string.        """        try:            # Load test cases from the original JSON files            cur_path = os.getcwd()            test_file_list_name = cur_path + "/../def/json_list"                        if not os.path.exists(test_file_list_name):                print(f"Warning: Test file list not found: {test_file_list_name}")                return None                        print(f"Looking for test case: {test_name} with opts: {target_opts}")            print(f"Searching in test file list: {test_file_list_name}")                        with open(test_file_list_name, 'r') as f:                for line in f:                    if line.strip() and not line.startswith('#'):                        file_path = cur_path + "/../def" + line.strip()                        print(f"Checking file: {file_path}")                                                if os.path.exists(file_path):                            with open(file_path, 'r') as json_file:                                test_cases = json.load(json_file)                                for case in test_cases:                                    case_name = case.get('name', '')                                    case_opts = case.get('opts', [])                                                                        if case_name == test_name:                                        # Check if opts match EXACTLY (strict mode)                                        if case_opts == target_opts:                                            print(f"Found test case '{test_name}' with matching opts {case_opts} in file: {file_path}")                                            return case                                        else:                                            print(f"  Found test case '{test_name}' but opts don't match: expected {target_opts}, got 
{case_opts}")                                    else:                                        print(f"  Checking case: '{case_name}' vs '{test_name}'")                        else:                            print(f"Warning: Test file not found: {file_path}")                        print(f"Test case '{test_name}' with opts {target_opts} not found in any JSON files")            return None                    except Exception as e:            print(f"Warning: Could not find original test case by name and opts for {test_name}: {e}")            return None        def find_original_test_case(self, test_name):        """Find the original test case JSON data by test name"""        try:            # Load test cases from the original JSON files            cur_path = os.getcwd()            test_file_list_name = cur_path + "/../def/json_list"                        if not os.path.exists(test_file_list_name):                print(f"Warning: Test file list not found: {test_file_list_name}")                return None                        print(f"Looking for test case: {test_name}")            print(f"Searching in test file list: {test_file_list_name}")                        with open(test_file_list_name, 'r') as f:                for line in f:                    if line.strip() and not line.startswith('#'):                        file_path = cur_path + "/../def" + line.strip()                        print(f"Checking file: {file_path}")                                                if os.path.exists(file_path):                            with open(file_path, 'r') as json_file:                                test_cases = json.load(json_file)                                for case in test_cases:                                    case_name = case.get('name', '')                                    if case_name == test_name:                                        print(f"Found test case '{test_name}' in file: {file_path}")                                        return case     
                               else:                                        print(f"  Checking case: '{case_name}' vs '{test_name}'")                        else:                            print(f"Warning: Test file not found: {file_path}")                        print(f"Test case '{test_name}' not found in any JSON files")            return None                    except Exception as e:            print(f"Warning: Could not find original test case for {test_name}: {e}")            return None        def generate_comprehensive_regression_report(self, report_path):        """Generate comprehensive regression report including all sections"""        try:            with open(report_path, 'w', encoding='utf-8') as f:                # Write header                f.write("=" * 80 + "\n")                f.write("COMPREHENSIVE REGRESSION REPORT\n")                f.write("=" * 80 + "\n\n")                                # Generate detailed regression report                f.write("=== DETAILED REGRESSION REPORT ===\n")                f.write(self.generate_detailed_regression_report_content())                f.write("\n\n")                                # Generate error summary report                f.write("=== ERROR SUMMARY REPORT ===\n")                f.write(self.generate_error_summary_report_content())                f.write("\n\n")                                # Generate regression summary info                f.write("=== REGRESSION SUMMARY INFO ===\n")                f.write(self.generate_regression_summary_info_content())                f.write("\n\n")                                # Generate test status and log paths section                f.write("=== TEST STATUS AND LOG PATHS ===\n")                f.write(self.generate_test_status_and_log_paths_content())                f.write("\n\n")                                # Write footer                f.write("=" * 80 + "\n")                f.write("END OF REPORT\n")                f.write("=" * 80 + "\n")       
                     print(f"Comprehensive regression report saved to: {report_path}")                    except Exception as e:            print(f"Warning: Could not generate comprehensive regression report: {e}")        def show_concurrent_status(self, running_jobs: int, total_jobs: int, max_concurrent: int):        """Show concurrent job status"""        # Commented out concurrent status printing as requested        # timestamp = datetime.now().strftime('%m-%d %H:%M:%S')        # if max_concurrent > 0:        #     utilization = (running_jobs / max_concurrent) * 100        #     print(f"INFO: {timestamp} Concurrent Status: {running_jobs}/{max_concurrent} jobs running ({utilization:.1f}% utilization)")        # else:        #     print(f"INFO: {timestamp} Concurrent Status: {running_jobs} jobs running (no limit)")        #         # if total_jobs > 0:        #     progress = ((total_jobs - len(self.submitted_jobs)) / total_jobs) * 100        #     print(f"INFO: {timestamp} Overall Progress: {progress:.1f}%")        pass        def get_total_test_cases_count(self):        """Get the correct total test cases count, avoiding duplicates and transient states"""        # Clean up duplicates first        self._clean_duplicate_test_results()                # Only count tests with final statuses, exclude transient PENDING/RUNNING states        final_status_tests = []        for result_key, result_obj in self.results.items():            # Only count tests that have reached a final state            if result_obj.status in ["PASS", "RERUN PASS", "FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]:                final_status_tests.append(result_key)                return len(final_status_tests)        def show_regression_status(self, running_jobs: int, pending_jobs: int, total_test_cases: int):        """Show regression status: running/pending/completed test cases"""        timestamp = datetime.now().strftime('%m-%d %H:%M:%S')        print(f"INFO: {timestamp} Regression Status: 
{running_jobs} running, {pending_jobs} pending, {total_test_cases} completed")        def run(self):        """Run regression test"""        print(f"{Colors.BOLD}CMN-700 UVM Regression Test{Colors.END}")        print(f"Mode: {self.args.mode}")        print(f"Start Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")                # Display resource configuration        if hasattr(self.args, 'memory') and self.args.memory is not None:            print(f"Memory Reservation: {self.args.memory}GB per job (command line)")        else:            print(f"Memory Reservation: Not specified (using LSF default or JSON values)")                cpu_cores = getattr(self.args, 'cpu_cores', 1)        print(f"CPU Cores: {cpu_cores} per job")                # Display coverage configuration        if hasattr(self.args, 'cov') and self.args.cov:            print(f"Coverage Type: {self.args.cov}")        elif hasattr(self.args, 'coverage') and self.args.coverage:            print(f"Coverage: Enabled (legacy mode)")        else:            print(f"Coverage: Disabled")                print()                # Initialize environment        cur_path = os.getcwd()        test_file_list_name = cur_path + "/../def/json_list"        test_file_list = []                # Load test file list        with open(test_file_list_name, 'r') as f:            for line in f:                if line != '\n':                    file_path = cur_path + "/../def" + line                    # print(file_path)  # Commented out debug print                    test_file_list.append(file_path.replace('\n', ''))                # Load test cases        test_cases = self.load_test_cases(test_file_list)        # print(test_cases)  # Commented out debug print                # Filter test cases by group (only if groups are specified)        if self.args.groups:            selected_cases = self.filter_cases(test_cases, self.args.groups)        else:            selected_cases = test_cases                # Convert test cases 
to test configs format        test_configs = []        for case in selected_cases:            # Extract test name and config from the case            test_name = case.get('name', 'unknown')            config = case.get('config', 'default')            test_configs.append((test_name, config))                # Save processed test list        self.tests = test_configs        print(f"Total loaded tests: {len(self.tests)}")                # Set estimated time for each test        # Note: TestResult objects will be created later in run_compile_and_regression        # with the correct key format (test_name:config:seed:opts)        print(f"TestResult objects will be created during job submission with proper key format")                # Run tests based on mode        if self.args.legacy_mode == "compile_regression":            compile_success = self.run_compile_and_regression(                str(self.args.dienum),                self.args.rtl_ver,                self.args.p2_mode,                self.args.define            )                        # If compilation failed, exit without running regression            if not compile_success:                print(f"{Colors.RED}Compilation failed! Exiting without running regression tests.{Colors.END}")                return        else:            print(f"{Colors.YELLOW}Local regression mode is not supported. 
Please use LSF regression mode.{Colors.END}")            return                    # Note: Retry is now handled immediately when tests fail        # No need to run retry_failed_tests() here anymore        if self.args.retry > 0:            print(f"{Colors.YELLOW}Note: Retry is enabled and will be triggered immediately when tests fail{Colors.END}")                    # Merge coverage        if self.args.coverage:            self.merge_coverage()                    # Generate report        self.generate_report()                # Handle auto-restart logic        self._handle_auto_restart()        def _handle_auto_restart(self):        """Handle auto-restart logic after regression completion"""        # Check if auto-restart is enabled        if not self.auto_restart and self.restart_interval_hours is None:            return                # Check max restarts limit        if self.max_restarts is not None and self.restart_count >= self.max_restarts:            timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')            print(f"\n{Colors.YELLOW}Auto-restart limit reached ({self.max_restarts} restarts). 
Stopping.{Colors.END}")            print(f"INFO: {timestamp} Total restarts: {self.restart_count}")            return                # Determine if we should restart        should_restart = False        restart_reason = ""                if self.auto_restart:            # Immediate restart after completion            should_restart = True            restart_reason = "auto-restart enabled"        elif self.restart_interval_hours is not None:            # Time-based restart            current_time = time.time()            elapsed_hours = (current_time - self.first_run_start_time) / 3600.0                        if elapsed_hours >= self.restart_interval_hours:                should_restart = True                restart_reason = f"restart interval reached ({self.restart_interval_hours} hours)"            else:                # Calculate wait time until next restart                remaining_hours = self.restart_interval_hours - elapsed_hours                remaining_minutes = int((remaining_hours - int(remaining_hours)) * 60)                timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')                print(f"\n{Colors.BLUE}INFO: {timestamp} Next auto-restart in {int(remaining_hours)}h {remaining_minutes}m{Colors.END}")                                # Wait until restart interval is reached                wait_seconds = remaining_hours * 3600                if wait_seconds > 0:                    print(f"{Colors.BLUE}Waiting {int(remaining_hours)}h {remaining_minutes}m until next restart...{Colors.END}")                    time.sleep(wait_seconds)                    should_restart = True                    restart_reason = f"restart interval reached ({self.restart_interval_hours} hours)"                if should_restart:            self.restart_count += 1            timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')            print(f"\n{Colors.CYAN}{'='*80}{Colors.END}")            print(f"{Colors.CYAN}Auto-Restart #{self.restart_count} - 
{restart_reason}{Colors.END}")            print(f"{Colors.CYAN}Time: {timestamp}{Colors.END}")            print(f"{Colors.CYAN}{'='*80}{Colors.END}\n")                        # Create new regression directory for restart            print(f"{Colors.BLUE}Creating new regression directory for restart #{self.restart_count}...{Colors.END}")            self.regression_timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')                        # Extract directory name from current simulation directory            current_dir = os.path.basename(os.getcwd())            # Extract xxx part from sim_xxx pattern            if current_dir.startswith('sim_'):                dir_suffix = current_dir[4:]  # Remove 'sim_' prefix            else:                dir_suffix = ''                        # Add restart count to directory name to make it unique            self.regression_dir_name = f"regression_{dir_suffix}_{self.regression_timestamp}_restart{self.restart_count}"                        # Re-setup and create directories            self._setup_directories()            self._create_directories()                        # Update real-time report path for new directory            self.real_time_report_path = self.report_dir / "zregress_report.log"                        print(f"{Colors.GREEN}New regression directory created: {self.regression_dir}{Colors.END}\n")                        # Reset some state for new run            self.start_time = time.time()            self.results = {}            self.submitted_jobs = []            self.submitted_results = []            self.job_meta = {}            self.running_jobs = 0            self.pending_jobs = 0            self.log_read_positions = {}            self.log_last_update_times = {}                        # Reset error monitoring state            self.last_error_monitor_time = time.time()                        # Reset status thread control            self._stop_status_thread = False                        # For time-based 
restart, reset the first run start time to current time            # so the next interval starts from now            if self.restart_interval_hours is not None:                self.first_run_start_time = time.time()                        # Small delay before restart            time.sleep(2)                        # Recursively call run() to start new regression            try:                self.run()            except KeyboardInterrupt:                print(f"\n{Colors.YELLOW}User interrupted during auto-restart, cleaning up...{Colors.END}")                self.cleanup()                raise            except Exception as e:                print(f"{Colors.RED}Error during auto-restart: {e}{Colors.END}")                self.cleanup()                raise
    def collect_transaction_and_cycle_stats(self) -> Tuple[int, int, int]:        """        扫描当前回归目录下的所有仿真 log 文件,统计 transaction 和 cycle 数量总和。
        返回:          (total_transaction_count, total_cycle_count, counted_log_files)        """        log_root = Path(self.log_dir)        if not log_root.exists():            return 0, 0, 0
        total_txn = 0        total_cycles = 0        counted_logs = 0
        # 递归扫描当前回归 logs 目录下的所有 .log 文件        for log_file in log_root.rglob("*.log"):            try:                txn = stat_transaction_count.extract_transaction_count(log_file)                cyc = stat_transaction_count.extract_cycle_count(log_file)            except Exception:                continue
            if txn is not None:                total_txn += txn                counted_logs += 1            if cyc is not None:                total_cycles += cyc
        return total_txn, total_cycles, counted_logs
    def update_transaction_cycle_statistics(self):        """        使用 stat_transaction_count 的统计逻辑,将本次回归的        transaction_count 和 cycle_count 写入一个全局文档,并维护累加计数。
        文档格式(CSV):          date,regression_dir,transaction_count,cycle_count,cumulative_transaction_count,cumulative_cycle_count,log_files_count        """        # 先统计本次回归        total_txn, total_cycles, counted_logs = self.collect_transaction_and_cycle_stats()
        # 如果一个 log 都没有统计到 transaction,就直接返回,避免写入无意义记录        if counted_logs == 0:            print(f"{Colors.YELLOW}Warning: No transaction/cycle information found under {self.log_dir}{Colors.END}")            return
        history_path = Path("transaction_cycle_history.csv").resolve()
        cumulative_txn = 0        cumulative_cycles = 0
        # 如果历史文件存在,先把以前的记录读出来,计算累加值        if history_path.exists():            try:                with history_path.open("r", encoding="utf-8") as f:                    reader = csv.reader(f)                    header_read = False                    for row in reader:                        # 跳过表头                        if not header_read:                            header_read = True                            continue                        if len(row) < 5:                            continue                        try:                            # 第 3、4 列是本次的 transaction/cycle,总累加重新算                            run_txn = int(row[2])                            run_cycles = int(row[3])                        except ValueError:                            continue                        cumulative_txn += run_txn                        cumulative_cycles += run_cycles            except Exception as e:                print(f"{Colors.YELLOW}Warning: Failed to read existing history file {history_path}: {e}{Colors.END}")                cumulative_txn = 0                cumulative_cycles = 0
        # 把本次回归加到累加值里        cumulative_txn += total_txn        cumulative_cycles += total_cycles
        # 准备写入一行新记录        now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")        regression_dir_str = str(self.regression_dir.resolve())
        # 如果文件不存在,先写表头        file_exists = history_path.exists()        try:            with history_path.open("a", encoding="utf-8", newline="") as f:                writer = csv.writer(f)                if not file_exists:                    writer.writerow([                        "date",                        "regression_dir",                        "transaction_count",                        "cycle_count",                        "cumulative_transaction_count",                        "cumulative_cycle_count",                        "log_files_count",                    ])                writer.writerow([                    now_str,                    regression_dir_str,                    total_txn,                    total_cycles,                    cumulative_txn,                    cumulative_cycles,                    counted_logs,                ])
            print(                f"{Colors.GREEN}Transaction/Cycle statistics updated. "                f"txn={total_txn}, cycles={total_cycles}, "                f"cumulative_txn={cumulative_txn}, cumulative_cycles={cumulative_cycles}{Colors.END}"            )        except Exception as e:            print(f"{Colors.YELLOW}Warning: Failed to write transaction/cycle history to {history_path}: {e}{Colors.END}")        def check_compile_files_exist(self, output_dir: str, dienum: str, rtl_ver: str, mode: str) -> bool:        """Check if compile files already exist"""        try:            # Check for common compile output files            compile_files = [                f"{output_dir}/compile.log",                f"{output_dir}/compile_ok",                f"{output_dir}/compile.done",                f"{output_dir}/simv",  # VCS executable                f"{output_dir}/simv.daidir",  # VCS directory                f"{output_dir}/csrc",  # VCS source directory            ]                        # Check if any of these files exist            existing_files = [f for f in compile_files if os.path.exists(f)]                        if existing_files:                timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                print(f"INFO: {timestamp} Found existing compile files:")                for f in existing_files:                    print(f"  - {f}")                return True            else:                timestamp = datetime.now().strftime('%m-%d %H:%M:%S')                print(f"INFO: {timestamp} No existing compile files found")                return False                        except Exception as e:            print(f"{Colors.YELLOW}Warning: Error checking compile files: {e}{Colors.END}")            return False        def should_skip_compile(self) -> bool:        """Determine if compile should be skipped based on bypass argument"""        if not hasattr(self.args, 'bypass') or self.args.bypass is None:            return False                # 
Handle different bypass argument formats        if isinstance(self.args.bypass, list):            if len(self.args.bypass) == 0:                return False            bypass_value = self.args.bypass[0]        else:            bypass_value = self.args.bypass                # Convert to string and check        bypass_str = str(bypass_value).lower().strip()                # Skip compile if bypass is "1", "true", "yes", "skip", "bypass"        # Default is "0" which means compile        skip_values = ["1", "true", "yes", "skip", "bypass"]        return bypass_str in skip_values
    def _status_print_thread(self):        """Status print thread function"""        while not self._stop_status_thread:            time.sleep(5)  # Check every 5 seconds if status should be printed                        # Check if all tests are completed and stop if so            with self.lock:                all_completed = all(result.status in ["PASS", "RERUN PASS", "FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]                                   for result in self.results.values())                if all_completed and (self.running_jobs == 0 and self.pending_jobs == 0):                    print(f"{Colors.GREEN}All tests completed, stopping status monitoring thread{Colors.END}")                    self._stop_status_thread = True                    break                                # Update LSF job statuses in real-time                self._update_lsf_job_statuses()                self.print_status_summary()                # Update real-time report in status thread                self.update_real_time_report()                # Monitor running tests for errors                self.monitor_running_tests_for_errors()        def _update_lsf_job_statuses(self):        """Update LSF job statuses in real-time to keep TestResult objects synchronized"""        if not hasattr(self, 'submitted_jobs') or not self.submitted_jobs:            return                try:            # Get current LSF status for all submitted jobs            job_ids = [int(job_id) for job_id in self.submitted_jobs]            if not job_ids:                return                        # Use batch status check for better performance            status_map = self.batch_check_job_status(job_ids)                        # Update TestResult objects based on current LSF status            for job_id, lsf_status in status_map.items():                if lsf_status in ["RUN", "PEND"]:                    # Find the corresponding TestResult object                    test_info = 
self.get_test_info_by_job_id(str(job_id))                    if test_info:                        test_name = test_info['name']                        seed = test_info['seed']                                                # Find the TestResult object by searching through all results                        found_result = None                        for result_key, result_obj in self.results.items():                            if result_obj.name == test_name and getattr(result_obj, 'seed', '') == seed:                                found_result = result_obj                                break                                                if found_result:                            # Update status based on LSF status                            if lsf_status == "RUN" and found_result.status == "PENDING":                                # Job just started running                                found_result.status = "RUNNING"                                if not found_result.start_time:                                    found_result.start()                                print(f"DEBUG: Status updated: {test_name} seed={seed} PENDING -> RUNNING (job_id: {job_id})")                            elif lsf_status == "PEND" and found_result.status == "RUNNING":                                # Job went back to pending (resource preemption, etc.)                                
found_result.status = "PENDING"                                print(f"DEBUG: Status updated: {test_name} seed={seed} RUNNING -> PENDING (job_id: {job_id})")                        else:                            print(f"DEBUG: Could not find TestResult for {test_name} seed={seed}")                    else:                        print(f"DEBUG: Could not get test info for job_id {job_id}")                                except Exception as e:            print(f"Warning: Error updating LSF job statuses: {e}")        def estimate_completion_time(self):        """Estimate completion time"""        now = time.time()                # Calculate total time of completed tests        completed_tests = [r for r in self.results.values() if r.status in ["PASS", "RERUN PASS", "FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]]        completed_time = sum(r.duration for r in completed_tests)                # Calculate running time of currently running tests        running_tests = [r for r in self.results.values() if r.status == "RUNNING"]        running_time = sum(now - r.start_time for r in running_tests if r.start_time)                # Calculate estimated time of pending tests        pending_tests = [r for r in self.results.values() if r.status == "PENDING"]        pending_time = sum(r.estimated_duration for r in pending_tests)                # Calculate remaining estimated time of running tests        running_remaining = sum(max(0, r.estimated_duration - (now - r.start_time))                                for r in running_tests if r.start_time)                # Total estimated remaining time        total_remaining = running_remaining + pending_time                # Calculate estimated completion time        estimated_completion = now + total_remaining        completion_time = datetime.fromtimestamp(estimated_completion).strftime('%Y-%m-%d %H:%M:%S')                # Calculate progress percentage (count-based, more stable)        total_tests = len(self.results) if self.results else 
0        completed_count = len(completed_tests)        if total_tests > 0:            # Running contribution: fraction of elapsed/estimated for each running test            running_fraction_sum = 0.0            for r in running_tests:                if r.start_time and getattr(r, 'estimated_duration', 0) > 0:                    elapsed_r = max(0.0, now - r.start_time)                    est_r = max(1.0, float(r.estimated_duration))                    running_fraction_sum += min(elapsed_r / est_r, 1.0)            progress = (completed_count + running_fraction_sum) / total_tests * 100.0        else:            progress = 0.0                    return completion_time, progress, total_remaining        def print_status_summary(self):        """Print current test status summary"""        now = time.time()        # status_print_interval is in seconds; default args.status_interval=5 (minutes) → here we want 30 minutes        # Force the print interval to 30 minutes (override any smaller value)        thirty_minutes = 30 * 60        effective_interval = max(self.status_print_interval, thirty_minutes)        # Skip if not enough time has passed since last status print        if now - self.last_status_print < effective_interval:            return                    self.last_status_print = now                # Count tests in each status        total = len(self.results)        pending = len([r for r in self.results.values() if r.status == "PENDING"])        running = len([r for r in self.results.values() if r.status == "RUNNING"])        passed = len([r for r in self.results.values() if r.status == "PASS"])        rerun_passed = len([r for r in self.results.values() if r.status == "RERUN PASS"])        failed = len([r for r in self.results.values() if r.status in ["FAIL", "RERUN FAIL"]])        errors = len([r for r in self.results.values() if r.status == "ERROR"])        timeouts = len([r for r in self.results.values() if r.status == "TIMEOUT"])                # Debug: Print 
status distribution        print(f"DEBUG: Status distribution - PENDING: {pending}, RUNNING: {running}, PASS: {passed}, RERUN PASS: {rerun_passed}, FAIL: {failed}")                # Debug: Print some test statuses for verification        if running > 0:            running_tests = [r for r in self.results.values() if r.status == "RUNNING"]            print(f"DEBUG: Sample RUNNING tests: {[r.name for r in running_tests[:3]]}")        if pending > 0:            pending_tests = [r for r in self.results.values() if r.status == "PENDING"]            print(f"DEBUG: Sample PENDING tests: {[r.name for r in pending_tests[:3]]}")                # Calculate elapsed time        elapsed = now - self.start_time        hours = int(elapsed // 3600)        minutes = int((elapsed % 3600) // 60)        seconds = int(elapsed % 60)                # Estimate completion time and progress        completion_time, progress, remaining = self.estimate_completion_time()        remaining_hours = int(remaining // 3600)        remaining_minutes = int((remaining % 3600) // 60)                # Print status summary        print(f"\n{Colors.BOLD}=== Regression Status Summary (Runtime: {hours:02d}:{minutes:02d}:{seconds:02d}) ==={Colors.END}")        print(f"Total Tests: {total}")        print(f"{Colors.YELLOW}Pending: {pending}{Colors.END}")        print(f"{Colors.BLUE}Running: {running}{Colors.END}")        # CRITICAL FIX: Calculate actual LSF status counts from submitted jobs        actual_running_jobs = 0        actual_pending_jobs = 0                if hasattr(self, 'submitted_jobs') and self.submitted_jobs:            try:                # Get current LSF status for all submitted jobs                job_ids = [int(job_id) for job_id in self.submitted_jobs]                if job_ids:                    status_map = self.batch_check_job_status(job_ids)                    for job_id, status in status_map.items():                        if status == "RUN":                            
actual_running_jobs += 1                        elif status == "PEND":                            actual_pending_jobs += 1            except Exception as e:                print(f"Warning: Error calculating actual LSF status: {e}")                print(f"{Colors.CYAN}LSF Status - RUN: {actual_running_jobs}, PEND: {actual_pending_jobs}{Colors.END}")        print(f"{Colors.GREEN}Passed: {passed}{Colors.END}")        print(f"{Colors.CYAN}Rerun Passed: {rerun_passed}{Colors.END}")        print(f"{Colors.RED}Failed: {failed}{Colors.END}")        print(f"{Colors.RED}Errors: {errors}{Colors.END}")        print(f"{Colors.YELLOW}Timeouts: {timeouts}{Colors.END}")                # Calculate progress based on test count (Passed/Total)        test_progress = ((passed + rerun_passed) / total) * 100.0 if total > 0 else 0.0        progress_bar = self.generate_progress_bar(test_progress)        print(f"\nProgress: {test_progress:.1f}% {progress_bar}")        print(f"Estimated Remaining Time: {remaining_hours} hours {remaining_minutes} minutes")        print(f"Estimated Completion Time: {completion_time}")                # Print running tests        if running > 0:            print(f"\n{Colors.BLUE}Running Tests:{Colors.END}")            running_tests = [r for r in self.results.values() if r.status == "RUNNING"]            for test in running_tests[:5]:  # Show at most 5                elapsed = now - test.start_time if test.start_time else 0                minutes = int(elapsed // 60)                seconds = int(elapsed % 60)                                # Get seed and opts information                seed = getattr(test, 'seed', 'unknown')                opts = getattr(test, 'opts', [])                opts_str = "_".join(opts) if opts else "no_opts"                                status_info = ""                if hasattr(test, 'error_detected') and test.error_detected:                    status_info = f" {Colors.RED}(running but had error){Colors.END}"                          
      print(f"  {test.name} seed={seed} opts={opts_str} (Runtime: {minutes}m{seconds}s){status_info}")            if len(running_tests) > 5:                print(f"  ... and {len(running_tests) - 5} other tests")                        # Print recently failed tests        if failed > 0 or errors > 0 or timeouts > 0:            print(f"\n{Colors.RED}Recently Failed Tests:{Colors.END}")            failed_tests = [r for r in self.results.values() if r.status in ["FAIL", "ERROR", "TIMEOUT"]]            for test in failed_tests[-5:]:  # Show at most 5 recent ones                log_path = getattr(test, 'log_file', '') or ''                log_part = f" log={log_path}" if log_path else ""                print(f"  {test.name}: {test.status}{log_part}")                print()  # Empty line
    def validate_all_test_statuses(self):
        """Validate and correct all test statuses before generating report.

        Cross-checks every RUNNING/PENDING TestResult against the actual LSF
        job state (via ``check_lsf_job_status``) and, when LSF no longer knows
        the job, falls back to parsing the test's log file. Rate-limited to at
        most one pass per minute via ``self._last_validation_time``.
        """
        current_time = time.time()

        # Limit validation frequency to avoid excessive calls
        if current_time - self._last_validation_time < 60:  # Only validate once per minute
            return

        self._last_validation_time = current_time
        self._validation_count += 1

        # print(f"Validating all test statuses... (validation #{self._validation_count})")

        # First check all TestResult objects with RUNNING status
        running_tests = [r for r in self.results.values() if r.status == "RUNNING"]
        if running_tests:
            print(f"Found {len(running_tests)} tests with RUNNING status, checking actual job status...")

            for result_obj in running_tests:
                if hasattr(result_obj, 'job_id') and result_obj.job_id:
                    try:
                        status = self.check_lsf_job_status(int(result_obj.job_id))
                        if status == "DONE":
                            # LSF reports normal completion -> treat as PASS.
                            result_obj.finish("PASS", "")
                            print(f"  Status corrected: {result_obj.name} -> PASS")
                        elif status in ["EXIT", "TERM", "KILL"]:
                            # Abnormal termination states -> FAIL.
                            result_obj.finish("FAIL", f"Job status: {status}")
                            print(f"  Status corrected: {result_obj.name} -> FAIL")
                        elif status in ["RUN", "PEND", "WAIT", "SUSP"]:
                            # Job is still running or pending, keep RUNNING status
                            pass
                        elif status == "UNKNOWN":
                            # Job may have completed and been removed from queue, or still running
                            # Only change status if we can definitively determine the result
                            if hasattr(result_obj, 'log_file') and result_obj.log_file:
                                if os.path.exists(result_obj.log_file):
                                    if self.check_test_result(result_obj.log_file):
                                        result_obj.finish("PASS", "")
                                        print(f"  Status corrected: {result_obj.name} -> PASS (from log file)")
                                    else:
                                        result_obj.finish("FAIL", "Test failed (from log file)")
                                        print(f"  Status corrected: {result_obj.name} -> FAIL (from log file)")
                                else:
                                    # No log file yet, keep RUNNING status (job might still be running)
                                    pass
                            else:
                                # No log file info, keep RUNNING status (job might still be running)
                                pass
                        else:
                            # Unknown LSF status, keep RUNNING status
                            pass
                    except Exception as e:
                        print(f"  Warning: Could not check status for {result_obj.name}: {e}")
                        # Keep RUNNING status if we can't determine status (job might still be running)
                        pass
                else:
                    # No job_id, keep RUNNING status (job not yet submitted)
                    pass

        # Now check all TestResult objects with PENDING status to see if they've completed
        pending_tests = [r for r in self.results.values() if r.status == "PENDING"]
        if pending_tests:
            print(f"Found {len(pending_tests)} tests with PENDING status, checking if they've completed...")

            for result_obj in pending_tests:
                if hasattr(result_obj, 'job_id') and result_obj.job_id:
                    try:
                        status = self.check_lsf_job_status(int(result_obj.job_id))
                        if status == "DONE":
                            result_obj.finish("PASS", "")
                            print(f"  Status corrected: {result_obj.name} -> PASS (was PENDING)")
                        elif status in ["EXIT", "TERM", "KILL"]:
                            result_obj.finish("FAIL", f"Job status: {status}")
                            print(f"  Status corrected: {result_obj.name} -> FAIL (was PENDING)")
                        elif status in ["RUN", "PEND", "WAIT", "SUSP"]:
                            # If job is RUN, upgrade PENDING -> RUNNING; otherwise keep PENDING
                            if status == "RUN":
                                result_obj.status = "RUNNING"
                                # Initialize start time if missing
                                if not getattr(result_obj, 'start_time', None):
                                    result_obj.start()
                                print(f"  Status corrected: {result_obj.name} PENDING -> RUNNING")
                            else:
                                # Still pending/wait/suspend; keep PENDING
                                print(f"  {result_obj.name} still {status}")
                        elif status == "UNKNOWN":
                            # Job may have completed and been removed from queue
                            # Try to check if log file exists and determine result
                            if hasattr(result_obj, 'log_file') and result_obj.log_file:
                                if os.path.exists(result_obj.log_file):
                                    if self.check_test_result(result_obj.log_file):
                                        result_obj.finish("PASS", "")
                                        print(f"  Status corrected: {result_obj.name} -> PASS (from log file, was PENDING)")
                                    else:
                                        result_obj.finish("FAIL", "Test failed (from log file)")
                                        print(f"  Status corrected: {result_obj.name} -> FAIL (from log file, was PENDING)")
                                else:
                                    # No log file, keep PENDING status
                                    print(f"  {result_obj.name} no log file - keeping PENDING status")
                            else:
                                # No log file info, keep PENDING status
                                print(f"  {result_obj.name} no log file info - keeping PENDING status")
                        else:
                            # Unknown status, keep PENDING
                            print(f"  {result_obj.name} unknown status {status} - keeping PENDING status")
                    except Exception as e:
                        print(f"  Warning: Could not check status for {result_obj.name}: {e}")
                        # Keep PENDING status if we can't determine status
                        print(f"  {result_obj.name} keeping PENDING status due to error")
                else:
                    # No job_id, keep PENDING status
                    print(f"  {result_obj.name} no job_id - keeping PENDING status")

        # Final count
        # NOTE(review): these final_* tallies are computed but never used or
        # printed below - confirm whether a summary print was dropped here.
        final_running = len([r for r in self.results.values() if r.status == "RUNNING"])
        final_pending = len([r for r in self.results.values() if r.status == "PENDING"])
        final_passed = len([r for r in self.results.values() if r.status == "PASS"])
        final_failed = len([r for r in self.results.values() if r.status in ["FAIL", "ERROR", "TIMEOUT"]])

    def generate_detailed_regression_report_content(self):
        """Generate detailed regression report content as string.

        Returns one pipe-delimited table row per TestResult (status, name,
        seed, jobid, cpu_time, max_mem, procs) framed by REPORT/END REPORT
        banner lines. Statuses are re-validated first.
        """
        output = []

        # Validate statuses before generating report
        self.validate_all_test_statuses()

        # Print header
        timestamp = datetime.now().strftime('%m-%d %H:%M:%S')
        output.append(f"INFO: {timestamp}: {'+' * 15} REPORT {'+' * 15}")

        # Print table header
        output.append(f"INFO: {timestamp}: | status | test_name | seed | jobid | cpu_time | max_mem | procs |")

        # Process each test result
        for result in self.results.values():
            # Get test info
            test_name = result.name
            seed = getattr(result, 'seed', 'unknown')
            job_id = getattr(result, 'job_id', 'unknown')

            # Get CPU time and memory info from actual data
            cpu_time, max_mem, procs = self.extract_job_statistics(result)

            # Handle PENDING status specially for file output
            if result.status == "PENDING":
                cpu_time = "-1|unknown"

            # Format status with proper colors (remove color codes for file output)
            # Collapses all failure-like statuses (FAIL/RERUN FAIL/ERROR/TIMEOUT)
            # to the single label "FAIL" for the file report.
            status = result.status
            if status == "PASS":
                status = "PASS"
            elif status in ["FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]:
                status = "FAIL"
            elif status == "RUNNING":
                status = "RUNNING"
            elif status == "PENDING":
                status = "PENDING"
            else:
                # NOTE(review): no-op branch kept for symmetry; "RERUN PASS"
                # falls through here and is reported verbatim.
                status = status

            # Print test result line
            output.append(f"INFO: {timestamp}: | {status} | {test_name} | {seed} | {job_id} | {cpu_time} | {max_mem} | {procs} |")

        output.append(f"INFO: {timestamp}: {'+' * 15} END REPORT {'+' * 15}")
        output.append(f"Total unique tests reported: {len(self.results)}")

        return "\n".join(output)

    def generate_error_summary_report_content(self):
        """Generate error summary report content as string.

        Collects failed tests, mines their logs via ``analyze_log_for_errors``
        (which populates ``error_info`` in place), and renders either a
        "no errors" line or a numbered per-error-ID summary.
        """
        output = []

        # Collect all error information from log content before UVM Report catcher Summary and direct patterns
        error_info = {}
        failed_tests = []

        for result in self.results.values():
            if result.status in ["FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]:
                failed_tests.append(result)
                # Analyze log file for errors in content before UVM Report catcher Summary
                self.analyze_log_for_errors(result, error_info)

        if not failed_tests:
            output.append("No UVM_ERROR or UVM_FATAL found in logs")
        else:
            if not error_info:
                output.append(f"Found {len(failed_tests)} failed tests but no UVM_ERROR or UVM_FATAL found in logs")
                for result in failed_tests:
                    output.append(f"  {result.name}: {result.status} - {result.error_msg}")
            else:
                output.append(f"Found {len(failed_tests)} failed tests with error details:")

                # Group errors by type
                error_count = 1
                for error_type, error_details in error_info.items():
                    output.append(f"({error_count}) ERR ID:{error_details['id']}:")
                    output.append(f"MSG: \"{error_details['message']}\"")

                    # Print error count if available (from log content before UVM Report catcher Summary)
                    if 'count' in error_details:
                        output.append(f"Count: {error_details['count']} (from log content before UVM Report catcher Summary)")

                    # Print associated test paths
                    for test_path in error_details['tests']:
                        output.append(f"{test_path}")

                    error_count += 1

        return "\n".join(output)

    def generate_regression_summary_info_content(self):
        """Generate regression summary info content as string.

        Produces counts per status bucket, a pass rate, and an overall
        PASS/FAIL/INCOMPLETE verdict. Statuses are re-validated first.
        """
        output = []

        # Validate statuses before generating report
        self.validate_all_test_statuses()

        # Count results
        total_tests = len(self.results)
        passed_tests = len([r for r in self.results.values() if r.status == "PASS"])
        rerun_passed_tests = len([r for r in self.results.values() if r.status == "RERUN PASS"])
        failed_tests = len([r for r in self.results.values() if r.status in ["FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]])
        pending_tests = len([r for r in self.results.values() if r.status == "PENDING"])
        running_tests = len([r for r in self.results.values() if r.status == "RUNNING"])

        # Print summary
        output.append(f"Running: {running_tests}")
        output.append(f"Pending: {pending_tests}")
        output.append(f"Passed: {passed_tests}")
        output.append(f"Rerun Passed: {rerun_passed_tests}")
        output.append(f"Failed: {failed_tests}")
        output.append(f"Total tests: {total_tests}")

        # Print detailed status
        # NOTE(review): this second section repeats the same counters with
        # different labels - presumably two consumers parse this file; confirm
        # before de-duplicating.
        output.append(f"Total Tests: {total_tests}")
        output.append(f"Passed: {passed_tests}")
        output.append(f"Rerun Passed: {rerun_passed_tests}")
        output.append(f"Failed: {failed_tests}")
        output.append(f"Pending: {pending_tests}")
        output.append(f"Running: {running_tests}")

        # Calculate pass rate
        if total_tests > 0:
            pass_rate = ((passed_tests + rerun_passed_tests) / total_tests) * 100
            output.append(f"Pass Rate: {pass_rate:.1f}%")
        else:
            output.append("Pass Rate: 0.0%")

        # Determine overall status
        if pending_tests > 0 or running_tests > 0:
            output.append("Overall Status: INCOMPLETE")
        elif failed_tests == 0:
            output.append("Overall Status: PASS")
        else:
            output.append("Overall Status: FAIL")

        return "\n".join(output)

    def generate_test_status_and_log_paths_content(self):
        """Generate test status and log paths content for zregress_report.log.

        Emits one "[STATUS] log_path" line per test, sorted so active and
        failing tests appear before passing ones.
        """
        output = []

        # Validate statuses before generating report
        self.validate_all_test_statuses()

        # Get ALL test results (not just final statuses)
        all_results = list(self.results.values())

        # Sort results by status priority: RUNNING, PENDING, FAIL/RERUN FAIL/ERROR/TIMEOUT, PASS, RERUN PASS
        def sort_key(result):
            # Lower number sorts first; unknown statuses sink to the bottom (4).
            status_priority = {
                "RUNNING": 0,
                "PENDING": 1,
                "FAIL": 2,
                "RERUN FAIL": 2,
                "ERROR": 2,
                "TIMEOUT": 2,
                "PASS": 3,
                "RERUN PASS": 3
            }
            return status_priority.get(result.status, 4)

        all_results.sort(key=sort_key)

        # Print all test results with their log paths
        for result in all_results:
            log_path = self.get_test_log_path(result)
            error_info = ""
            if hasattr(result, 'error_detected') and result.error_detected:
                error_info = " (running but had error)"

            if log_path:
                output.append(f"[{result.status}]{error_info} {log_path}")
            else:
                # If no log file found, still show the test but indicate no log
                output.append(f"[{result.status}]{error_info} {result.name}: No log file found")

        return "\n".join(output)

    def _update_job_status_counts(self, status_changes: Dict[int, str]):
        """Update running_jobs and pending_jobs counts based on current LSF status.

        ``status_changes`` maps LSF job id -> LSF state string. Also promotes
        matching TestResult objects PENDING->RUNNING (and demotes
        RUNNING->PENDING on preemption) and refreshes the global counters.
        """
        # Reset counts
        new_running_count = 0
        new_pending_count = 0

        # Count jobs by status
        for job_id, status in status_changes.items():
            if status == "RUN":
                new_running_count += 1
            elif status == "PEND":
                new_pending_count += 1

        # CRITICAL FIX: Also count retry jobs if they exist
        if hasattr(self, 'retry_results'):
            for retry_job_id, retry_result in self.retry_results.items():
                if int(retry_job_id) not in status_changes:
                    # Check status of retry job
                    retry_status = self.check_lsf_job_status(int(retry_job_id))
                    if retry_status == "RUN":
                        new_running_count += 1
                    elif retry_status == "PEND":
                        new_pending_count += 1

        # Update counts and TestResult objects
        for job_id, status in status_changes.items():
            test_info = self.get_test_info_by_job_id(job_id)
            if test_info:
                test_name = test_info['name']
                seed = test_info['seed']

                # Find the TestResult object
                found_result = None
                for result_key, result_obj in self.results.items():
                    if result_obj.name == test_name and getattr(result_obj, 'seed', '') == seed:
                        found_result = result_obj
                        break

                if found_result:
                    if status == "RUN" and found_result.status == "PENDING":
                        # Job just started running
                        found_result.status = "RUNNING"
                        found_result.start()  # Set start time
                        self._reset_log_read_position(test_name, seed)
                        timestamp = datetime.now().strftime('%m-%d %H:%M:%S')
                        print(f"INFO: {timestamp} [jobid {job_id}] {test_name} seed={seed} RUNNING")
                    elif status == "PEND" and found_result.status == "RUNNING":
                        # Job went back to pending (resource preemption, etc.)
                        found_result.status = "PENDING"
                        timestamp = datetime.now().strftime('%m-%d %H:%M:%S')
                        print(f"INFO: {timestamp} [jobid {job_id}] {test_name} seed={seed} PENDING")

        # Update global counts
        old_running = self.running_jobs
        old_pending = self.pending_jobs

        self.running_jobs = new_running_count
        self.pending_jobs = new_pending_count

        # Log count changes if significant
        if old_running != new_running_count or old_pending != new_pending_count:
            timestamp = datetime.now().strftime('%m-%d %H:%M:%S')
            print(f"INFO: {timestamp} Status Count Update: RUNNING {old_running} -> {new_running_count}, PENDING {old_pending} -> {new_pending_count}")

    def _clean_submitted_jobs(self):
        """Clean duplicate job IDs from submitted_jobs list (order preserved)."""
        if len(self.submitted_jobs) != len(set(self.submitted_jobs)):
            original_count = len(self.submitted_jobs)
            self.submitted_jobs = list(dict.fromkeys(self.submitted_jobs))  # Remove duplicates while preserving order
            cleaned_count = len(self.submitted_jobs)
            if original_count != cleaned_count:
                print(f"Cleaned duplicate job IDs: {original_count} -> {cleaned_count}")
                print(f"Removed {original_count - cleaned_count} duplicate job IDs")

    def _clean_duplicate_test_results(self):
        """Clean up duplicate TestResult objects based on name:config:seed combination.

        When two results share an identity key, prefers PASS over any other
        status, then any final status over PENDING/RUNNING, then the entry
        that has a job_id.
        """
        print(f"Cleaning up duplicate test results...")
        original_count = len(self.results)

        # Create a mapping to track unique tests
        unique_tests = {}
        duplicates_to_remove = []

        for result_key, result_obj in self.results.items():
            # Create a unique identifier for each test
            test_identifier = f"{result_obj.name}:{result_obj.config}:{getattr(result_obj, 'seed', 'unknown')}"

            if test_identifier not in unique_tests:
                # First occurrence of this test
                unique_tests[test_identifier] = result_key
            else:
                # Duplicate found - keep the one with more complete information
                existing_key = unique_tests[test_identifier]
                existing_obj = self.results[existing_key]

                # Determine which one to keep (prefer PASS over FAIL/ERROR/TIMEOUT, then prefer final status over PENDING/RUNNING)
                if result_obj.status == "PASS" and existing_obj.status != "PASS":
                    # Always keep PASS over any other status
                    duplicates_to_remove.append(existing_key)
                    unique_tests[test_identifier] = result_key
                elif existing_obj.status == "PASS" and result_obj.status != "PASS":
                    # Keep existing PASS, remove new non-PASS
                    duplicates_to_remove.append(result_key)
                elif result_obj.status in ["PASS", "RERUN PASS", "FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"] and existing_obj.status in ["PENDING", "RUNNING"]:
                    # Keep the new one with final status, remove the old one with transient status
                    duplicates_to_remove.append(existing_key)
                    unique_tests[test_identifier] = result_key
                elif existing_obj.status in ["PASS", "RERUN PASS", "FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"] and result_obj.status in ["PENDING", "RUNNING"]:
                    # Keep the existing one with final status, remove the new one with transient status
                    duplicates_to_remove.append(result_key)
                else:
                    # Both have same status level, keep the one with more info
                    if hasattr(result_obj, 'job_id') and result_obj.job_id and not (hasattr(existing_obj, 'job_id') and existing_obj.job_id):
                        # New one has job_id, existing one doesn't
                        duplicates_to_remove.append(existing_key)
                        unique_tests[test_identifier] = result_key
                    else:
                        # Keep existing one
                        duplicates_to_remove.append(result_key)

        # Remove duplicates
        for key in duplicates_to_remove:
            if key in self.results:
                del self.results[key]

        # NOTE(review): cleaned_count is computed but never reported - confirm
        # whether a summary print (original_count -> cleaned_count) was intended.
        cleaned_count = len(self.results)
    def monitor_running_tests_for_errors(self):
        """Monitor running tests for errors in their log files.

        Rate-limited by ``self.error_monitor_interval``. Only jobs LSF reports
        as RUN (or whose LSF state cannot be determined) are scanned; PEND
        jobs are skipped. Stops the status thread once everything finished.
        """
        current_time = time.time()
        if current_time - self.last_error_monitor_time < self.error_monitor_interval:
            return

        self.last_error_monitor_time = current_time
        timestamp = datetime.now().strftime('%m-%d %H:%M:%S')
        print(f"\n{Colors.YELLOW}=== 30-Minute Error Monitoring Check ({timestamp}) ==={Colors.END}")

        # Get all running tests and filter out PEND jobs
        running_tests = []
        for result in self.results.values():
            if result.status == "RUNNING":
                # Check if job is actually running (not pending)
                job_id = getattr(result, 'job_id', None)
                if job_id:
                    try:
                        lsf_status = self.check_lsf_job_status(int(job_id))
                        if lsf_status == "RUN":
                            running_tests.append(result)
                        # Skip PEND jobs - they're waiting for resources, not actually running
                    except Exception:
                        # If we can't check LSF status, include it in monitoring
                        running_tests.append(result)
                else:
                    # No job_id, include in monitoring
                    running_tests.append(result)

        if not running_tests:
            print(f"  No actually running tests to monitor")
            # Check if all tests are completed and stop monitoring if so
            all_completed = all(result.status in ["PASS", "RERUN PASS", "FAIL", "RERUN FAIL", "ERROR", "TIMEOUT"]
                               for result in self.results.values())
            if all_completed and (self.running_jobs == 0 and self.pending_jobs == 0):
                print(f"  {Colors.GREEN}All tests completed, stopping error monitoring{Colors.END}")
                self._stop_status_thread = True
            return

        print(f"  Scanning {len(running_tests)} running tests for first-time errors...")

        errors_found_count = 0
        for result in running_tests:
            if self._check_test_log_for_errors(result):
                errors_found_count += 1

        if errors_found_count == 0:
            print(f"  {Colors.GREEN}✓ No new errors detected in running tests{Colors.END}")
        else:
            print(f"  {Colors.RED}⚠ Found {errors_found_count} new error(s) in running tests{Colors.END}")

    def _check_test_log_for_errors(self, result):
        """Check a specific test's log file for errors.

        Incrementally reads the log from the last saved offset, applies
        PEND/hang timeout detection, and reports the first matching error
        keyword (with 2 lines of context either side). Returns True only when
        a new error was detected and reported; False otherwise.
        """
        test_name = result.name
        seed = getattr(result, 'seed', 'unknown')

        # Get the log file path
        log_file_path = self.get_test_log_path(result)
        if not log_file_path or not os.path.exists(log_file_path):
            return False

        # Get or initialize the last read position for this log file
        log_key = f"{test_name}_{seed}"
        last_position = self.log_read_positions.get(log_key, 0)

        # Check if we've already reported errors for this test
        if hasattr(result, 'error_reported') and result.error_reported:
            return False

        # CRITICAL FIX: Only check for timeout if job is actually RUNNING
        # PEND jobs should not be subject to timeout detection as they're waiting for resources
        job_id = getattr(result, 'job_id', None)
        if job_id:
            try:
                lsf_status = self.check_lsf_job_status(int(job_id))
                if lsf_status == "PEND":
                    # Job is pending - check for PEND timeout if configured
                    if self.pend_timeout_seconds is not None:
                        now_ts = time.time()
                        last_update_ts = self.log_last_update_times.get(log_key, now_ts)
                        if now_ts - last_update_ts >= self.pend_timeout_seconds:
                            mins = int(self.pend_timeout_seconds // 60)
                            result.finish("TIMEOUT", f"Job pending for {mins} minutes (resource timeout)")
                            print(f"\n{Colors.YELLOW}⏱{Colors.END} {test_name} seed={seed} TIMEOUT - pending for {mins} minutes")
                            self.log_last_update_times[log_key] = now_ts
                            return False
                    # No PEND timeout configured or not yet reached - don't check for hang timeout
                    return False
            except Exception:
                # If we can't check LSF status, assume it's running and proceed with timeout check
                pass

        try:
            with open(log_file_path, 'r', encoding='utf-8', errors='ignore') as f:
                # Seek to the last read position
                f.seek(last_position)

                # Read new content
                new_content = f.read()
                current_position = f.tell()

                # Update last update time if file advanced, else check for hang
                now_ts = time.time()
                last_update_ts = self.log_last_update_times.get(log_key, now_ts)
                if current_position > last_position:
                    # File advanced; update last update time
                    self.log_last_update_times[log_key] = now_ts
                else:
                    # No progress; if configured threshold without new lines, mark TIMEOUT (hung)
                    # Only apply timeout to RUNNING jobs, not PEND jobs
                    if now_ts - last_update_ts >= self.hang_timeout_seconds:
                        mins = int(self.hang_timeout_seconds // 60)
                        result.finish("TIMEOUT", f"No new log lines for {mins} minutes (assumed hang)")
                        print(f"\n{Colors.YELLOW}⏱{Colors.END} {test_name} seed={seed} TIMEOUT - no log updates for {mins} minutes")
                        # Reset tracking to avoid repeated triggers
                        self.log_last_update_times[log_key] = now_ts

                        # CRITICAL FIX: Trigger retry for TIMEOUT cases
                        if getattr(self.args, 'retry', 0) > 0 and hasattr(result, 'job_id') and result.job_id:
                            print(f"{Colors.CYAN}🚀 Triggering retry for TIMEOUT case {test_name} seed={seed}{Colors.END}")
                            self._resubmit_from_stored_opcode(result.job_id)

                    return False

                # Update the last read position
                self.log_read_positions[log_key] = current_position

                # Only scan content BEFORE 'UVM Report catcher Summary' to avoid false positives
                summary_idx = new_content.find('UVM Report catcher Summary')
                scan_text = new_content[:summary_idx] if summary_idx != -1 else new_content

                # Check for error keywords in the scan_text
                first_error_found = None
                for keyword in self.error_keywords:
                    if keyword in scan_text:
                        # Find the first occurrence of this error keyword
                        lines = scan_text.split('\n')
                        for i, line in enumerate(lines):
                            if keyword in line:
                                # Skip report-summary style lines that are not real errors
                                if 'UVM_' in keyword or 'UVM_' in line:
                                    if 'Number of' in line and 'reports' in line:
                                        continue
                                # Get some context around the error
                                start_line = max(0, i - 2)
                                end_line = min(len(lines), i + 3)
                                context = '\n'.join(lines[start_line:end_line])
                                first_error_found = {
                                    'keyword': keyword,
                                    'line': line.strip(),
                                    'context': context
                                }
                                break
                        if first_error_found:
                            break

                if first_error_found:
                    timestamp = datetime.now().strftime('%m-%d %H:%M:%S')
                    print(f"\n{Colors.RED}🚨 FIRST ERROR DETECTED 🚨{Colors.END}")
                    print(f"{Colors.RED}[{timestamp}] Test: {test_name} seed={seed} - running but had error{Colors.END}")
                    print(f"{Colors.RED}Log File: {log_file_path}{Colors.END}")
                    print(f"{Colors.RED}Error Type: {first_error_found['keyword']}{Colors.END}")
                    print(f"{Colors.RED}Error Line: {first_error_found['line']}{Colors.END}")
                    print(f"{Colors.RED}Error Context:{Colors.END}")
                    for context_line in first_error_found['context'].split('\n'):
                        print(f"{Colors.RED}  {context_line}{Colors.END}")
                    print(f"{Colors.RED}{'='*80}{Colors.END}")

                    # Mark that we've reported the first error for this test
                    result.error_reported = True
                    result.error_detected = True
                    result.first_error_details = first_error_found
                    return True

        except Exception as e:
            print(f"  Warning: Could not read log file {log_file_path}: {e}")

        return False

    def _reset_log_read_position(self, test_name, seed):
        """Reset the log read position for a test (when it starts running)."""
        log_key = f"{test_name}_{seed}"
        self.log_read_positions[log_key] = 0

    def _cleanup_log_read_positions(self):
        """Clean up log read positions for completed tests."""
        # NOTE(review): "RERUN FAIL" is absent from this final-status list even
        # though other status checks in this class include it - confirm whether
        # rerun-failed tests should also have their tracking entries removed.
        completed_tests = [result for result in self.results.values()
                           if result.status in ["PASS", "RERUN PASS", "FAIL", "ERROR", "TIMEOUT"]]

        for result in completed_tests:
            test_name = result.name
            seed = getattr(result, 'seed', 'unknown')
            log_key = f"{test_name}_{seed}"

            if log_key in self.log_read_positions:
                del self.log_read_positions[log_key]
            if hasattr(self, 'log_last_update_times') and log_key in self.log_last_update_times:
                del self.log_last_update_times[log_key]

    def save_error_monitor_state(self):
        """Save error monitoring state (read offsets, last scan time) to JSON."""
        try:
            state_file = self.report_dir / "error_monitor_state.json"
            state_data = {
                'log_read_positions': self.log_read_positions,
                'last_error_monitor_time': self.last_error_monitor_time,
                'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            }

            with open(state_file, 'w') as f:
                json.dump(state_data, f, indent=2)

        except Exception as e:
            print(f"Warning: Could not save error monitor state: {e}")

    def load_error_monitor_state(self):
        """Load error monitoring state from file (best-effort; missing file is fine)."""
        try:
            state_file = self.report_dir / "error_monitor_state.json"
            if state_file.exists():
                with open(state_file, 'r') as f:
                    state_data = json.load(f)

                self.log_read_positions = state_data.get('log_read_positions', {})
                self.last_error_monitor_time = state_data.get('last_error_monitor_time', time.time())
                print(f"Loaded error monitor state from {state_file}")

        except Exception as e:
            print(f"Warning: Could not load error monitor state: {e}")

def parse_it_regress_alias(alias_file: str) -> List[Tuple[str, str]]:
    """Parse an it_regress.alias file into (sim_dir_name, cmd_string) pairs.

    Expected format (example):
      sim_1d:
          cmd: ./hregress.py -g full_1die --auto_restart ...

    Blank lines and '#' comment lines are skipped.  A line ending with ':'
    opens a new sim section; each 'cmd:' line is attached to the most
    recently opened section.  Lines matching neither form are ignored.

    Returns:
        List of (sim_dir_name, cmd_string) tuples, in file order.

    Raises:
        ValueError: if a 'cmd:' line appears before any sim section, or if
            a cmd entry is empty.
    """
    parsed: List[Tuple[str, str]] = []
    section: Optional[str] = None
    with open(alias_file, "r", encoding="utf-8", errors="ignore") as fh:
        for raw in fh:
            entry = raw.strip()
            # Skip blanks and comment lines.
            if not entry or entry.startswith("#"):
                continue
            if entry.lower().startswith("cmd:"):
                # Command line: must belong to an already-opened section.
                if not section:
                    raise ValueError(f"Found 'cmd:' before any sim section in {alias_file}: {raw!r}")
                command = entry.split(":", 1)[1].strip()
                if not command:
                    raise ValueError(f"Empty cmd for section {section} in {alias_file}")
                parsed.append((section, command))
            elif entry.endswith(":"):
                # Section header like "sim_xxx:" — remember it for following cmd lines.
                section = entry[:-1].strip()
    return parsed

def _maybe_prefix_python(cmd_tokens: List[str], sim_dir: str) -> List[str]:    """Best-effort: if entry is a .py under sim_dir but not executable, run via current python."""    if not cmd_tokens:        return cmd_tokens    first = cmd_tokens[0]    if first.endswith(".py"):        p = Path(sim_dir) / first        try:            if p.exists() and not os.access(str(p), os.X_OK):                return [sys.executable, first] + cmd_tokens[1:]        except Exception:            pass    return cmd_tokens

def submit_all_regress_from_alias(args) -> int:
    """When args.all_regress_en == 1: read alias file and submit regressions in each sim dir.

    Workflow:
      1. Resolve the alias file path and the base directory holding sim_xxx dirs.
      2. Parse (sim_name, cmd) entries; auto-create a missing sim dir from the
         template directory via ``cp -rf <template> <sim_xxx>``.
      3. Launch every command in parallel (one subprocess per sim dir), then
         wait for all of them and summarize return codes.

    Args:
        args: Parsed argparse namespace.  Reads ``alias_file``,
            ``all_regress_sim_root`` and ``template_sim_dir`` (with defaults
            if the attributes are absent).

    Returns:
        0 if every regression finished with rc == 0, otherwise 1.
    """
    alias_file = getattr(args, "alias_file", "it_regress.alias")
    sim_root = getattr(args, "all_regress_sim_root", ".")
    template_sim_dir = getattr(args, "template_sim_dir", "sim")
    # Normalize the alias file to an absolute path.
    alias_path = os.path.abspath(alias_file) if not os.path.isabs(alias_file) else alias_file
    # Base directory resolution:
    # - If user provides --all-regress-sim-root and it exists, use it.
    # - Otherwise, use alias file's directory (most common in flow: cd sim_xxx and run).
    # - Fallback to current working directory.
    sim_root_abs = os.path.abspath(sim_root) if sim_root else ""
    if sim_root_abs and os.path.isdir(sim_root_abs):
        base_dir = sim_root_abs
    else:
        base_dir = os.path.abspath(os.path.dirname(alias_path)) if alias_path else os.getcwd()
    if not os.path.exists(alias_path):
        print(f"{Colors.RED}Error: alias file not found: {alias_path}{Colors.END}")
        return 1
    try:
        items = parse_it_regress_alias(alias_path)
    except Exception as e:
        print(f"{Colors.RED}Error: failed to parse alias file {alias_path}: {e}{Colors.END}")
        return 1
    # An alias file with no cmd entries is a no-op, not an error.
    if not items:
        print(f"{Colors.YELLOW}Warning: no cmd entries found in {alias_path}{Colors.END}")
        return 0
    # Banner: show resolved paths so a mis-configured root is easy to spot.
    print(f"{Colors.BOLD}=== ALL REGRESS MODE (from alias) ==={Colors.END}")
    print(f"Alias file: {alias_path}")
    print(f"Base dir:  {base_dir}")
    if sim_root_abs and os.path.isdir(sim_root_abs):
        print(f"Sim root:  {sim_root_abs} (enabled)")
    else:
        print(f"Sim root:  {sim_root_abs or '(not set)'} (ignored; using base dir)")
    template_abs = template_sim_dir if os.path.isabs(template_sim_dir) else os.path.join(base_dir, template_sim_dir)
    print(f"Template:  {template_abs}")
    print(f"Found {len(items)} cmd entries")
    # Preparation phase: resolve/create sim dirs and tokenize each command.
    # ``failed`` collects (sim_name, rc) for every entry that cannot run.
    failed: List[Tuple[str, int]] = []
    # Each job dict carries: name (str), sim_dir (str), cmd_str (str), tokens (List[str]).
    jobs: List[Dict[str, object]] = []
    for sim_name, cmd_str in items:
        sim_dir = sim_name if os.path.isabs(sim_name) else os.path.join(base_dir, sim_name)
        if not os.path.isdir(sim_dir):
            # Auto-create sim_xxx from template 'sim' directory (cp -rf sim sim_xxx)
            template_path = template_sim_dir if os.path.isabs(template_sim_dir) else os.path.join(base_dir, template_sim_dir)
            if not os.path.isdir(template_path):
                # Neither the sim dir nor the template exists -> cannot proceed for this entry.
                print(f"{Colors.RED}Error: sim dir not found: {sim_dir}{Colors.END}")
                print(f"{Colors.RED}Error: template sim dir not found: {template_path}{Colors.END}")
                failed.append((sim_name, 127))
                continue
            ts = datetime.now().strftime("%m-%d %H:%M:%S")
            print(f"{Colors.YELLOW}INFO: {ts} [{sim_name}] sim dir missing, creating via: cp -rf {template_path} {sim_dir}{Colors.END}")
            try:
                # NOTE(review): uses the external 'cp' command (POSIX-only); shutil.copytree
                # would be portable — confirm before changing since cp -rf semantics differ.
                r_cp = subprocess.run(["cp", "-rf", template_path, sim_dir], cwd=base_dir)
                if r_cp.returncode != 0 or not os.path.isdir(sim_dir):
                    print(f"{Colors.RED}INFO: {ts} [{sim_name}] create FAIL rc={r_cp.returncode}{Colors.END}")
                    failed.append((sim_name, int(r_cp.returncode) if r_cp.returncode is not None else 1))
                    continue
                print(f"{Colors.GREEN}INFO: {ts} [{sim_name}] created OK{Colors.END}")
            except Exception as e:
                print(f"{Colors.RED}INFO: {ts} [{sim_name}] create FAIL: {e}{Colors.END}")
                failed.append((sim_name, 1))
                continue
        try:
            # Shell-style tokenization of the cmd string (quotes respected).
            tokens = shlex.split(cmd_str)
        except Exception as e:
            print(f"{Colors.RED}Error: shlex split failed for {sim_name} cmd={cmd_str!r}: {e}{Colors.END}")
            failed.append((sim_name, 2))
            continue
        # If the entry is a non-executable .py script, run it via the current python.
        tokens = _maybe_prefix_python(tokens, sim_dir)
        jobs.append({"name": sim_name, "sim_dir": sim_dir, "cmd_str": cmd_str, "tokens": tokens})
    if not jobs and failed:
        # All entries failed during the preparation stage — nothing was launched.
        print(f"\n{Colors.RED}=== ALL REGRESS MODE SUMMARY: FAIL (no job started) ==={Colors.END}")
        for name, rc in failed:
            print(f"  - {name}: rc={rc}")
        return 1
    # Launch the hregress commands for all sim_xxx directories in parallel.
    procs: List[Tuple[Dict[str, object], subprocess.Popen]] = []
    for job in jobs:
        ts = datetime.now().strftime("%m-%d %H:%M:%S")
        print(f"INFO: {ts} [{job['name']}] START cwd={job['sim_dir']} cmd: {job['cmd_str']}")
        try:
            p = subprocess.Popen(
                job["tokens"],
                cwd=job["sim_dir"],
            )
            procs.append((job, p))
        except FileNotFoundError as e:
            # Command binary missing -> conventional 127 exit code.
            print(f"{Colors.RED}INFO: {ts} [{job['name']}] start FAIL: {e}{Colors.END}")
            failed.append((job["name"], 127))
        except Exception as e:
            print(f"{Colors.RED}INFO: {ts} [{job['name']}] start FAIL: {e}{Colors.END}")
            failed.append((job["name"], 1))
    # Wait for all parallel regressions to finish and collect return codes.
    for job, p in procs:
        rc = p.wait()
        ts = datetime.now().strftime("%m-%d %H:%M:%S")
        if rc == 0:
            print(f"{Colors.GREEN}INFO: {ts} [{job['name']}] FINISH OK{Colors.END}")
        else:
            print(f"{Colors.RED}INFO: {ts} [{job['name']}] FINISH FAIL rc={rc}{Colors.END}")
            failed.append((job["name"], int(rc)))
    # Final summary: any preparation, start, or runtime failure makes the whole mode fail.
    if failed:
        print(f"\n{Colors.RED}=== ALL REGRESS MODE SUMMARY: FAIL ==={Colors.END}")
        for name, rc in failed:
            print(f"  - {name}: rc={rc}")
        return 1
    print(f"\n{Colors.GREEN}=== ALL REGRESS MODE SUMMARY: OK ==={Colors.END}")
    return 0

def parse_arguments():
    """Parse command-line arguments for the regression runner.

    Returns:
        argparse.Namespace with all options below (groups is optional and
        validated later in main() depending on the selected run mode).
    """
    parser = argparse.ArgumentParser(description="regress")
    # Required arguments (conditionally required based on other parameters)
    parser.add_argument("-g", "--groups", nargs="+", required=False,
                       help="group tag (required unless using -lst/--list)")
    # Optional arguments
    parser.add_argument("-d", "--dienum", type=int, nargs="?", default=2,
                       help="die num :1 to 4")
    parser.add_argument("-v", "--rtl_ver", nargs="?", default="STUB NOC_TOP",
                       help="rtl vision")
    # NOTE(review): alternate default for -v kept for reference; remove once settled.
    # parser.add_argument("-v", "--rtl_ver", nargs="?", default="FULL",
    #                    help="rtl vision")
    parser.add_argument("-m", "--mode", nargs="?", default="",
                       help="mode")
    parser.add_argument("-def", "--define", type=str, nargs="?",
                       help="rtl define marco")
    parser.add_argument("-q", "--queue", nargs="?", default="pron_normal",
                       help="queue")
    parser.add_argument("--timestamp", nargs="*",
                       help="add timestamp or not, use True or False")
    # NOTE(review): with nargs="*" the CLI yields a list but the default is the
    # string "1" — downstream code must accept both; confirm before changing.
    parser.add_argument("--bypass", nargs="*", default="1",
                       help="bypass the pre_full_run: 0=compile, 1=skip compile if files exist")
    parser.add_argument("--wait-timeout", type=int, nargs="?", default=100,
                       help="waiting timeout (h)")
    parser.add_argument('--max_concurrent', type=int, default=50,
                       help='max concurrent job count')
    # Legacy arguments for backward compatibility
    parser.add_argument('--legacy-mode', choices=['compile_regression'], default='compile_regression',
                       help='Legacy run mode: compile_regression(compile then run regression)')
    parser.add_argument('--timeout', type=int, default=60,
                       help='Single test timeout (minutes) (default: 60)')
    # NOTE(review): default is '.' but the help text says ./output for the next
    # two options — one of the two is stale; verify against the runner.
    parser.add_argument('--output-dir', default='.',
                       help='Output directory for compile and regression (default: ./output)')
    parser.add_argument('--dir', default='.',
                       help='Simulation output directory path (default: ./output)')
    parser.add_argument('--p2-mode', default='normal',
                       help='P2 mode for compilation (default: normal)')
    parser.add_argument('--seed', default='random',
                       help='Random seed (default: random)')
    # Waveform / coverage controls.
    parser.add_argument('--wave', action='store_true',
                       help='Enable FSDB wave format (default: no wave)')
    parser.add_argument('--wave-on-fail', action='store_true',
                       help='Generate wave file only when test fails')
    parser.add_argument('--coverage', action='store_true',
                       help='Enable coverage collection')
    parser.add_argument('--cov', type=str, default=None,
                       choices=['all', 'tgl', 'line', 'cond', 'fsm', 'branch', 'assert'],
                       help='Coverage type: all, tgl, line, cond, fsm, branch, assert (default: None)')
    # VCS simulator tuning.
    parser.add_argument('--vcs-optimize', action='store_true', default=False,
                       help='Enable VCS optimization (parallel compilation and simulation) (default: disabled)')
    parser.add_argument('--vcs-cores', type=int, default=1,
                       help='Number of cores for VCS parallel compilation/simulation (default: 1, single-threaded)')
    parser.add_argument('--vcs-xa', action='store_true', default=False,
                       help='Enable VCS-XA acceleration (default: disabled)')
    parser.add_argument('--verbosity', default='UVM_MEDIUM',
                       choices=['UVM_NONE', 'UVM_LOW', 'UVM_MEDIUM', 'UVM_HIGH', 'UVM_FULL'],
                       help='UVM verbosity level (default: UVM_MEDIUM)')
    parser.add_argument('--plusargs', default='',
                       help='Additional plusargs parameters')
    parser.add_argument('--retry', type=int, default=1,
                       help='Failed test retry count (default: 1)')
    parser.add_argument('--debug', action='store_true',
                       help='Enable debug mode')
    parser.add_argument('--keep-logs', action='store_true',
                       help='Keep all log files')
    # Monitoring / timeout knobs.
    parser.add_argument('--status-interval', type=int, default=5,
                       help='Status print interval (minutes) (default: 5)')
    parser.add_argument('--error-monitor-interval', type=int, default=30,
                       help='Error monitoring interval (minutes) (default: 30)')
    parser.add_argument('--hang-timeout-minutes', type=int, default=30,
                       help='Timeout for no new log lines (minutes) (default: 30)')
    parser.add_argument('--pend-timeout-minutes', type=int, default=None,
                       help='Timeout for PEND jobs (minutes). If not set, PEND jobs will wait indefinitely for resources (default: None)')
    # LSF resource requests.
    parser.add_argument('--memory', type=int, default=None,
                       help='Memory reservation in GB for LSF jobs (default: not specified, use LSF default)')
    parser.add_argument('--cpu-cores', type=int, default=1,
                       help='CPU cores to request for LSF jobs (default: 1)')
    # Alternate run modes: rerun of failures, or an explicit case list.
    parser.add_argument('--failed-regression', type=str, default=None,
                       help='Path to failed regression JSON file to re-run failed tests only')
    parser.add_argument('-lst', '--list', type=str, default=None,
                       help='Path to JSON regression list file to run all test cases in the list')
    # Auto-restart controls.
    parser.add_argument('--auto-restart', action='store_true', default=False,
                       help='Automatically restart regression after completion (default: False)')
    parser.add_argument('--restart-interval-hours', type=float, default=None,
                       help='Auto-restart interval in hours (e.g., 12.0 for 12 hours). If set, regression will restart after this interval (default: None)')
    parser.add_argument('--max-restarts', type=int, default=None,
                       help='Maximum number of auto-restarts (default: None, unlimited)')
    # Multi-topology one-click mode
    parser.add_argument('--all-regress-en', type=int, default=0,
                       help='Enable all-regress mode: when set to 1, read alias file and submit regress in each sim_xxx dir (default: 0)')
    parser.add_argument('--alias-file', type=str, default='it_regress.alias',
                       help='Alias file path (it_regress.alias format) used when --all-regress-en=1')
    parser.add_argument('--all-regress-sim-root', type=str, default='.',
                       help='Sim root dir that contains sim_xxx subdirs, used when --all-regress-en=1')
    parser.add_argument('--template-sim-dir', type=str, default='sim',
                       help="Template sim directory name/path under sim-root. If sim_xxx doesn't exist, create it by running: cp -rf <template> <sim_xxx>")
    return parser.parse_args()
def main():
    """Entry point: dispatch to all-regress mode or validate and run one regression."""
    args = parse_arguments()

    # Short-circuit: all-regress mode (submit commands from alias file in each sim_xxx dir)
    try:
        if int(getattr(args, 'all_regress_en', 0)) == 1:
            # sys.exit raises SystemExit (not an Exception subclass), so the
            # handler below does not swallow the normal exit path.
            sys.exit(int(submit_all_regress_from_alias(args)))
    except Exception as e:
        print(f"{Colors.RED}Error in all-regress mode: {e}{Colors.END}")
        sys.exit(1)

    # Validate arguments according to the selected run mode.
    failed_file = getattr(args, 'failed_regression', None)
    list_file = getattr(args, 'list', None)
    if failed_file:
        # Failed-rerun mode: the JSON file of previous failures must exist.
        if not os.path.exists(failed_file):
            print(f"{Colors.RED}Error: Failed regression file not found {failed_file}{Colors.END}")
            sys.exit(1)
        print(f"Running failed tests from: {failed_file}")
    elif list_file:
        # Regression-list mode: list lives under ../def/case_def/; groups not required.
        regression_list_path = os.path.join(os.getcwd(), "..", "def", "case_def", list_file)
        if not os.path.exists(regression_list_path):
            print(f"{Colors.RED}Error: Regression list file not found {regression_list_path}{Colors.END}")
            sys.exit(1)
        print(f"Running tests from regression list: {regression_list_path}")
    else:
        # Normal regression mode: -g/--groups and the json_list file are mandatory.
        if not args.groups:
            print(f"{Colors.RED}Error: -g/--groups is required when not using -lst/--list or --failed-regression{Colors.END}")
            sys.exit(1)
        json_list_path = os.path.join(os.getcwd(), "../def/json_list")
        if not os.path.exists(json_list_path):
            print(f"{Colors.RED}Error: Test list file not found {json_list_path}{Colors.END}")
            sys.exit(1)

    # Run the regression, cleaning up on interrupt or error.
    runner = RegressionRunner(args)
    try:
        runner.run()
    except KeyboardInterrupt:
        print(f"\n{Colors.YELLOW}User interrupted, cleaning up...{Colors.END}")
        runner.cleanup()
        sys.exit(1)
    except Exception as e:
        print(f"{Colors.RED}Regression test exception: {e}{Colors.END}")
        runner.cleanup()
        sys.exit(1)
# Script entry point: run main() only when executed directly, not on import.
if __name__ == "__main__":
    main()
 
# posted @ 2026-02-03 17:13  yuesheng1325  阅读(3)  评论(0)    收藏  举报  (blog-scrape footer; commented out so the file remains valid Python)