RLLM工具:Python 沙箱(LCB沙箱)

主函数定义在 rllm/tools/code_tools/python_interpreter.py 中:

    def _init_backend(self):
        """初始化沙箱"""
        # 默认使用LCBPythonInterpreter
        if self.backend_type == "local":
            self.backend: LCBPythonInterpreter | E2BPythonInterpreter | TogetherCodeTool = LCBPythonInterpreter()
        elif self.backend_type == "e2b":
            self.backend = E2BPythonInterpreter(n_sandboxes=self.n_sandboxes, api_key=self.api_key)
        elif self.backend_type == "together":
            self.backend = TogetherCodeTool(api_key=self.api_key)
        else:
            raise ValueError(f"Unsupported backend type: {self.backend_type}")

    def forward(self, code: str, timeout: int = 12, **kwargs) -> CodeToolOutput:
        """
        Run ``code`` in the sandbox by delegating to the configured backend.

        Args:
            code: Python code to execute
            timeout: Maximum execution time in seconds
            **kwargs: Additional parameters specific to the backend implementation

        Returns:
            Whatever ``self.backend.forward`` returns for these arguments.
        """
        # All arguments are forwarded unchanged, by keyword.
        outcome = self.backend.forward(code=code, timeout=timeout, **kwargs)
        return outcome

LCB 解释器定义在 rllm/tools/code_tools/lcb_tool.py 中。可以看到,这个本地沙箱主要提供两方面的防护:

  1. 防止有害操作(例如对本地文件的读写等)
  2. 进行超时处理
import ast
import faulthandler
import multiprocessing
import queue
import signal
import traceback

from rllm.rewards.code_utils.livecodebench import (
    Capturing,
    clean_if_name,
    compile_code,
    get_function,
    make_function,
    reliability_guard,
    timeout_handler,
)
from rllm.tools.code_tools.code_tool import CodeTool, CodeToolOutput


def ensure_return_value(code):
    """
    Turn a trailing bare expression into a ``return`` statement.

    The source is parsed, and if its final top-level statement is an
    expression statement it is replaced by ``return <expression>``. The tree
    is then unparsed, so the returned text is the normalised form produced by
    ``ast.unparse`` (comments and original formatting are not kept).

    Args:
        code (str): Python code to process

    Returns:
        str: The rewritten code. Empty or whitespace-only input, and input
        that cannot be parsed or processed, is returned unchanged.
    """
    stripped = code.strip()
    if not stripped:
        return code

    try:
        module = ast.parse(code)
        statements = module.body
        last_stmt = statements[-1] if statements else None

        if isinstance(last_stmt, ast.Expr):
            # Swap the expression statement for a return of the same value.
            statements[-1] = ast.Return(value=last_stmt.value)
            # The new node has no position info; fill it in from its context.
            ast.fix_missing_locations(module)

        rewritten = ast.unparse(module)
    except SyntaxError:
        # Unparseable code is handed back untouched.
        return code
    except Exception as e:
        # Any other failure is reported, and the original code is used as-is.
        print(f"Warning: Could not process code: {e}")
        return code

    return rewritten


def _describe_failure(exc):
    """Return the stderr text for ``exc``; must be called from an ``except`` block."""
    # Timeouts are recognised by exception name, as the original inline check did.
    if "timeoutexception" in repr(exc).lower():
        return "Time Limit Exceeded."
    return traceback.format_exc()


def execute_code(code, timeout):
    """
    Execute the provided code with safety measures and timeout handling.

    The code is passed through ``clean_if_name`` and ``make_function``,
    compiled with ``compile_code``, and the resulting ``wrapped_function`` is
    called while ``Capturing`` collects its output. A SIGALRM alarm of
    ``timeout`` seconds is armed around preparation and again around the call.

    Note: this installs a process-wide SIGALRM handler and calls
    ``reliability_guard()``, so it is meant to run in a dedicated process.

    Args:
        code (str): Python code to execute
        timeout (int): Maximum execution time in seconds

    Returns:
        tuple: (stdout, stderr, result). ``stderr`` is ``None`` when nothing
        went wrong; otherwise it holds a short message or a traceback.
        ``stdout`` is ``None`` if execution never reached the capture stage.
    """
    signal.signal(signal.SIGALRM, timeout_handler)
    stdout, stderr, result = None, None, None
    # Disable functionalities that can make destructive changes to the test.
    reliability_guard()
    # Time-limit the preprocessing and compilation steps as well.
    signal.alarm(timeout)
    try:
        code = clean_if_name(code)
        # Wrap the code in a function so it can be called and return a value.
        code = make_function(code)
        compiled_sol = compile_code(code, timeout)
        if compiled_sol is None:
            stderr = "Failed to compile code"
            return stdout, stderr, result
        method = get_function(compiled_sol, "wrapped_function")
        if method is None:
            stderr = "Failed to get function 'wrapped_function'"
            return stdout, stderr, result

        faulthandler.enable()
        # Restart the alarm so the call itself gets the full time budget.
        signal.alarm(timeout)
        # Capture what the executed code prints.
        with Capturing() as captured_output:
            try:
                result = method()
            except SystemExit as e:
                # sys.exit() inside the code is reported, not propagated.
                stderr = f"SystemExit: {e}"
            except Exception as e:
                # Cancel the alarm first so it cannot fire while formatting.
                signal.alarm(0)
                stderr = _describe_failure(e)
            finally:
                signal.alarm(0)
                faulthandler.disable()
        stdout = captured_output[0] if captured_output else ""
        return stdout, stderr, result
    except Exception as e:
        # Failures outside the call (e.g. the alarm firing during compilation)
        # used to be swallowed with stderr left as None; report them instead.
        signal.alarm(0)
        if stderr is None:
            stderr = _describe_failure(e)
        return stdout, stderr, result
    finally:
        signal.alarm(0)


def _wrapper_exec_fn(sample, timeout, result_queue):
    """Run ``sample`` through ``execute_code`` and put the returned value on ``result_queue``."""
    # Used as the target of the child process; the queue carries the result back.
    result_queue.put(execute_code(sample, timeout=timeout))


def lcb_sandbox(code, timeout):
    """
    Run ``code`` in a separate process with a time limit; main sandbox entry point.

    The code is first rewritten by ``ensure_return_value`` so a trailing
    expression becomes the return value, then executed by ``_wrapper_exec_fn``
    in a child process. The parent waits up to ``timeout + 6`` seconds for it.

    Args:
        code (str): Python code to execute
        timeout (int): Maximum execution time in seconds

    Returns:
        tuple: (stdout, stderr, result) containing the execution output and
        result. If the child produced no result in time, this is
        ``("", "Timeout", "")``.
    """
    # Preprocess the code to ensure the last expression is returned
    code = ensure_return_value(code)

    # The context manager shuts the manager's server process down on exit,
    # so one is not left behind for every call.
    with multiprocessing.Manager() as manager:
        result_queue = manager.Queue()

        # Use multiprocessing to isolate code execution in a separate process
        p = multiprocessing.Process(
            target=_wrapper_exec_fn,
            args=(code, timeout, result_queue),
        )
        p.start()

        try:
            # Wait for the process to complete with additional buffer time
            p.join(timeout=(timeout + 1) + 5)

            try:
                # Non-blocking: a plain get() would wait forever when the child
                # hung or died without a result, and never raise queue.Empty.
                return result_queue.get_nowait()
            except queue.Empty:
                # Report the timeout in the stderr slot so it is not mistaken
                # for output printed by the executed code.
                return "", "Timeout", ""
        finally:
            # Ensure the process is terminated if still running
            if p.is_alive():
                p.terminate()
                p.join(timeout=1)
                if p.is_alive():
                    p.kill()


class LCBPythonInterpreter(CodeTool):
    """
    Code tool that runs Python through the local ``lcb_sandbox``.

    Execution happens in a separate process with a time limit, using the
    LiveCodeBench execution helpers.
    """

    def __init__(self):
        """Register the tool under the name ``python`` with its description."""
        tool_settings = {
            "name": "python",
            "description": "Execute python code in the same environment as the LiveCodeBench benchmark.",
            "n_sandboxes": -1,
        }
        super().__init__(**tool_settings)

    def forward(self, code: str, timeout: int = 12, **kwargs) -> CodeToolOutput:
        """
        Run ``code`` in the local sandbox and wrap the outcome.

        Args:
            code (str): Python code to execute
            timeout (int): Maximum execution time in seconds, defaults to 12
            **kwargs: Accepted for interface compatibility; not used here

        Returns:
            CodeToolOutput: Carries stdout, stderr and the result on success.
            If anything raises, an output with only ``error`` set is returned.
        """
        try:
            captured_out, captured_err, value = lcb_sandbox(code, timeout=timeout)
            outcome = CodeToolOutput(
                name=self.name or "python",
                stdout=captured_out,
                stderr=captured_err,
                output=value,
            )
        except Exception as e:
            # Any failure is reported through the error field instead of raising.
            outcome = CodeToolOutput(
                name=self.name or "python",
                error=f"Sandbox Error: {type(e).__name__} - {str(e)}",
            )
        return outcome


if __name__ == "__main__":
    # Demo snippet: a loop followed by a bare trailing expression.
    sample_code = """
# Generate a large amount of code
result = 0
for i in range(1000):
    exec(f"var_{i} = {i}")
    result += i

# Final expression after lots of code
result  # Should be converted to return
"""

    # Build the tool, run the snippet through it, and print what comes back.
    sandbox_tool = LCBPythonInterpreter()
    execution_output = sandbox_tool(code=sample_code)
    print(execution_output)

posted @ 2025-12-20 17:45  Brain404  阅读(2)  评论(0)    收藏  举报