httpimport 内部处理简单说明

以前简单介绍过 httpimport 的功能。其内部实际上是对 Python import 协议（finder / loader）的实现，只是没有显式继承 importlib.abc 中的抽象基类

标准模块finder 以及loader 的处理

标准的 loader 和 finder 接口分别由 importlib.abc.Loader 以及 importlib.abc.MetaPathFinder 定义，下面是一个基于 HTTP 的简单示例

class HTTPModuleLoader(importlib.abc.Loader):
    """Loader that downloads a module's source over HTTP and executes it.

    The source of a module named ``name`` is fetched from
    ``{base_url}/{name}.py``.

    Args:
        base_url (str): URL prefix under which the ``.py`` files are served.
    """

    # Seconds to wait for the server. Without an explicit timeout a stalled
    # server would block the import indefinitely.
    request_timeout = 10

    def __init__(self, base_url):
        self.base_url = base_url

    def create_module(self, spec):
        """Return None so that the default module creation logic is used."""
        return None

    def exec_module(self, module):
        """Fetch the module source and execute it in the module's namespace.

        Raises:
            ImportError: If the request fails or the server does not answer
                with HTTP status 200.
        """
        module_url = f"{self.base_url}/{module.__name__}.py"
        try:
            response = requests.get(module_url, timeout=self.request_timeout)
        except requests.RequestException as exc:
            # Surface transport failures as ImportError so callers that guard
            # imports with `except ImportError` keep working.
            raise ImportError(
                f"Cannot load module {module.__name__} from {module_url}"
            ) from exc
        if response.status_code != 200:
            raise ImportError(f"Cannot load module {module.__name__} from {module_url}")

        # NOTE(security): this executes code received over the network; only
        # use it with servers you trust.
        # Compiling with the URL as filename makes tracebacks point at the
        # remote source instead of "<string>".
        code = compile(response.text, module_url, "exec")
        exec(code, module.__dict__)

class HTTPModuleFinder(importlib.abc.MetaPathFinder):
    """Meta path finder that looks for ``{base_url}/{fullname}.py`` over HTTP.

    Args:
        base_url (str): URL prefix under which the ``.py`` files are served.
    """

    # Seconds to wait for the server. Without an explicit timeout a stalled
    # server would block the import indefinitely.
    request_timeout = 10

    def __init__(self, base_url):
        self.base_url = base_url
        self.loader = HTTPModuleLoader(base_url)

    def find_spec(self, fullname, path, target=None):
        """Return a spec for `fullname` if the server has it, else None.

        A HEAD request is issued for the module URL; only an HTTP 200 answer
        counts as "found". Returning None lets the import system move on to
        the next finder.
        """
        module_url = f"{self.base_url}/{fullname}.py"
        try:
            response = requests.head(module_url, timeout=self.request_timeout)
        except requests.RequestException:
            # An unreachable server simply means this finder cannot provide
            # the module; do not abort the import with a transport error.
            return None
        if response.status_code == 200:
            return importlib.util.spec_from_loader(fullname, self.loader)
        return None

httpimport基础类的定义

httpimport 支持多种方式的模块加载，内部核心都基于 HttpImporter

httpimport 定义

可以看到 HttpImporter 同时实现了上边 finder（find_spec）和 loader（create_module、exec_module）的基础定义

class HttpImporter(object):
    """ The class that implements the Importer API. Acts both as a finder
    (`find_spec` / `find_module`) and as a loader (`create_module` / `exec_module`).
    It is better to not use this class directly, but through its wrappers ('remote_repo', 'github_repo', etc),
    that automatically load and unload this class' objects to the 'sys.meta_path' list.

    Args:
        url (str): Contains a URL that can point to an Archive -(compressed) Tar or Zip-
        or an HTTP/S / WebDAV directory (listable or not) to be queried for Python module/packages files
        zip_pwd (bytes): The password to be used for password encrypted ZIP files
        headers (dict): The HTTP Headers to be used in all HTTP requests issued by this Importer.
            Can be used for authentication, logging, etc. Defaults to an empty dict.
        proxy (str): The URL for the HTTP proxy to be used for all requests
        allow_plaintext (bool): Accept a URL for which `_isHTTPS` is false. Without it
            (and without the module-level `INSECURE` flag) such a URL raises `ImportError`.
        ca_verify (bool): Forwarded to every `http` call; a warning is logged when it is false.
        ca_file: Forwarded to every `http` call.
        **kw: Accepted and ignored.

    Raises:
        ImportError: If the URL is not HTTPS and plaintext has not been allowed.
    """

    def __init__(
            self,
            url,
            zip_pwd=b'',
            headers=None,
            proxy=None,
            allow_plaintext=False,
            ca_verify=True, ca_file=None, **kw):
        # remove trailing '/' from URL parameter
        self.url = url if not url.endswith('/') else url[:-1]
        # Per-module cache filled by 'find_module' and read by the loader
        # methods: {fullname: {'content', 'filepath', 'package', 'module'}}
        self.modules = {}

        if not _isHTTPS(url):
            logger.warning(
                "[-] Using HTTP URLs (%s) with 'httpimport' is a security hazard!" %
                (url))
            if not (allow_plaintext or INSECURE):
                logger.error("""[*] Using plaintext protocols needs to be enabled through 'INSECURE' global or explicitly allowed through 'allow-plaintext'!
                """)
                raise ImportError(
                    "[-] HTTP used while plaintext is not allowed")

        if not ca_verify:
            logger.warning(
                "[-] Disabling TLS Certificate verification for URL (%s) is a security hazard!" %
                (url))

        self.zip_pwd = zip_pwd
        # A fresh dict per instance: a `{}` default in the signature would be
        # one object shared by every importer created without headers.
        self.headers = {} if headers is None else headers
        self.proxy = proxy
        self.ca_verify = ca_verify
        self.ca_file = ca_file

        # Try a request that can fail in case of connectivity issues
        resp = http(url, headers=self.headers, proxy=self.proxy,
                    method='GET', ca_verify=self.ca_verify, ca_file=self.ca_file)

        # Try to extract an archive from URL
        # ('find_module' treats a None result as "plain HTTP directory")
        self.archive = _retrieve_archive(resp['body'], url)

    def find_spec(self, fullname, path, target=None):
        """ Finder entry point used by the import system.

        Delegates to `find_module` and wraps the result in a `ModuleSpec`
        whose loader is this object.

        Returns:
          (ModuleSpec): A spec for `fullname`, or `None` if it cannot be loaded from here.
        """
        loader = self.find_module(fullname, path)
        if loader is not None:
            return importlib.machinery.ModuleSpec(
                fullname, loader)
        return None

    def find_module(self, fullname, path=None):
        """ Method that determines whether a module/package can be loaded through this Importer object. Part of Importer API

        On success the fetched code and its metadata are cached in `self.modules[fullname]`.

        Args:
            fullname (str): The name of the package/module to be searched.
            path (str): Part of the Importer API. Only logged by this object.

        Returns:
          (object): This Importer object (`self`) if the module can be imported
            or `None` if the module is not available.
        """
        logger.info(
            "[*] Trying to find loadable code for module '%s', path: '%s'" %
            (fullname, path))

        # Use a dedicated loop name so the 'path' argument is not shadowed
        for candidate in _create_paths(fullname):
            if self.archive is None:
                # No archive: query each candidate file over HTTP
                url = self.url + '/' + candidate
                resp = http(url, headers=self.headers, proxy=self.proxy, ca_verify=self.ca_verify, ca_file=self.ca_file)
                if resp['code'] == 200:
                    logger.debug(
                        "[+] Fetched Python code from '%s'. The module can be loaded!" %
                        (url))
                    self.modules[fullname] = {}
                    self.modules[fullname]['content'] = resp['body']
                    self.modules[fullname]['filepath'] = url
                    self.modules[fullname]['package'] = candidate.endswith(
                        '__init__.py')
                    return self
                logger.debug(
                    "[-] URL '%s' return HTTP Status Code '%d'. Trying next URL..." %
                    (url, resp['code']))
            else:
                # Archive: look the candidate file up inside it
                try:
                    content = _open_archive_file(
                        self.archive, candidate, zip_pwd=self.zip_pwd)
                except KeyError:
                    logger.debug(
                        "[-] Extraction of '%s' from archive failed. Trying next filepath..." %
                        (candidate))
                    continue
                logger.debug(
                    "[+] Extracted '%s' from archive. The module can be loaded!" %
                    (candidate))
                self.modules[fullname] = {}
                self.modules[fullname]['content'] = content
                self.modules[fullname]['filepath'] = self.url + "#" + candidate
                self.modules[fullname]['package'] = candidate.endswith(
                    '__init__.py')
                return self

        # Every candidate failed. This message used to sit inside the loop
        # after branches that always 'return'/'continue', so it never ran.
        logger.info(
            "[-] Module '%s' cannot be loaded from '%s'. Skipping..." %
            (fullname, self.url))
        # Instruct 'import' to move on to next Importer
        return None

    def create_module(self, spec):
        """ Loader method that builds the (still empty) module object for `spec`.

        Sets `__loader__`, `__file__`, `__path__`, `__url__` and `__package__`
        on the new module and caches it in `self.modules[fullname]['module']`.

        Args:
          spec (ModuleSpec): The spec of the module to create; only `spec.name` is read.

        Returns:
          (object): The newly created Module object

        Raises:
          ImportError: If the module was not found before and `find_module` cannot find it now.
        """
        fullname = spec.name

        if fullname not in self.modules:
            logger.debug(
                "[*] Module '%s' has not been attempted before. Trying to load..." % fullname)
            # Run 'find_module' and see if it is loadable through this Importer
            # object
            if self.find_module(fullname) is not self:
                logger.info(
                    "[-] Module '%s' has not been found as loadable. Failing..." % fullname)
                # If it is not loadable ('find_module' did not return 'self' but 'None'):
                # throw error:
                raise ImportError(
                    "Module '%s' cannot be loaded from '%s'" %
                    (fullname, self.url))

        logger.debug(
            "[*] Creating Python Module object for '%s'" % (fullname))

        mod = types.ModuleType(fullname)
        mod.__loader__ = self
        mod.__file__ = self.modules[fullname]['filepath']
        # Set module path - get filepath and keep only the path until filename
        mod.__path__ = ['/'.join(mod.__file__.split('/')[:-1]) + '/']
        mod.__url__ = self.modules[fullname]['filepath']

        # Default for packages; plain modules are corrected below
        mod.__package__ = fullname

        # Populate subpackage '__package__' metadata with parent package names
        pkg_name = '.'.join(fullname.split('.')[:-1])
        if len(fullname.split('.')[:-1]) > 1 and not self.modules[fullname]['package']:
            # recursively find the parent package
            # (relies on the parents already being present in sys.modules)
            while sys.modules[pkg_name].__package__ != pkg_name:
                pkg_name = '.'.join(pkg_name.split('.')[:-1])
            mod.__package__ = pkg_name
        elif not self.modules[fullname]['package']:
            mod.__package__ = pkg_name.split('.')[0]

        logger.debug(
            "[*] Metadata (__package__) set to '%s' for %s '%s'" %
            (mod.__package__,
             'package' if self.modules[fullname]['package'] else 'module',
             fullname))

        self.modules[fullname]['module'] = mod
        return mod

    def exec_module(self, module):
        """ Loader method that executes the cached code of `module`.

        Delegates to `_create_module` using the module's `__name__`.
        """
        fullname = module.__name__
        return self._create_module(fullname)

    def _create_module(self, fullname, sys_modules=True):
        """ Method that loads module/package code into a Python Module object

        Args:
          fullname (str): The name of the module/package to be loaded
          sys_modules (bool, optional): Set to False to not inject the module into sys.modules
            It will fail for packages/modules that contain relative imports

        Returns:
          (object): Module object containing the executed code of the specified module/package

        Raises:
          ImportError: If the module is not cached and cannot be found through this Importer.
        """

        # If the module has not been found as loadable
        # through 'find_module' method (yet)
        if fullname not in self.modules:
            spec = self.find_spec(fullname, "")
            if spec is None:
                raise ImportError(
                    "Module '%s' cannot be loaded from '%s'" %
                    (fullname, self.url))
            module = self.create_module(spec)
        else:
            module = self.modules[fullname]['module']

        if sys_modules:
            sys.modules[fullname] = module

        # Execute the module/package code into the Module object
        try:
            exec(self.modules[fullname]['content'], module.__dict__)
        except BaseException:
            # NOTE(review): any failure of the module code (including
            # KeyboardInterrupt/SystemExit) is swallowed here and the
            # partially initialised module is still returned. Kept as-is
            # because callers may depend on it - confirm whether the
            # exception should be logged or re-raised.
            if not sys_modules:
                logger.warning(
                    "[-] Module/Package '%s' cannot be imported without adding it to sys.modules. Might contain relative imports." %
                    fullname)
            else:
                del sys.modules[fullname]
        return module

其他子类

其他子类包含的比较多，比如 github、gitlab、pypi、http 等，内部都使用了此类。比如 github 的处理是用 contextmanager 包装的：进入时调用 add_remote_repo，退出时调用 remove_remote_repo

@contextmanager
def github_repo(username=None, repo=None, ref='master',
                domain=None, profile=None):
    """ Context Manager that enables importing modules/packages from Github repositories.

    An importer for the repository URL is registered on entry and removed
    again on exit, whether or not the body raised.

    Args:
      username (str): The username which is the repository's owner in the Git Service.
      repo (str): The name of the repository that contains the modules/packages to be imported
      ref (str): The commit hash, branch or tag to be fetched
      domain (str): The domain to be used for the URL (service domains service raw content)
      profile: Forwarded unchanged to `add_remote_repo`.

    """
    url = __create_git_url('github',
                           username, repo, ref=ref, domain=domain)
    add_remote_repo(url=url, profile=profile)
    try:
        # Exceptions raised in the 'with' body propagate unchanged; an
        # explicit 'except ImportError: raise' adds nothing.
        yield
    finally:  # Always remove the added HttpImporter from sys.meta_path
        remove_remote_repo(url)

add_remote_repo 的处理

def add_remote_repo(url=None, profile=None, importer_class=HttpImporter):
    """ Creates an HttpImporter object and adds it to the `sys.meta_path`.

    Args:
      url (str): The URL of an HTTP/WebDav directory (either listable or not)
    or of an archive (supported: .zip, .tar, .tar.bz, .tar.gz, .tar.xz - Python3 only)
      profile: Passed together with `url` to `__extract_profile_options` to
        obtain the keyword options for the importer.
      importer_class: The class instantiated as `importer_class(url, **options)`.

    Returns:
      HttpImporter: The `HttpImporter` object added to the `sys.meta_path`
    """
    options = __extract_profile_options(url, profile)
    # A 'url' entry in the options overrides the argument. pop() removes it so
    # it is not passed twice, and - unlike get() followed by 'del' - does not
    # raise KeyError when the entry is absent.
    url = options.pop('url', url)
    logger.debug(
        "[*] Adding '%s' (profile: %s) with options: %s " %
        (importer_class, profile, options))
    importer = importer_class(
        url,
        **options,
    )
    sys.meta_path.append(importer)
    return importer

remove_remote_repo处理

def remove_remote_repo(url):
    """ Removes from the 'sys.meta_path' the first importer whose `url` starts with the given URL.

    Args:
      url (str): The URL of the `HttpImporter` object to remove

    Returns:
      bool: True if an importer was removed, False if none matched.

    """
    # Importers store their URL without a trailing '/', so drop it here too
    if url.endswith('/'):
        url = url[:-1]

    for candidate in sys.meta_path:
        try:
            matches = candidate.url.startswith(url)
        except AttributeError:
            # Entries without a usable 'url' attribute are not ours; skip them
            continue
        if matches:
            sys.meta_path.remove(candidate)
            return True
    return False

说明

以上是一个简单说明，实际内部有不少细节（比如不同协议下 Python 代码的获取、加载等），通过了解内部的机制可以更好地使用 httpimport

参考资料

https://github.com/operatorequals/httpimport

https://github.com/rongfengliang/httpimport

posted on 2025-04-08 08:00 荣锋亮阅读(33) 评论(0) 收藏举报

刷新页面返回顶部

rongfengliang-荣锋亮

httpimport 内部处理简单说明

标准模块finder 以及loader 的处理

httpimport基础类的定义

说明

参考资料

导航

公告