httpimport 内部处理简单说明
之前简单介绍过 httpimport 的功能。它的内部实际上是对 Python import 协议(finder / loader)的实现,只是没有显式继承 importlib.abc 中的基类
标准模块 finder 以及 loader 的处理
由 importlib.abc.MetaPathFinder 以及 importlib.abc.Loader 定义,下面是一个基于继承的最小示例
class HTTPModuleLoader(importlib.abc.Loader):
    """Loader that downloads a module's source over HTTP and executes it.

    The source is fetched from ``<base_url>/<module name>.py``.

    Args:
        base_url (str): URL prefix under which the ``.py`` files are served.
    """

    # Seconds to wait for the server. `requests` has no default timeout, so
    # without this a stalled server would block the import forever.
    # Override on a subclass or instance to change it.
    timeout = 10

    def __init__(self, base_url):
        self.base_url = base_url

    def create_module(self, spec):
        """Return None so the import machinery creates the module itself."""
        return None  # use the default module creation logic

    def exec_module(self, module):
        """Fetch the module source and execute it in the module's namespace.

        Args:
            module: The module object to populate; its ``__name__`` selects
                the file that is requested.

        Raises:
            ImportError: If the request fails at the transport level or the
                server answers with anything other than HTTP 200.
        """
        module_url = f"{self.base_url}/{module.__name__}.py"
        try:
            response = requests.get(module_url, timeout=self.timeout)
        except requests.RequestException as exc:
            # Report network failures the way the import system expects,
            # keeping the original error as the cause.
            raise ImportError(
                f"Cannot load module {module.__name__} from {module_url}"
            ) from exc
        if response.status_code != 200:
            raise ImportError(f"Cannot load module {module.__name__} from {module_url}")
        # SECURITY: this runs whatever code the server returned; only point
        # base_url at a source you trust.
        exec(response.text, module.__dict__)
class HTTPModuleFinder(importlib.abc.MetaPathFinder):
    """Meta-path finder that looks for ``<base_url>/<fullname>.py`` over HTTP.

    Args:
        base_url (str): URL prefix under which the ``.py`` files are served.
    """

    # Seconds to wait for the server. `requests` has no default timeout, so
    # without this every import that reaches this finder could hang.
    # Override on a subclass or instance to change it.
    timeout = 10

    def __init__(self, base_url):
        self.base_url = base_url
        self.loader = HTTPModuleLoader(base_url)

    def find_spec(self, fullname, path, target=None):
        """Return a spec for `fullname` if the server has a file for it.

        Args:
            fullname (str): Fully qualified name of the module being imported.
            path: Part of the finder API. Not used here.
            target: Part of the finder API. Not used here.

        Returns:
            A module spec bound to this finder's loader when a HEAD request
            for the module URL returns HTTP 200, otherwise None.
        """
        module_url = f"{self.base_url}/{fullname}.py"
        try:
            # HEAD does not follow redirects by default in `requests`, while
            # the GET issued by the loader does; follow them here as well so
            # both agree on whether the module exists.
            response = requests.head(
                module_url, timeout=self.timeout, allow_redirects=True)
        except requests.RequestException:
            # A finder must answer "not mine" rather than raise, otherwise a
            # network problem would break imports of unrelated modules.
            return None
        if response.status_code == 200:
            return importlib.util.spec_from_loader(fullname, self.loader)
        return None
httpimport基础类的定义
httpimport 支持多种模式的模块加载,内部核心是 HttpImporter 类
- httpimport 定义
可以看到它同时实现了上面的 finder(find_spec)和 loader(create_module、exec_module)接口
class HttpImporter(object):
    """ The class that implements the Importer API.

    It acts as both finder (`find_spec`, backed by `find_module`) and loader
    (`create_module`, `exec_module`).
    It is better to not use this class directly, but through its wrappers ('remote_repo', 'github_repo', etc),
    that automatically load and unload this class' objects to the 'sys.meta_path' list.

    Args:
        url (str): Contains a URL that can point to an Archive -(compressed) Tar or Zip-
            or an HTTP/S / WebDAV directory (listable or not) to be queried for Python module/packages files
        zip_pwd (bytes): The password to be used for password encrypted ZIP files
        headers (dict): The HTTP Headers to be used in all HTTP requests issued by this Importer.
            Can be used for authentication, logging, etc. Defaults to a new empty dict per instance.
        proxy (str): The URL for the HTTP proxy to be used for all requests
        allow_plaintext (bool): Allow a non-HTTPS `url` for this object even when the
            'INSECURE' global is not set.
        ca_verify (bool): Forwarded to every HTTP request. A warning is logged when it is falsy.
        ca_file: Forwarded to every HTTP request (presumably a CA bundle path - confirm against `http`).
        **kw: Accepted and ignored.

    Raises:
        ImportError: If `url` is not HTTPS (per `_isHTTPS`) and neither `allow_plaintext`
            nor 'INSECURE' permits plaintext.
    """

    def __init__(
            self,
            url,
            zip_pwd=b'',
            headers=None,
            proxy=None,
            allow_plaintext=False,
            ca_verify=True, ca_file=None, **kw):
        # remove trailing '/' from URL parameter
        self.url = url if not url.endswith('/') else url[:-1]
        # Per-module cache: fullname -> {'content', 'filepath', 'package', 'module'}
        self.modules = {}

        if not _isHTTPS(url):
            logger.warning(
                "[-] Using HTTP URLs (%s) with 'httpimport' is a security hazard!",
                url)
            if not (allow_plaintext or INSECURE):
                logger.error(
                    "[*] Using plaintext protocols needs to be enabled through 'INSECURE' global or explicitly allowed through 'allow-plaintext'!\n")
                raise ImportError(
                    "[-] HTTP used while plaintext is not allowed")

        if not ca_verify:
            logger.warning(
                "[-] Disabling TLS Certificate verification for URL (%s) is a security hazard!",
                url)

        self.zip_pwd = zip_pwd
        # A `{}` default in the signature would be one dict shared by every
        # importer; create a fresh one per instance instead.
        self.headers = {} if headers is None else headers
        self.proxy = proxy
        self.ca_verify = ca_verify
        self.ca_file = ca_file

        # Try a request that can fail in case of connectivity issues
        resp = http(url, headers=self.headers, proxy=self.proxy,
                    method='GET', ca_verify=self.ca_verify, ca_file=self.ca_file)

        # Try to extract an archive from URL
        self.archive = _retrieve_archive(resp['body'], url)

    def find_spec(self, fullname, path, target=None):
        """ Finder entry point of the import protocol.

        Args:
            fullname (str): The name of the package/module to be searched.
            path: Part of the finder API. Only passed on to `find_module`.
            target: Part of the finder API. Not used in this object.

        Returns:
            (object): A `ModuleSpec` with this object as loader if `find_module`
                found the module, `None` otherwise.
        """
        loader = self.find_module(fullname, path)
        if loader is not None:
            return importlib.machinery.ModuleSpec(
                fullname, loader)
        return None

    def find_module(self, fullname, path=None):
        """ Method that determines whether a module/package can be loaded through this Importer object. Part of Importer API

        On success the fetched code is cached in `self.modules[fullname]`.

        Args:
            fullname (str): The name of the package/module to be searched.
            path (str): Part of the Importer API. Not used in this object.

        Returns:
            (object): This Importer object (`self`) if the module can be imported
                or `None` if the module is not available.
        """
        logger.info(
            "[*] Trying to find loadable code for module '%s', path: '%s'",
            fullname, path)

        # Named 'candidate' so the 'path' argument is not shadowed
        for candidate in _create_paths(fullname):
            if self.archive is None:
                # No archive: ask the server for each candidate file
                url = self.url + '/' + candidate
                resp = http(url, headers=self.headers, proxy=self.proxy,
                            ca_verify=self.ca_verify, ca_file=self.ca_file)
                if resp['code'] != 200:
                    logger.debug(
                        "[-] URL '%s' return HTTP Status Code '%d'. Trying next URL...",
                        url, resp['code'])
                    continue
                logger.debug(
                    "[+] Fetched Python code from '%s'. The module can be loaded!",
                    url)
                content = resp['body']
                filepath = url
            else:
                # Archive: a missing member is reported as KeyError
                try:
                    content = _open_archive_file(
                        self.archive, candidate, zip_pwd=self.zip_pwd)
                except KeyError:
                    logger.debug(
                        "[-] Extraction of '%s' from archive failed. Trying next filepath...",
                        candidate)
                    continue
                logger.debug(
                    "[+] Extracted '%s' from archive. The module can be loaded!",
                    candidate)
                filepath = self.url + "#" + candidate

            self.modules[fullname] = {
                'content': content,
                'filepath': filepath,
                'package': candidate.endswith('__init__.py'),
            }
            return self

        logger.info(
            "[-] Module '%s' cannot be loaded from '%s'. Skipping...",
            fullname, self.url)
        # Instruct 'import' to move on to next Importer
        return None

    def create_module(self, spec):
        """ Loader hook that builds the (still empty) Module object for `spec`.

        The object is also stored in `self.modules[fullname]['module']`.

        Args:
            spec: The module spec; only `spec.name` is read.

        Returns:
            (object): A new Module object with `__loader__`, `__file__`, `__path__`,
                `__url__` and `__package__` set.

        Raises:
            ImportError: If the module was not found before and `find_module` cannot find it now.
        """
        fullname = spec.name
        if fullname not in self.modules:
            logger.debug(
                "[*] Module '%s' has not been attempted before. Trying to load...",
                fullname)
            # Run 'find_module' and see if it is loadable through this
            # Importer object
            if self.find_module(fullname) is not self:
                logger.info(
                    "[-] Module '%s' has not been found as loadable. Failing...",
                    fullname)
                # It is not loadable ('find_module' returned 'None')
                raise ImportError(
                    "Module '%s' cannot be loaded from '%s'" %
                    (fullname, self.url))

        entry = self.modules[fullname]
        logger.debug(
            "[*] Creating Python Module object for '%s'", fullname)
        mod = types.ModuleType(fullname)
        mod.__loader__ = self
        mod.__file__ = entry['filepath']
        # Set module path - get filepath and keep only the path until filename
        # (set for plain modules as well as for packages)
        mod.__path__ = ['/'.join(mod.__file__.split('/')[:-1]) + '/']
        mod.__url__ = entry['filepath']
        mod.__package__ = fullname

        # Populate subpackage '__package__' metadata with parent package names
        parent_parts = fullname.split('.')[:-1]
        pkg_name = '.'.join(parent_parts)
        if len(parent_parts) > 1 and not entry['package']:
            # walk up until a parent that is its own package is found
            while sys.modules[pkg_name].__package__ != pkg_name:
                pkg_name = '.'.join(pkg_name.split('.')[:-1])
            mod.__package__ = pkg_name
        elif not entry['package']:
            mod.__package__ = pkg_name.split('.')[0]

        logger.debug(
            "[*] Metadata (__package__) set to '%s' for %s '%s'",
            mod.__package__,
            'package' if entry['package'] else 'module',
            fullname)

        entry['module'] = mod
        return mod

    def exec_module(self, module):
        """ Loader hook that executes the cached code of `module`.

        Args:
            module: The Module object to populate; only `module.__name__` is read.

        Returns:
            (object): The result of `_create_module` for that name.
        """
        fullname = module.__name__
        return self._create_module(fullname)

    def _create_module(self, fullname, sys_modules=True):
        """ Method that loads module/package code into a Python Module object

        Args:
            fullname (str): The name of the module/package to be loaded
            sys_modules (bool, optional): Set to False to not inject the module into sys.modules
                It will fail for packages/modules that contain relative imports

        Returns:
            (object): Module object containing the executed code of the specified module/package

        Raises:
            ImportError: If the module cannot be found through this Importer.
            BaseException: With `sys_modules=True`, whatever the module code raises
                is re-raised after the module is removed from sys.modules.
        """
        # If the module has not been found as loadable
        # through 'find_module' method (yet)
        if fullname not in self.modules:
            spec = self.find_spec(fullname, "")
            if spec is None:
                raise ImportError(
                    "Module '%s' cannot be loaded from '%s'" %
                    (fullname, self.url))
            module = self.create_module(spec)
        else:
            module = self.modules[fullname]['module']

        if sys_modules:
            sys.modules[fullname] = module

        # Execute the module/package code into the Module object
        try:
            exec(self.modules[fullname]['content'], module.__dict__)
        except BaseException:
            if sys_modules:
                # Do not leave a half-initialised module importable, and let
                # the real error reach the caller. Swallowing it here makes
                # the import machinery fail later with an unrelated KeyError.
                sys.modules.pop(fullname, None)
                raise
            # Without sys.modules the load is best-effort: report the failure
            # and hand back whatever was executed so far.
            logger.warning(
                "[-] Module/Package '%s' cannot be imported without adding it to sys.modules. Might contain relative imports.",
                fullname)
        return module
- 其他封装 其他封装比较多,比如 github、gitlab、pypi、http 等,内部都使用了此类。比如 github 的处理是用 contextmanager 包装的:进入时调用 add_remote_repo,退出时调用 remove_remote_repo
@contextmanager
def github_repo(username=None, repo=None, ref='master',
                domain=None, profile=None):
    """ Context Manager that enables importing modules/packages from Github repositories.

    An importer for the repository URL is added on entry and removed again
    on exit, whether or not the body raised.

    Args:
        username (str): The username which is the repository's owner in the Git Service.
        repo (str): The name of the repository that contains the modules/packages to be imported
        ref (str): The commit hash, branch or tag to be fetched
        domain (str): The domain to be used for the URL (service domains service raw content)
        profile: Forwarded unchanged to `add_remote_repo` as its `profile` argument.
    """
    url = __create_git_url('github',
                           username, repo, ref=ref, domain=domain)
    add_remote_repo(url=url, profile=profile)
    try:
        # Exceptions from the 'with' body (ImportError included) propagate
        # on their own; no handler is needed just to re-raise them.
        yield
    finally:  # Always remove the added HttpImporter from sys.meta_path
        remove_remote_repo(url)
add_remote_repo 的处理
def add_remote_repo(url=None, profile=None, importer_class=HttpImporter):
    """ Creates an HttpImporter object and adds it to the `sys.meta_path`.

    Args:
        url (str): The URL of an HTTP/WebDav directory (either listable or not)
            or of an archive (supported: .zip, .tar, .tar.bz, .tar.gz, .tar.xz - Python3 only)
        profile: Forwarded with `url` to `__extract_profile_options`, whose result
            supplies the importer's keyword options (and may override `url`).
        importer_class: The class instantiated as `importer_class(url, **options)`.

    Returns:
        HttpImporter: The `HttpImporter` object added to the `sys.meta_path`
    """
    options = __extract_profile_options(url, profile)
    # Take 'url' out of the options in one step. The previous get-with-default
    # followed by `del` raised KeyError exactly when the default was needed.
    url = options.pop('url', url)
    logger.debug(
        "[*] Adding '%s' (profile: %s) with options: %s ",
        importer_class, profile, options)
    importer = importer_class(
        url,
        **options,
    )
    sys.meta_path.append(importer)
    return importer
remove_remote_repo处理
def remove_remote_repo(url):
    """ Drop the first `sys.meta_path` entry whose `url` starts with the given URL.

    Entries without a usable string `url` attribute are skipped.

    Args:
        url (str): The URL of the `HttpImporter` object to remove

    Returns:
        bool: True if an entry was removed, False if none matched.
    """
    # A single trailing '/' is ignored, as importers store their URL without it
    prefix = url[:-1] if url.endswith('/') else url
    for candidate in sys.meta_path:
        try:
            if not candidate.url.startswith(prefix):
                continue
            sys.meta_path.remove(candidate)
        except AttributeError:
            # Not an importer of ours (no 'url', or 'url' is not a string)
            continue
        return True
    return False
说明
以上是一个简单说明,实际内部还有不少细节(比如不同协议下 Python 代码的获取与加载等),通过了解内部的机制可以更好地使用 httpimport
浙公网安备 33010602011771号