pip install builtwith
该模块将URL作为参数,下载该URL并对其进行分析,然后返回该网站使用的技术。
下面是使用该模块的一个例子。
import builtwith
builtwith.parse('http://example.webscraping.com')
{'web-servers': ['Nginx'], 'web-frameworks': ['Web2py', 'Twitter Bootstrap'], 'programming-languages': ['Python'], 'javascript-frameworks': ['jQuery', 'Modernizr', 'jQuery UI']}
寻找网站所有者
pip install python-whois
import whois
print (whois.whois('http://example.webscraping.com/'))
{
"domain_name": "WEBSCRAPING.COM",
"registrar": "GoDaddy.com, LLC",
"whois_server": "whois.godaddy.com",
"referral_url": null,
"updated_date": [
"2013-08-20 08:08:30",
"2013-08-20 08:08:29"
],
"creation_date": "2004-06-26 18:01:19",
"expiration_date": "2020-06-26 18:01:19",
"name_servers": [
"NS1.WEBFACTION.COM",
"NS2.WEBFACTION.COM",
"NS3.WEBFACTION.COM",
"NS4.WEBFACTION.COM"
],
"status": [
"clientDeleteProhibited https://icann.org/epp#clientDeleteProhibited",
"clientRenewProhibited https://icann.org/epp#clientRenewProhibited",
"clientTransferProhibited https://icann.org/epp#clientTransferProhibited",
"clientUpdateProhibited https://icann.org/epp#clientUpdateProhibited",
"clientTransferProhibited http://www.icann.org/epp#clientTransferProhibited",
"clientUpdateProhibited http://www.icann.org/epp#clientUpdateProhibited",
"clientRenewProhibited http://www.icann.org/epp#clientRenewProhibited",
"clientDeleteProhibited http://www.icann.org/epp#clientDeleteProhibited"
],
"emails": "abuse@godaddy.com",
"dnssec": "unsigned",
"name": null,
"org": null,
"address": null,
"city": null,
"state": "Victoria",
"zipcode": null,
"country": "AU"
}