• 博客园logo
  • 会员
  • 众包
  • 新闻
  • 博问
  • 闪存
  • 赞助商
  • HarmonyOS
  • Chat2DB
    • 搜索
      所有博客
    • 搜索
      当前博客
  • 写随笔 我的博客 短消息 简洁模式
    用户头像
    我的博客 我的园子 账号设置 会员中心 简洁模式 ... 退出登录
    注册 登录
sm0nk
Exercise one's inventive mind.
博客园    首页    新随笔    联系   管理    订阅  订阅

批量从文本读取URL,筛选可正常访问的地址并保留对应IP

#coding=utf-8 
import sys 
import requests 
# Probe sequentially numbered pages (20150602<i>.html) on the target host and
# dump every page that answers with HTTP 200.
# NOTE(review): range(3000, 4999) stops at 4998 -- confirm 4999 is meant to be
# excluded.
for i in range(3000, 4999):
    url = 'http://192.168.88.139:8888/20150602' + str(i) + '.html'
    try:
        # A timeout keeps one unresponsive request from stalling the scan.
        r = requests.get(url, timeout=5)
    except requests.RequestException:
        # Connection refused / timed out: skip this candidate, keep scanning.
        continue
    if r.status_code == 200:
        print(url)
        print(r.content)

 

 

原文

http://zone.wooyun.org/content/20885

 

 

 

多线程 + 逐行读取文本 + 将域名解析为IP + 结果写入文件

# -*-coding:utf-8-*-
import os
import sys
import Queue
import getopt
import logging
import requests
import threading
import time
import socket
# Print the wall-clock start time of the run.
print("start:" + time.strftime("%H:%M:%S"))

# Root logger: WARNING and above only, each record prefixed with a timestamp.
logging.basicConfig(format="[%(asctime)s] %(message)s", level=logging.WARNING)

class BatchThreads(threading.Thread):
    """Worker thread that probes targets taken from a shared queue.

    Every queue item is a target string without a scheme; ``http://`` is
    prepended before requesting it. Targets that answer with HTTP 200 are
    appended to ``yes.txt`` together with their resolved IP address; targets
    whose request raises are appended to ``noaccess.txt``. Responses with any
    other status code are not recorded.
    """

    # Shared by all workers so that lines appended to the result files by
    # different threads never interleave.
    _write_lock = threading.Lock()

    def __init__(self, queue):
        """Store the shared queue the worker pulls its targets from."""
        super(BatchThreads, self).__init__()
        self.queue = queue

    @staticmethod
    def _host_of(target):
        """Return the host part of *target*, dropping any path and port.

        Handles plain ``host[:port][/path]`` strings only; user-info and
        bracketed IPv6 literals are not supported.
        """
        return target.split('/', 1)[0].split(':', 1)[0]

    def _append(self, path, line):
        """Append *line* to the file *path* while holding the write lock."""
        with self._write_lock:
            with open(path, 'a') as out:
                out.write(line)

    def run(self):
        """Drain the queue, probing each target, until the queue is empty."""
        while True:
            # get_nowait() instead of empty()+get(): another worker may take
            # the last item between the two calls, and a blocking get() would
            # then hang this thread (and the caller's join()) forever.
            try:
                tempurl = self.queue.get_nowait()
            except Queue.Empty:
                break

            url = 'http://' + tempurl
            try:
                r = requests.get(url, timeout=5)
            except Exception:
                # Best-effort scan: any request failure marks the target as
                # not accessible. KeyboardInterrupt/SystemExit still propagate.
                print(url + " error")
                self._append('noaccess.txt', url + '\n')
                continue

            if r.status_code != 200:
                continue

            print(url + ' ' + 'access-comman:200')
            # Resolve only the host name; a port or path in the target would
            # make gethostbyname() fail even though the URL is reachable.
            try:
                ip = socket.gethostbyname(self._host_of(tempurl))
            except (socket.error, UnicodeError):
                # The URL answered 200, so keep it in yes.txt without an IP
                # rather than reporting it as inaccessible.
                ip = 'unknown'
            self._append('yes.txt', url + '    ' + '    ' + ip + '\n')


def batch_queue(_queue, _thread_number, _url_file='url-hz.txt'):
    """Load targets from a text file and probe them with worker threads.

    Blank lines and lines starting with ``#`` are skipped and duplicates are
    removed. The remaining targets are put on ``_queue`` and processed by
    ``BatchThreads`` workers; the call returns once every worker has finished.

    Args:
        _queue: Queue that receives the targets and is shared by the workers.
        _thread_number: Upper bound on the number of worker threads.
        _url_file: Path of the text file holding one target per line.

    Raises:
        IOError/OSError: If ``_url_file`` cannot be opened.
    """
    with open(_url_file) as f:
        stripped = (line.strip() for line in f)
        urls = set(u for u in stripped if u and not u.startswith("#"))
    if not urls:
        return

    # Fill the queue that was passed in, not a module-level global.
    for url in urls:
        _queue.put(url)

    # Never start more workers than there are targets to process.
    worker_count = min(_thread_number, len(urls))

    # A local list keeps repeated calls from re-starting threads created by
    # an earlier call (Thread.start() may only be called once per thread).
    workers = [BatchThreads(_queue) for _ in range(worker_count)]
    for t in workers:
        t.start()
    for t in workers:
        t.join()

# Script entry: module-level state read by batch_queue, then one scan run
# with 20 requested worker threads.
thread_number = 20
queue = Queue.Queue()
threads = []
batch_queue(queue, thread_number)

# Print the wall-clock end time of the run.
print("end:" + time.strftime("%H:%M:%S"))

 

posted @ 2015-07-08 09:22  sm0nk  阅读(620)  评论(0)    收藏  举报
刷新页面返回顶部
博客园  ©  2004-2025
浙公网安备 33010602011771号 浙ICP备2021040463号-3