抓取新浪的每日星座运势

从新浪上抓取每日的星座运势,然后发到求实BBS的Astrology版。

#!/usr/bin/python
# encoding: utf-8
from sgmllib import SGMLParser
from datetime import datetime, timedelta
import re
import urllib
# URL of the image that Parser.start_img turns into a '★' character when it
# appears as the src of an <img> tag.
starurl = 'http://image2.sina.com.cn/ast/2007index/tmp/star_php/star.gif'
class Parser(SGMLParser):
    """Flatten a fragment of horoscope HTML into plain text.

    Fragments are accumulated in ``self.rlt`` as the markup is fed in:
    a '★' for every <img> whose src equals ``starurl``, ': ' at each <p>,
    a newline at each </p>, and the right-stripped text in between.
    ``output()`` joins the fragments into the final string.
    """

    # Class-level default; reset() rebinds a fresh list on the instance.
    rlt = []

    def reset(self):
        """Reset the underlying SGML parser and drop collected fragments."""
        SGMLParser.reset(self)
        self.rlt = []

    def start_img(self, attrs):
        """Append one star if the tag's first src attribute is ``starurl``."""
        sources = [value for name, value in attrs if name == 'src']
        if not sources:
            return
        if sources[0] == starurl:
            self.rlt.append('★')

    def start_p(self, attrs):
        """Open a paragraph with a ': ' separator."""
        self.rlt.append(': ')

    def end_p(self):
        """Close a paragraph with a newline."""
        self.rlt.append('\n')

    def handle_data(self, text):
        """Collect character data with trailing whitespace removed."""
        trimmed = text.rstrip()
        self.rlt.append(trimmed)

    def output(self):
        """Return the collected text, dropping ': ' that directly precedes a newline."""
        joined = "".join(self.rlt)
        return re.sub(': \n', '\n', joined)

def gettext(url):
    """Download *url* and return the contents of its "lotstars" <div>.

    The response body is decoded as GB18030 and re-encoded to UTF-8 before
    matching, so the returned value is a UTF-8 byte string.

    The pattern is greedy and uses re.S, so the capture runs from the
    opening ``<div class="lotstars">`` to the LAST ``</div>`` in the page,
    not to the first one.

    Raises:
        IndexError: if the page contains no ``<div class="lotstars">``.
    """
    response = urllib.urlopen(url)
    try:
        raw = response.read()
    finally:
        # Always release the connection, even when the read fails.
        response.close()
    txt = raw.decode('gb18030').encode('utf-8')
    pattern = re.compile(r'<div class="lotstars">(.*)</div>', re.S)
    match = pattern.search(txt)
    if match is None:
        # Same exception type the original `findall(...)[0]` raised, but
        # with enough context to tell which page changed its layout.
        raise IndexError('no <div class="lotstars"> block found in %s' % url)
    return match.group(1)

# Title suffixes for the twelve signs. genedict() pairs astros[i] with the
# page frame0_<i>.html, so the order matters. Strings are kept verbatim
# (including the trailing space in the Leo entry).
astros = [
    '牡羊座 Aries       (03/21-04/19)',
    '金牛座 Taurus      (04/20-05/20)',
    '双子座 Gemini      (05/21-06/21)',
    '巨蟹座 Cancer      (06/22-07/22)',
    '狮子座 Leo         (07/23-08/22) ',
    '处女座 Virgo       (08/23-09/22)',
    '天秤座 Libra       (09/23-10/23)',
    '天蝎座 Scorpio     (10/24-11/22)',
    '射手座 Sagittarius (11/23-12/21)',
    '摩羯座 Capricorn   (12/22-01/19)',
    '水瓶座 Aquarius    (01/20-02/18)',
    '双鱼座 Pisces      (02/19-03/20)',
]
def genedict():
    """Scrape all twelve horoscope pages and return them as (title, body) pairs.

    Each title is today's date (YYYY-MM-DD), a space, and the matching
    ``astros`` entry; each body is the text ``Parser`` produces from the
    page's "lotstars" block. Pairs are returned in page order 0..11.
    """
    today = datetime.now().strftime('%Y-%m-%d')
    page_template = 'http://astro.sina.com.cn/pc/west/frame0_%d.html'
    extractor = Parser()
    entries = []
    for index in range(12):
        # One parser instance is reused; clear what the last page left behind.
        extractor.reset()
        title = today + ' ' + astros[index]
        page_url = page_template % index
        extractor.feed(gettext(page_url))
        entries.append((title, extractor.output()))
    return entries

if __name__ == '__main__':
    rlt=genedict()
    for k, v in rlt:
        print k, '\n', v, '\n'

 

 

#!/usr/bin/python
# encoding: utf-8
import telnetlib
import getpass
import parser
import time

# Posting script: log in to a telnet server and post every scraped horoscope.
# The script never reads from the connection; it sends a fixed keystroke
# sequence blind, so the exact order and number of writes below matters.

# Address and port of the telnet server.
HOST='202.113.13.188'
PORT=23
# Credentials are asked for interactively; getpass keeps the password off-screen.
user = raw_input('Enter your account: ')
password = getpass.getpass()

tn = telnetlib.Telnet(HOST, PORT)
# Log in: account name, then password, each followed by Enter.
tn.write(user+'\n')
tn.write(password+'\n')
# Seven bare Enters -- presumably to page through the screens shown after
# login; the count looks site-specific (TODO confirm against the server).
tn.write('\n'*7)
# Presumably menu keystrokes that navigate to the board named "Astrology"
# -- confirm against the BBS menus.
tn.write('F\n')
tn.write('\n')
tn.write('sAstrology\n')
# `parser` is expected to be the scraper script saved as parser.py.
# NOTE(review): Python 2 also ships a standard module named `parser`;
# confirm the local file is the one that actually gets imported.
rlt=parser.genedict()
for k,v in rlt:
    # 0x10 is Ctrl-P -- presumably the BBS "new post" key (TODO confirm).
    tn.write(chr(0x10))
    # genedict() yields UTF-8 byte strings; they are sent as GB18030.
    tn.write(k.decode('utf-8').encode('gb18030'))
    # Echo locally what is being sent.
    print k
    tn.write('\n\n')
    tn.write(v.decode('utf-8').encode('gb18030'))
    print v
    # 0x17 is Ctrl-W -- presumably the BBS "save/submit post" key (TODO confirm).
    tn.write(chr(0x17))
    tn.write('\n')
    # Wait 5 seconds before the next post -- presumably to let the server
    # finish handling this one.
    time.sleep(5)
# Presumably the logout keystroke (TODO confirm), then close the connection.
tn.write('!')
tn.write('\n')
tn.close()

 

程序需要改进的地方:

  1. 进站之后的留言板留言较多时,至少需要输入两个字符才能离开留言板页面
  2. 用户有好友并且登录时有好友在线的情况
  3. 用户登录时收到了MSG

下一步计划:

  1. 试着用web方式实现同样的功能
  2. 实现求实的灌水机
posted @ 2009-09-11 21:09  千里快哉  阅读(249)  评论(0)    收藏  举报