网页抓取- 3

http://www.diyifanwen.com/jinyici/jinyici-A/

页面抓取

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "lyGetHttpResult.h"
#include "lyPublic/lyCodeConvert.h"
/*
 * Layout constants for the pages this scraper was written against.  They are
 * byte offsets taken over unchanged from the original hand-tuned code.
 */
enum
{
    URL_MAX            = 512, /* capacity of the URL scratch buffer           */
    FETCH_ATTEMPTS     = 5,   /* give up on a URL after this many failures    */
    HREF_LOOKBACK      = 60,  /* how far before " title=" the href is sought  */
    TITLE_TEXT_OFFSET  = 8,   /* strlen(" title=\""): match -> title text     */
    NEXT_ENTRY_SKIP    = 19,  /* bytes skipped before searching the next hit  */
    NEXT_LINK_LOOKBACK = 55,  /* how far before the "next page" label to scan */
    NEXT_LINK_TAIL     = 18   /* bytes between the link target and the label  */
};

static const char SITE_ROOT[]  = "http://www.diyifanwen.com/";
static const char TITLE_ATTR[] = " title=";

/*
 * Download `url`, retrying up to FETCH_ATTEMPTS times.
 * Returns the buffer produced by GetDataFromWeb, or NULL if every attempt
 * failed.  The original program passed these buffers to free(), so the
 * caller is treated as the owner -- NOTE(review): confirm against
 * lyGetHttpResult.h.
 */
static char *fetch_page(char *url)
{
    char *data = NULL;
    int attempt;

    for (attempt = 0; attempt < FETCH_ATTEMPTS && data == NULL; attempt++)
    {
        data = GetDataFromWeb(url, NULL, NULL, 1, 5);
    }
    return data;
}

/* Step forward at most `n` bytes, never moving past the terminating NUL. */
static char *advance(char *s, size_t n)
{
    while (n-- > 0 && *s != '\0')
    {
        s++;
    }
    return s;
}

/*
 * Write SITE_ROOT followed by the bytes [from, to) into dst.
 * Returns 0 on success, -1 if the result would not fit in `cap` bytes.
 */
static int make_url(char *dst, size_t cap, const char *from, const char *to)
{
    size_t root_len = sizeof SITE_ROOT - 1;
    size_t path_len = (to > from) ? (size_t)(to - from) : 0;

    if (root_len + path_len + 1 > cap)
    {
        return -1;
    }
    memcpy(dst, SITE_ROOT, root_len);
    memcpy(dst + root_len, from, path_len);
    dst[root_len + path_len] = '\0';
    return 0;
}

/*
 * Build the URL of the next index page from the link that precedes the
 * "next page" label in `page`.
 * Returns 0 on success, -1 if the link cannot be located or does not fit.
 */
static int next_page_url(char *dst, size_t cap, const char *page)
{
    const char *label = strstr(page, "下一页");
    const char *from;
    const char *to;

    /* The fixed look-back must stay inside the buffer. */
    if (label == NULL || label - page < NEXT_LINK_LOOKBACK)
    {
        return -1;
    }
    from = label - NEXT_LINK_LOOKBACK;
    to = label - NEXT_LINK_TAIL;

    /* Find the '=' of the href attribute, then skip "=" and the opening quote
       plus leading '/' exactly as the original offsets did (t += 2). */
    while (from < to && *from != '=')
    {
        from++;
    }
    if (from >= to)
    {
        return -1;
    }
    from += 2;
    return make_url(dst, cap, from, to);
}

/*
 * Copy the text found between the "】</span>" marker and the first
 * "<br><span>" of a detail page to stdout and to `fp`, dropping markup:
 * '<', '/', '>', '&' and lowercase ASCII letters are discarded and ';' is
 * written to the file as a space (so an entity such as "&nbsp;" becomes a
 * single space).  Does nothing when either marker is missing.
 *
 * NOTE(review): the original filter compared against an empty character
 * constant, which does not compile; its otherwise unreachable
 * `else if (*s == ';')` branch indicates ';' was the intended character.
 */
static void emit_synonyms(FILE *fp, const char *page)
{
    static const char open_mark[] = "】</span>";
    const char *s = strstr(page, open_mark);
    const char *t = strstr(page, "<br><span>");

    if (s == NULL || t == NULL)
    {
        return;
    }
    /* Skipping the whole marker is independent of the source encoding; the
       "</span>" part would have been filtered out character by character. */
    s += sizeof open_mark - 1;

    for (; s < t; s++)
    {
        char c = *s;

        if (c == ';')
        {
            fputc(' ', fp);
        }
        else if (c != '<' && c != '/' && c != '>' && c != '&' && (c < 'a' || c > 'z'))
        {
            printf("%c", c);
            fputc(c, fp);
        }
    }
}

/*
 * Crawl the index pages "jinyici-A" .. "jinyici-Z".  For every link that
 * carries a title attribute, write the title, two spaces and the filtered
 * text of the linked page as one line of "1.txt" (the same text is echoed
 * to stdout together with each fetched URL).
 *
 * Returns 0 on success, 1 if the output file cannot be opened/closed or an
 * index page cannot be downloaded.
 */
int main()
{
    char url[URL_MAX];
    /* Index URL whose letter is bumped A -> Z; the letter is the character
       just before the trailing '/'. */
    char index_url[] = "http://www.diyifanwen.com//jinyici/jinyici-A/";
    const size_t letter = sizeof index_url - 3;
    char *index_page;
    char *hit;
    FILE *fp;
    int rc = 0;

    /* "rt+" requires the file to exist already; fail loudly if it does not. */
    fp = fopen("1.txt", "rt+");
    if (fp == NULL)
    {
        perror("1.txt");
        return 1;
    }

    snprintf(url, sizeof url, "%s", "http://www.diyifanwen.com/jinyici/jinyici-A/");
    index_page = fetch_page(url);
    if (index_page == NULL)
    {
        fprintf(stderr, "cannot download %s\n", url);
        fclose(fp);
        return 1;
    }

    hit = strstr(index_page, TITLE_ATTR);
    while (hit != NULL)
    {
        char *entry = hit;
        /* The href value ends at the quote just before " title=". */
        char *href_end = (entry > index_page) ? entry - 1 : entry;
        char *href_begin = (entry - index_page > HREF_LOOKBACK)
                               ? entry - HREF_LOOKBACK
                               : index_page;
        const char *title;

        /* The link path starts at the first '/' inside the look-back window. */
        while (href_begin < href_end && *href_begin != '/')
        {
            href_begin++;
        }

        /* Title text runs up to the closing quote of the attribute. */
        for (title = advance(entry, TITLE_TEXT_OFFSET);
             *title != '\0' && *title != '"';
             title++)
        {
            printf("%c", *title);
            fputc(*title, fp);
        }
        fputs("  ", fp);

        if (make_url(url, sizeof url, href_begin, href_end) == 0)
        {
            char *detail;

            puts(url);
            detail = fetch_page(url);
            if (detail != NULL)
            {
                emit_synonyms(fp, detail);
                free(detail);
            }
            else
            {
                fprintf(stderr, "cannot download %s\n", url);
            }
        }
        else
        {
            fprintf(stderr, "link too long, entry skipped\n");
        }
        fputc('\n', fp);

        hit = strstr(advance(entry, NEXT_ENTRY_SKIP), TITLE_ATTR);
        if (hit == NULL)
        {
            /* Current index page is exhausted: follow its "next page" link,
               or move on to the next letter when there is none. */
            char *next_page = NULL;

            if (strstr(index_page, "下一页</a> <a href") == NULL)
            {
                printf("oooo");
                index_url[letter]++;
                if (index_url[letter] > 'Z')
                {
                    break;
                }
                next_page = fetch_page(index_url);
            }
            else if (next_page_url(url, sizeof url, index_page) == 0)
            {
                next_page = fetch_page(url);
            }

            if (next_page == NULL)
            {
                fprintf(stderr, "cannot load the next index page\n");
                rc = 1;
                break;
            }
            free(index_page);
            index_page = next_page;
            hit = strstr(index_page, TITLE_ATTR);
        }
    }

    if (fclose(fp) != 0)
    {
        perror("1.txt");
        rc = 1;
    }
    /* Only the buffer itself is released; pointers into it must not be freed. */
    free(index_page);
    return rc;
}

 

posted @ 2013-07-22 12:01  煮人为乐  阅读(255)  评论(0)    收藏  举报