c++实验二
实验目的:
熟练使用类
了解http网络协议
利用http网络协议保存网页
实验内容:
1、 实现http网络应用
2、 分析程序基本功能
3、 用http协议方式构建自己的一个网络应用,获取网页中超链接1层以上的内容(即不但要保存当前的页面,还要链接到相关页面,并保存其内容)
4、 撰写实验报告
使用方法:先在C盘下建立一个名为lab3的文件夹,然后在命令行运行:tear http://www.***(你要获取的链接)。起始页面保存为 c:/lab3/1.html,该页面中超链接指向的页面依次保存为 c:/lab3/a1.html、c:/lab3/a2.html……,所有结果都保留在此文件夹内。
头文件:
// tear.h : class definitions for CTearSession and CTearException
//
// This is a part of the Microsoft Foundation Classes C++ library.
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// This source code is only intended as a supplement to the
// Microsoft Foundation Classes Reference and related
// electronic documentation provided with the library.
// See these sources for detailed information regarding the
// Microsoft Foundation Classes product.
// Created by Mike Blaszczak.
#include <afx.h>
#include <afxwin.h>
#include <afxinet.h>
#include <stdlib.h>
#include <iostream>
using namespace std;
/////////////////////////////////////////////////////////////////////////////
// Globals
// URL given on the command line; ParseOptions() points it at the matching
// argv entry and main() treats a NULL value as "no URL supplied".
LPCTSTR pszURL = NULL;
// TRUE when /S was given: main() runs each line through StripTags().
BOOL bStripMode = FALSE;
// TRUE when /P was given: enables the progress messages written to cerr.
BOOL bProgressMode = FALSE;
// Access type handed to the CTearSession constructor; /L and /D select it.
DWORD dwAccessType = PRE_CONFIG_INTERNET_ACCESS;
// Flags passed to every OpenRequest() call; /F ORs in INTERNET_FLAG_RELOAD.
DWORD dwHttpRequestFlags = INTERNET_FLAG_EXISTING_CONNECT | INTERNET_FLAG_NO_AUTO_REDIRECT;
// Extra request headers added to every HTTP request before it is sent.
const TCHAR szHeaders[] = _T("Accept: text/*\r\nUser-Agent: MFC_Tear_Sample\r\n");
/////////////////////////////////////////////////////////////////////////////
// CTearSession
// A thin CInternetSession subclass. It exists only so that TEAR can
// override OnStatusCallback() and report connection progress.
class CTearSession : public CInternetSession
{
public:
    // pszAppName: agent name forwarded to CInternetSession.
    // nMethod:    access type forwarded to CInternetSession.
    CTearSession(LPCTSTR pszAppName, int nMethod);

    // Status notification hook; only dwInternetStatus is examined.
    virtual void OnStatusCallback(
        DWORD dwContext,
        DWORD dwInternetStatus,
        LPVOID lpvStatusInfomration,
        DWORD dwStatusInformationLen);
};
// Forwards the agent name and access type to the base class; the middle
// argument (1) is a fixed value passed in CInternetSession's second slot.
CTearSession::CTearSession(LPCTSTR pszAppName, int nMethod)
    : CInternetSession(pszAppName, 1, nMethod)
{
    // nothing else to initialise
}
// Writes "Connection made!" to cerr when progress mode is on and the
// reported status is INTERNET_STATUS_CONNECTED_TO_SERVER. Every other
// notification is ignored, as are all parameters except dwInternetStatus.
void CTearSession::OnStatusCallback(DWORD /* dwContext */,
    DWORD dwInternetStatus,
    LPVOID /* lpvStatusInfomration */,
    DWORD /* dwStatusInformationLen */)
{
    const bool bReportConnection = bProgressMode &&
        dwInternetStatus == INTERNET_STATUS_CONNECTED_TO_SERVER;

    if (bReportConnection)
        cerr << _T("Connection made!") << endl;
}
/////////////////////////////////////////////////////////////////////////////
// CTearException
// TEAR's own exception type, thrown (by pointer, via ThrowTearException())
// when the program itself decides it cannot continue. The carried error
// code becomes the process exit code in main().
class CTearException : public CException
{
    DECLARE_DYNCREATE(CTearException)

public:
    // Error code supplied when the exception was created.
    int m_nErrorCode;

    CTearException(int nCode = 0);
    ~CTearException() { }
};
// Dynamic-creation support matching DECLARE_DYNCREATE in the class body.
IMPLEMENT_DYNCREATE(CTearException, CException)

// Stores the supplied error code; no other state is initialised here.
CTearException::CTearException(int nCode) : m_nErrorCode(nCode)
{
}
// Allocates a CTearException carrying nCode and throws it by pointer.
// The handler in main() catches CTearException* and releases the object
// with Delete().
void ThrowTearException(int nCode)
{
    throw new CTearException(nCode);
}
cpp文件:
// tear.cpp : implements the TEAR console application
//
// This is a part of the Microsoft Foundation Classes C++ library.
// Copyright (c) Microsoft Corporation. All rights reserved.
//
// This source code is only intended as a supplement to the
// Microsoft Foundation Classes Reference and related
// electronic documentation provided with the library.
// See these sources for detailed information regarding the
// Microsoft Foundation Classes product.
#include "tear.h"
#include <vector>
#include <string>
#include <iostream>
#include <time.h>
#pragma warning(push)
#pragma warning(disable: 4100 4667)
#pragma warning(pop)
using namespace std;
/////////////////////////////////////////////////////////////////////////////
// Routines
// Prints the two-line program banner followed by a blank line to cerr.
void ShowBanner()
{
    cerr << _T("TEAR - Tear a Page Off the Internet!") << endl
         << _T("Version 7.0 - Copyright (C) Microsoft Corp.") << endl
         << endl;
}
// Prints the command-line help text to cerr and terminates the process
// with exit code 1. This function never returns to its caller.
void ShowUsage()
{
    // Synopsis.
    cerr << _T("Usage: TEAR [options] <URL>") << endl << endl
         << _T("\t<URL> points at a HTTP resource") << endl
         << _T("\t[options] are any of:") << endl;

    // Request/output options.
    cerr << _T("\t\t/F force reload of requested page") << endl
         << _T("\t\t/P show detailed progress information") << endl
         << _T("\t\t/S strip HTML tags from stream") << endl << endl;

    // Internet access options.
    cerr << _T("\t\t/L use local Internet access") << endl
         << _T("\t\t/D use pre-configured Internet access (default)") << endl
         << endl;

    exit(1);
}
// ParseOptions() walks the command line (skipping argv[0]) and records what
// it finds in the global option variables:
//   /D or -D  -> dwAccessType = PRE_CONFIG_INTERNET_ACCESS
//   /L or -L  -> dwAccessType = LOCAL_INTERNET_ACCESS
//   /S or -S  -> bStripMode = TRUE
//   /P or -P  -> bProgressMode = TRUE
//   /F or -F  -> dwHttpRequestFlags |= INTERNET_FLAG_RELOAD
// Option letters are accepted in either case. Any argument that does not
// start with '-' or '/' is taken as the URL and stored in pszURL.
// Returns FALSE (after printing a message to cerr) for an unknown option or
// a second URL; returns TRUE otherwise.
BOOL ParseOptions(int argc, char* argv[])
{
    for (int nArg = 1; nArg < argc; ++nArg)
    {
        char* const pszArg = argv[nArg];
        const bool bIsOption = (pszArg[0] == '-' || pszArg[0] == '/');

        if (!bIsOption)
        {
            // Plain argument: this is the URL, and only one is allowed.
            if (pszURL != NULL)
            {
                cerr << _T("Error: can only specify one URL!") << endl;
                return FALSE;
            }
            pszURL = pszArg;
            continue;
        }

        // Only the character right after the switch prefix is significant.
        switch (pszArg[1])
        {
        case 'D':
        case 'd':
            dwAccessType = PRE_CONFIG_INTERNET_ACCESS;
            break;
        case 'L':
        case 'l':
            dwAccessType = LOCAL_INTERNET_ACCESS;
            break;
        case 'S':
        case 's':
            bStripMode = TRUE;
            break;
        case 'P':
        case 'p':
            bProgressMode = TRUE;
            break;
        case 'F':
        case 'f':
            dwHttpRequestFlags |= INTERNET_FLAG_RELOAD;
            break;
        default:
            cerr << _T("Error: unrecognized option: ") << pszArg << endl;
            return FALSE;
        }
    }
    return TRUE;
}
// StripTags() removes HTML tags from a NUL-terminated buffer in place:
// everything from a '<' up to and including the next '>' is dropped and the
// remaining characters are compacted towards the front of the buffer.
// Whether the scan is currently inside a tag is kept in a function-level
// static, so a tag that starts in one buffer and ends in a later one is
// still removed. That state is shared by all calls.
void StripTags(LPTSTR pszBuffer)
{
    static BOOL bInsideTag = FALSE;

    LPTSTR pszWrite = pszBuffer;
    for (LPTSTR pszRead = pszBuffer; *pszRead != '\0'; ++pszRead)
    {
        if (bInsideTag)
        {
            // Discard tag content; a '>' ends the tag.
            if (*pszRead == '>')
                bInsideTag = FALSE;
            continue;
        }

        if (*pszRead == '<')
        {
            // Tag opener: drop it and start skipping.
            bInsideTag = TRUE;
            continue;
        }

        *pszWrite++ = *pszRead;
    }
    *pszWrite = '\0';
}
/////////////////////////////////////////////////////////////////////////////
// The main() Thang
int main(int argc, char* argv[])
{
// ShowBanner();
if (!AfxWinInit(::GetModuleHandle(NULL), NULL, ::GetCommandLine(), 0))
{
cerr << _T("MFC Failed to initialize.\n");
return 1;
}
if (argc < 2 || !ParseOptions(argc, argv) || pszURL == NULL)
ShowUsage();
int nRetCode = 0;
CTearSession session(_T("TEAR - MFC Sample App"), dwAccessType);
CHttpConnection* pServer = NULL; //MFC类CHttpConnection管理与HTTP服务器的连接
CHttpFile* pFile = NULL; //If your Internet session reads data from an HTTP server, you must create an instance of CHttpFile.
try
{
// check to see if this is a reasonable URL
///////////////////////////////////////////////////////
CString vec[10000];
int idx = 0, ca = 0;
vec[idx++] = pszURL;
while(true)
{
CString strServerName;
CString strObject;
INTERNET_PORT nPort;
DWORD dwServiceType;
char fileName[100];
if(ca >= idx) break;
if(ca == 0) freopen("c:/lab3/1.html", "w", stdout);
else{
sprintf(fileName, "c:/lab3/a%d.html", ca);
freopen(fileName, "w", stdout);
}
pszURL = vec[ca];
if (!AfxParseURL(pszURL, dwServiceType, strServerName, strObject, nPort) ||
dwServiceType != INTERNET_SERVICE_HTTP)
{
cerr << _T("Error: can only use URLs beginning with http://") << endl;
++ ca; continue;
ThrowTearException(1);
}
if (bProgressMode)
{
cerr << _T("Opening Internet...");
VERIFY(session.EnableStatusCallback(TRUE));
}
pServer = session.GetHttpConnection(strServerName, nPort);
pFile = pServer->OpenRequest(CHttpConnection::HTTP_VERB_GET,
strObject, NULL, 1, NULL, NULL, dwHttpRequestFlags);
pFile->AddRequestHeaders(szHeaders);
pFile->SendRequest();
DWORD dwRet;
pFile->QueryInfoStatusCode(dwRet);
// if access was denied, prompt the user for the password
if (dwRet == HTTP_STATUS_DENIED)
{
DWORD dwPrompt;
dwPrompt = pFile->ErrorDlg(NULL, ERROR_INTERNET_INCORRECT_PASSWORD,
FLAGS_ERROR_UI_FLAGS_GENERATE_DATA | FLAGS_ERROR_UI_FLAGS_CHANGE_OPTIONS, NULL);
// if the user cancelled the dialog, bail out
if (dwPrompt != ERROR_INTERNET_FORCE_RETRY)
{
cerr << _T("Access denied: Invalid password\n");
++ ca; continue;
ThrowTearException(1);
}
pFile->SendRequest();
pFile->QueryInfoStatusCode(dwRet);
}
CString strNewLocation;
pFile->QueryInfo(HTTP_QUERY_RAW_HEADERS_CRLF, strNewLocation);
// were we redirected?
// these response status codes come from WININET.H
if (dwRet == HTTP_STATUS_MOVED ||
dwRet == HTTP_STATUS_REDIRECT ||
dwRet == HTTP_STATUS_REDIRECT_METHOD)
{
CString strNewLocation;
pFile->QueryInfo(HTTP_QUERY_RAW_HEADERS_CRLF, strNewLocation);
int nPlace = strNewLocation.Find(_T("Location: "));
if (nPlace == -1)
{
cerr << _T("Error: Site redirects with no new location") << endl;
++ ca; continue;
ThrowTearException(2);
}
strNewLocation = strNewLocation.Mid(nPlace + 10);
nPlace = strNewLocation.Find('\n');
if (nPlace > 0)
strNewLocation = strNewLocation.Left(nPlace);
// close up the redirected site
pFile->Close();
delete pFile;
pServer->Close();
delete pServer;
if (bProgressMode)
{
cerr << _T("Caution: redirected to ");
cerr << (LPCTSTR) strNewLocation << endl;
}
// figure out what the old place was
if (!AfxParseURL(strNewLocation, dwServiceType, strServerName, strObject, nPort))
{
cerr << _T("Error: the redirected URL could not be parsed.")<<endl;
++ ca;
continue;
ThrowTearException(2);
}
if (dwServiceType != INTERNET_SERVICE_HTTP)
{
cerr << _T("Error: the redirected URL does not reference a HTTP resource.") << endl;
++ ca; continue;
ThrowTearException(2);
}
// try again at the new location
pServer = session.GetHttpConnection(strServerName, nPort);
pFile = pServer->OpenRequest(CHttpConnection::HTTP_VERB_GET,
strObject, NULL, 1, NULL, NULL, dwHttpRequestFlags);
pFile->AddRequestHeaders(szHeaders);
pFile->SendRequest();
pFile->QueryInfoStatusCode(dwRet);
if (dwRet != HTTP_STATUS_OK)
{
cerr << _T("Error: Got status code ") << dwRet << endl;
++ ca; continue;
ThrowTearException(2);
}
}
cerr << _T("Status Code is ") << dwRet << endl;
TCHAR sz[1024];
while (pFile->ReadString(sz, 1023))
{
if (bStripMode)
StripTags(sz);
cout<<sz<<endl;
if(ca == 0){
CString cs;
cs.Format(_T("%s"), sz); //change TCHAR to CString
string str = cs.GetBuffer(0); //change CString to string
string::size_type pos1 = 0, pos2;
while((pos1 = str.find("href=\"http:/", pos1)) != string::npos){ //查找二层URL
pos1 += 6;
pos2 = str.find("\"",pos1);
string temp = str.substr(pos1, pos2-pos1);
pos1 = pos2;
CString temp2;
temp2.Format(_T("%s"), temp.c_str());
vec[idx++] = temp2;
}
}
}
// NOTE: Since HTTP servers normally spit back plain text, the
// above code (which reads line by line) is just fine. However,
// other data sources (eg, FTP servers) might provide binary data
// which should be handled a buffer at a time, like this:
#if 0
while (nRead > 0)
{
sz[nRead] = '\0';
if (bStripMode)
StripTags(sz);
cout << sz;
nRead = pFile->Read(sz, 1023);
}
#endif
pFile->Close();
pServer->Close();
++ ca;
}
}
catch (CInternetException* pEx)
{
// catch errors from WinINet
TCHAR szErr[1024];
pEx->GetErrorMessage(szErr, 1024);
cerr << _T("Error: (") << pEx->m_dwError << _T(") ");
cerr << szErr << endl;
nRetCode = 2;
pEx->Delete();
}
catch (CTearException* pEx)
{
// catch things wrong with parameters, etc
nRetCode = pEx->m_nErrorCode;
TRACE1("Error: Exiting with CTearException(%d)\n", nRetCode);
pEx->Delete();
}
if (pFile != NULL)
delete pFile;
if (pServer != NULL)
delete pServer;
session.Close();
return nRetCode;
}
浙公网安备 33010602011771号