^_^最近开始关注股票,因此闲来无事的时候做了一个股票信息抓取的小玩意。
可以先将自己关注的股票代码保存在电子表格(Stock.xls)的第二个工作表中,程序会读取这些代码,逐个从网站抓取股票的当前信息,并把结果追加到第一个工作表。
Code
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using Microsoft.Office.Core;
using Microsoft.Office.Interop.Excel;
using Microsoft.Office.Interop;
using System.Collections;
using System.Reflection;
using System.Drawing.Imaging;
using System.Drawing;
namespace ConsoleApplication1
{
class Program
{
static string fileName = Environment.CurrentDirectory + "\\" + "Stock.xls";
/// <summary>
/// Entry point. Reads the stock codes from sheet 2 of the workbook, fetches the
/// data for every code, writes the results to sheet 1 and saves a copy of the
/// workbook back to <c>fileName</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Application app = GetApplicationObject();
    try
    {
        // Disable Excel's alerts before doing any work: if something below fails,
        // Quit() in the finally block must not block on a save prompt shown by
        // the invisible Excel instance.
        app.DisplayAlerts = false;

        // Opening the workbook and sheets happens inside the try block so that
        // Excel is always shut down, even when the file or a sheet is missing.
        Workbook book = GetWorkBook(app, fileName);
        Worksheet stockCodeSheet = GetWorkSheet(book, 2);
        Worksheet stockInforSheet = GetWorkSheet(book, 1);

        string[] stockCodes = GetStockCodes(stockCodeSheet);
        ArrayList al = new ArrayList();
        foreach (string item in stockCodes)
        {
            // A failed lookup yields null; the entry is kept so that list
            // positions still correspond to rows of the code sheet.
            al.Add(GetStockInfor(item));
        }
        InsertIntoExcel(al, stockInforSheet);

        // Mark the workbook as saved so that closing Excel does not ask again,
        // then write the copy over the original file.
        book.Saved = true;
        book.SaveCopyAs(fileName);
    }
    finally
    {
        app.Quit();
        app = null;
        // Let the runtime release the COM wrappers so the Excel process can exit.
        GC.Collect();
        GC.WaitForPendingFinalizers();
    }
}
/// <summary>
/// Downloads the quote page for one stock and extracts its data from the HTML.
/// </summary>
/// <param name="stockCode">
/// Stock code; it is appended to the query URL and used to locate the
/// <c>bfn_la_&lt;code&gt;</c> price element in the page.
/// </param>
/// <returns>
/// Eight strings in this order: stock code, title, yesterday's price, open
/// price, highest price, lowest price, close (latest) price and today's date
/// in short format. Returns null when the code is blank, the expected
/// elements are not found, or any error occurs while downloading or parsing.
/// </returns>
protected static string[] GetStockInfor(string stockCode)
{
    // A blank code cannot match anything; skip the pointless web request.
    if (stockCode == null || stockCode.Trim().Length == 0)
    {
        return null;
    }
    try
    {
        string strStockInfor;
        // Dispose the client, the response stream and the reader deterministically.
        using (WebClient wc = new WebClient())
        using (Stream s = wc.OpenRead("http://baidu.hexun.com/stock/q.php?code=" + Uri.EscapeDataString(stockCode)))
        using (StreamReader sr = new StreamReader(s, Encoding.GetEncoding("GB2312")))
        {
            strStockInfor = sr.ReadToEnd();
        }

        // Flatten the page: drop line breaks, tabs and <br> tags.
        strStockInfor = Regex.Replace(strStockInfor, @"[\r\n\t]+", "");
        strStockInfor = strStockInfor.Replace("<br>", "");

        // Cut the page down to the "stockName" div (up to its first </div>).
        Match divMatch = Regex.Match(strStockInfor, "<div[^>]*id=\"stockName\"[^>]*>");
        if (!divMatch.Success)
        {
            return null;
        }
        strStockInfor = strStockInfor.Substring(divMatch.Index);
        int indexEnd = strStockInfor.IndexOf("</div>", StringComparison.Ordinal);
        if (indexEnd < 0)
        {
            return null;
        }
        strStockInfor = strStockInfor.Substring(0, indexEnd + 6);

        //get the title of the stock: the text of the first <span> in the section
        Match spanMatch = Regex.Match(strStockInfor, "<span[^>]*>");
        if (!spanMatch.Success)
        {
            return null;
        }
        int titleStartIndex = spanMatch.Index + spanMatch.Length;
        int titleEndIndex = strStockInfor.IndexOf("</span>", titleStartIndex, StringComparison.Ordinal);
        if (titleEndIndex < 0)
        {
            return null;
        }
        string title = strStockInfor.Substring(titleStartIndex, titleEndIndex - titleStartIndex);

        //get the closed price: the numeric text of the span whose id is bfn_la_<code>
        string pricePattern = "<span[^>]*id=\"bfn_la_" + Regex.Escape(stockCode) + "\"" + @"[^>]*>(\d*\.?\d*)</span>";
        Match priceMatch = Regex.Match(strStockInfor, pricePattern);
        if (!priceMatch.Success)
        {
            return null;
        }
        string closePri = priceMatch.Groups[1].Value;

        //get the statistics data: the last <td class="s_num"> cell of the section
        int sumStartIndex = strStockInfor.LastIndexOf("<td class=\"s_num\">", StringComparison.Ordinal);
        int sumEndIndex = strStockInfor.LastIndexOf("</td>", StringComparison.Ordinal);
        if (sumStartIndex < 0 || sumEndIndex < sumStartIndex)
        {
            throw new InvalidDataException("The structure of the website has been changed!");
        }
        string strSum = strStockInfor.Substring(sumStartIndex, sumEndIndex - sumStartIndex + 5);
        strSum = strSum.Replace(" ", "");
        // Strip the Chinese labels ("xxx:") and every tag except </span>,
        // which is kept as the separator between the values.
        strSum = Regex.Replace(strSum, @"[\u4e00-\u9fa5]*:", "");
        strSum = Regex.Replace(strSum, @"<td[^>]*>", "", RegexOptions.IgnoreCase);
        strSum = Regex.Replace(strSum, @"</td>", "", RegexOptions.IgnoreCase);
        strSum = Regex.Replace(strSum, @"<span[^>]*>", "", RegexOptions.IgnoreCase);
        string[] sum = strSum.Split(new string[] { "</span>" }, StringSplitOptions.RemoveEmptyEntries);
        if (sum.Length != 10)
        {
            throw new InvalidDataException("The structure of the website has been changed!");
        }
        string yesterdayPri = sum[0];
        string openPri = sum[1];
        string highestPri = sum[3];
        string lowestPri = sum[4];
        return new string[] { stockCode, title, yesterdayPri, openPri, highestPri, lowestPri, closePri, System.DateTime.Now.ToShortDateString() };
    }
    catch (Exception ex)
    {
        // Best effort: one failing stock must not abort the whole run, but the
        // reason is reported instead of being silently discarded.
        Console.Error.WriteLine("Failed to get the information of stock '" + stockCode + "': " + ex.Message);
        return null;
    }
}
/// <summary>
/// Starts a new Excel application instance and hides its window.
/// </summary>
/// <returns>The newly created, invisible Excel application object.</returns>
protected static Application GetApplicationObject()
{
    Application excel = new Microsoft.Office.Interop.Excel.Application();
    excel.Visible = false;
    return excel;
}
/// <summary>
/// Creates a workbook in the given Excel instance by passing the file path
/// to <c>Workbooks.Add</c>.
/// </summary>
/// <param name="app">The Excel application that will own the workbook.</param>
/// <param name="file">Path handed to <c>Workbooks.Add</c>.</param>
/// <returns>The workbook returned by Excel.</returns>
protected static Workbook GetWorkBook(Application app, string file)
{
    return app.Workbooks.Add(file);
}
/// <summary>
/// Looks up a sheet of the workbook by its index.
/// </summary>
/// <param name="book">The workbook to read from.</param>
/// <param name="sheetIndex">Index passed to <c>Sheets.get_Item</c>.</param>
/// <returns>
/// The sheet as a <c>Worksheet</c>, or null when the item at that index is
/// not a <c>Worksheet</c>.
/// </returns>
protected static Worksheet GetWorkSheet(Workbook book, int sheetIndex)
{
    object sheetItem = book.Sheets.get_Item(sheetIndex);
    Worksheet worksheet = sheetItem as Worksheet;
    return worksheet;
}
/// <summary>
/// Reads the stock codes from column A of the given sheet, one entry per row
/// of the sheet's used range.
/// </summary>
/// <param name="sheet">The sheet holding the stock codes in column A.</param>
/// <returns>
/// One entry per row (A1 .. A&lt;rows&gt;). An entry is null when the cell is
/// empty or contains only whitespace.
/// </returns>
protected static string[] GetStockCodes(Worksheet sheet)
{
    int rows = sheet.UsedRange.Rows.Count;
    string[] stockCodes = new string[rows];
    for (int i = 1; i <= rows; i++)
    {
        object cellValue = (sheet.get_Range("A" + i.ToString(), Missing.Value) as Range).Value2;
        // Value2 is not always a string: a code typed into a numeric cell
        // comes back as a double, which "as string" would silently turn into
        // null. Convert.ToString handles both cases (and maps null to "").
        string code = Convert.ToString(cellValue, System.Globalization.CultureInfo.InvariantCulture).Trim();
        stockCodes[i - 1] = code.Length == 0 ? null : code;
    }
    return stockCodes;
}
/// <summary>
/// Appends the fetched stock rows below the used range of the sheet and
/// colours the price cells (columns D to G) red or green.
/// </summary>
/// <param name="al">
/// List of <c>string[]</c> rows as produced by <c>GetStockInfor</c>; null
/// entries are skipped and leave their row empty.
/// </param>
/// <param name="sheet">The sheet that receives the rows.</param>
protected static void InsertIntoExcel(ArrayList al, Worksheet sheet)
{
    int rows = sheet.UsedRange.Rows.Count;

    // Excel's Font.Color expects an OLE colour value, not the ARGB value
    // returned by Color.ToArgb(); ColorTranslator.ToOle does the conversion.
    int red = ColorTranslator.ToOle(Color.Red);
    int green = ColorTranslator.ToOle(Color.Green);

    System.Globalization.NumberStyles priceStyle =
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    for (int i = 0; i < al.Count; i++)
    {
        string[] stockInfors = al[i] as string[];
        if (stockInfors == null)
        {
            continue;
        }

        // The target row depends on the list index, so a skipped entry leaves a gap.
        string row = (i + rows + 1).ToString();
        for (int j = 0; j < stockInfors.Length; j++)
        {
            // 65 is 'A': value j goes to column A + j.
            (sheet.get_Range(Convert.ToChar(65 + j).ToString() + row, Missing.Value) as Range).Value2 = stockInfors[j];
        }

        // The prices come from a web page that uses '.' as decimal separator,
        // so they are parsed culture-independently. A price that is not a
        // number only disables the colouring of this row instead of aborting
        // the whole run with a FormatException.
        double yesPri = 0, opePri = 0, higPri = 0, lowPri = 0, cloPri = 0;
        if (stockInfors.Length < 7
            || !double.TryParse(stockInfors[2], priceStyle, invariant, out yesPri)
            || !double.TryParse(stockInfors[3], priceStyle, invariant, out opePri)
            || !double.TryParse(stockInfors[4], priceStyle, invariant, out higPri)
            || !double.TryParse(stockInfors[5], priceStyle, invariant, out lowPri)
            || !double.TryParse(stockInfors[6], priceStyle, invariant, out cloPri))
        {
            continue;
        }

        // Open price: red when above yesterday's price, green otherwise.
        (sheet.get_Range("D" + row, Missing.Value) as Range).Font.Color = yesPri < opePri ? red : green;
        // Highest, lowest and close price: green when below the open price, red otherwise.
        (sheet.get_Range("E" + row, Missing.Value) as Range).Font.Color = higPri < opePri ? green : red;
        (sheet.get_Range("F" + row, Missing.Value) as Range).Font.Color = lowPri < opePri ? green : red;
        (sheet.get_Range("G" + row, Missing.Value) as Range).Font.Color = cloPri < opePri ? green : red;
    }
}
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using Microsoft.Office.Core;
using Microsoft.Office.Interop.Excel;
using Microsoft.Office.Interop;
using System.Collections;
using System.Reflection;
using System.Drawing.Imaging;
using System.Drawing;
namespace ConsoleApplication1
{
class Program
{
static string fileName = Environment.CurrentDirectory + "\\" + "Stock.xls";
/// <summary>
/// Entry point. Reads the stock codes from sheet 2 of the workbook, fetches the
/// data for every code, writes the results to sheet 1 and saves a copy of the
/// workbook back to <c>fileName</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    Application app = GetApplicationObject();
    try
    {
        // Disable Excel's alerts before doing any work: if something below fails,
        // Quit() in the finally block must not block on a save prompt shown by
        // the invisible Excel instance.
        app.DisplayAlerts = false;

        // Opening the workbook and sheets happens inside the try block so that
        // Excel is always shut down, even when the file or a sheet is missing.
        Workbook book = GetWorkBook(app, fileName);
        Worksheet stockCodeSheet = GetWorkSheet(book, 2);
        Worksheet stockInforSheet = GetWorkSheet(book, 1);

        string[] stockCodes = GetStockCodes(stockCodeSheet);
        ArrayList al = new ArrayList();
        foreach (string item in stockCodes)
        {
            // A failed lookup yields null; the entry is kept so that list
            // positions still correspond to rows of the code sheet.
            al.Add(GetStockInfor(item));
        }
        InsertIntoExcel(al, stockInforSheet);

        // Mark the workbook as saved so that closing Excel does not ask again,
        // then write the copy over the original file.
        book.Saved = true;
        book.SaveCopyAs(fileName);
    }
    finally
    {
        app.Quit();
        app = null;
        // Let the runtime release the COM wrappers so the Excel process can exit.
        GC.Collect();
        GC.WaitForPendingFinalizers();
    }
}
/// <summary>
/// Downloads the quote page for one stock and extracts its data from the HTML.
/// </summary>
/// <param name="stockCode">
/// Stock code; it is appended to the query URL and used to locate the
/// <c>bfn_la_&lt;code&gt;</c> price element in the page.
/// </param>
/// <returns>
/// Eight strings in this order: stock code, title, yesterday's price, open
/// price, highest price, lowest price, close (latest) price and today's date
/// in short format. Returns null when the code is blank, the expected
/// elements are not found, or any error occurs while downloading or parsing.
/// </returns>
protected static string[] GetStockInfor(string stockCode)
{
    // A blank code cannot match anything; skip the pointless web request.
    if (stockCode == null || stockCode.Trim().Length == 0)
    {
        return null;
    }
    try
    {
        string strStockInfor;
        // Dispose the client, the response stream and the reader deterministically.
        using (WebClient wc = new WebClient())
        using (Stream s = wc.OpenRead("http://baidu.hexun.com/stock/q.php?code=" + Uri.EscapeDataString(stockCode)))
        using (StreamReader sr = new StreamReader(s, Encoding.GetEncoding("GB2312")))
        {
            strStockInfor = sr.ReadToEnd();
        }

        // Flatten the page: drop line breaks, tabs and <br> tags.
        strStockInfor = Regex.Replace(strStockInfor, @"[\r\n\t]+", "");
        strStockInfor = strStockInfor.Replace("<br>", "");

        // Cut the page down to the "stockName" div (up to its first </div>).
        Match divMatch = Regex.Match(strStockInfor, "<div[^>]*id=\"stockName\"[^>]*>");
        if (!divMatch.Success)
        {
            return null;
        }
        strStockInfor = strStockInfor.Substring(divMatch.Index);
        int indexEnd = strStockInfor.IndexOf("</div>", StringComparison.Ordinal);
        if (indexEnd < 0)
        {
            return null;
        }
        strStockInfor = strStockInfor.Substring(0, indexEnd + 6);

        //get the title of the stock: the text of the first <span> in the section
        Match spanMatch = Regex.Match(strStockInfor, "<span[^>]*>");
        if (!spanMatch.Success)
        {
            return null;
        }
        int titleStartIndex = spanMatch.Index + spanMatch.Length;
        int titleEndIndex = strStockInfor.IndexOf("</span>", titleStartIndex, StringComparison.Ordinal);
        if (titleEndIndex < 0)
        {
            return null;
        }
        string title = strStockInfor.Substring(titleStartIndex, titleEndIndex - titleStartIndex);

        //get the closed price: the numeric text of the span whose id is bfn_la_<code>
        string pricePattern = "<span[^>]*id=\"bfn_la_" + Regex.Escape(stockCode) + "\"" + @"[^>]*>(\d*\.?\d*)</span>";
        Match priceMatch = Regex.Match(strStockInfor, pricePattern);
        if (!priceMatch.Success)
        {
            return null;
        }
        string closePri = priceMatch.Groups[1].Value;

        //get the statistics data: the last <td class="s_num"> cell of the section
        int sumStartIndex = strStockInfor.LastIndexOf("<td class=\"s_num\">", StringComparison.Ordinal);
        int sumEndIndex = strStockInfor.LastIndexOf("</td>", StringComparison.Ordinal);
        if (sumStartIndex < 0 || sumEndIndex < sumStartIndex)
        {
            throw new InvalidDataException("The structure of the website has been changed!");
        }
        string strSum = strStockInfor.Substring(sumStartIndex, sumEndIndex - sumStartIndex + 5);
        strSum = strSum.Replace(" ", "");
        // Strip the Chinese labels ("xxx:") and every tag except </span>,
        // which is kept as the separator between the values.
        strSum = Regex.Replace(strSum, @"[\u4e00-\u9fa5]*:", "");
        strSum = Regex.Replace(strSum, @"<td[^>]*>", "", RegexOptions.IgnoreCase);
        strSum = Regex.Replace(strSum, @"</td>", "", RegexOptions.IgnoreCase);
        strSum = Regex.Replace(strSum, @"<span[^>]*>", "", RegexOptions.IgnoreCase);
        string[] sum = strSum.Split(new string[] { "</span>" }, StringSplitOptions.RemoveEmptyEntries);
        if (sum.Length != 10)
        {
            throw new InvalidDataException("The structure of the website has been changed!");
        }
        string yesterdayPri = sum[0];
        string openPri = sum[1];
        string highestPri = sum[3];
        string lowestPri = sum[4];
        return new string[] { stockCode, title, yesterdayPri, openPri, highestPri, lowestPri, closePri, System.DateTime.Now.ToShortDateString() };
    }
    catch (Exception ex)
    {
        // Best effort: one failing stock must not abort the whole run, but the
        // reason is reported instead of being silently discarded.
        Console.Error.WriteLine("Failed to get the information of stock '" + stockCode + "': " + ex.Message);
        return null;
    }
}
/// <summary>
/// Starts a new Excel application instance and hides its window.
/// </summary>
/// <returns>The newly created, invisible Excel application object.</returns>
protected static Application GetApplicationObject()
{
    Application excel = new Microsoft.Office.Interop.Excel.Application();
    excel.Visible = false;
    return excel;
}
/// <summary>
/// Creates a workbook in the given Excel instance by passing the file path
/// to <c>Workbooks.Add</c>.
/// </summary>
/// <param name="app">The Excel application that will own the workbook.</param>
/// <param name="file">Path handed to <c>Workbooks.Add</c>.</param>
/// <returns>The workbook returned by Excel.</returns>
protected static Workbook GetWorkBook(Application app, string file)
{
    return app.Workbooks.Add(file);
}
/// <summary>
/// Looks up a sheet of the workbook by its index.
/// </summary>
/// <param name="book">The workbook to read from.</param>
/// <param name="sheetIndex">Index passed to <c>Sheets.get_Item</c>.</param>
/// <returns>
/// The sheet as a <c>Worksheet</c>, or null when the item at that index is
/// not a <c>Worksheet</c>.
/// </returns>
protected static Worksheet GetWorkSheet(Workbook book, int sheetIndex)
{
    object sheetItem = book.Sheets.get_Item(sheetIndex);
    Worksheet worksheet = sheetItem as Worksheet;
    return worksheet;
}
/// <summary>
/// Reads the stock codes from column A of the given sheet, one entry per row
/// of the sheet's used range.
/// </summary>
/// <param name="sheet">The sheet holding the stock codes in column A.</param>
/// <returns>
/// One entry per row (A1 .. A&lt;rows&gt;). An entry is null when the cell is
/// empty or contains only whitespace.
/// </returns>
protected static string[] GetStockCodes(Worksheet sheet)
{
    int rows = sheet.UsedRange.Rows.Count;
    string[] stockCodes = new string[rows];
    for (int i = 1; i <= rows; i++)
    {
        object cellValue = (sheet.get_Range("A" + i.ToString(), Missing.Value) as Range).Value2;
        // Value2 is not always a string: a code typed into a numeric cell
        // comes back as a double, which "as string" would silently turn into
        // null. Convert.ToString handles both cases (and maps null to "").
        string code = Convert.ToString(cellValue, System.Globalization.CultureInfo.InvariantCulture).Trim();
        stockCodes[i - 1] = code.Length == 0 ? null : code;
    }
    return stockCodes;
}
/// <summary>
/// Appends the fetched stock rows below the used range of the sheet and
/// colours the price cells (columns D to G) red or green.
/// </summary>
/// <param name="al">
/// List of <c>string[]</c> rows as produced by <c>GetStockInfor</c>; null
/// entries are skipped and leave their row empty.
/// </param>
/// <param name="sheet">The sheet that receives the rows.</param>
protected static void InsertIntoExcel(ArrayList al, Worksheet sheet)
{
    int rows = sheet.UsedRange.Rows.Count;

    // Excel's Font.Color expects an OLE colour value, not the ARGB value
    // returned by Color.ToArgb(); ColorTranslator.ToOle does the conversion.
    int red = ColorTranslator.ToOle(Color.Red);
    int green = ColorTranslator.ToOle(Color.Green);

    System.Globalization.NumberStyles priceStyle =
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    for (int i = 0; i < al.Count; i++)
    {
        string[] stockInfors = al[i] as string[];
        if (stockInfors == null)
        {
            continue;
        }

        // The target row depends on the list index, so a skipped entry leaves a gap.
        string row = (i + rows + 1).ToString();
        for (int j = 0; j < stockInfors.Length; j++)
        {
            // 65 is 'A': value j goes to column A + j.
            (sheet.get_Range(Convert.ToChar(65 + j).ToString() + row, Missing.Value) as Range).Value2 = stockInfors[j];
        }

        // The prices come from a web page that uses '.' as decimal separator,
        // so they are parsed culture-independently. A price that is not a
        // number only disables the colouring of this row instead of aborting
        // the whole run with a FormatException.
        double yesPri = 0, opePri = 0, higPri = 0, lowPri = 0, cloPri = 0;
        if (stockInfors.Length < 7
            || !double.TryParse(stockInfors[2], priceStyle, invariant, out yesPri)
            || !double.TryParse(stockInfors[3], priceStyle, invariant, out opePri)
            || !double.TryParse(stockInfors[4], priceStyle, invariant, out higPri)
            || !double.TryParse(stockInfors[5], priceStyle, invariant, out lowPri)
            || !double.TryParse(stockInfors[6], priceStyle, invariant, out cloPri))
        {
            continue;
        }

        // Open price: red when above yesterday's price, green otherwise.
        (sheet.get_Range("D" + row, Missing.Value) as Range).Font.Color = yesPri < opePri ? red : green;
        // Highest, lowest and close price: green when below the open price, red otherwise.
        (sheet.get_Range("E" + row, Missing.Value) as Range).Font.Color = higPri < opePri ? green : red;
        (sheet.get_Range("F" + row, Missing.Value) as Range).Font.Color = lowPri < opePri ? green : red;
        (sheet.get_Range("G" + row, Missing.Value) as Range).Font.Color = cloPri < opePri ? green : red;
    }
}
}
}