nkyling

 

.NET 使用正则表达式获取视频标题和文件地址（优酷、酷6、土豆）

 1  static string[] rgUrlArr = new string[] 
 2         {
 3             @"^http://d?v\.youku\.com",
 4             @"^http://www\.tudou\.com",
 5             @"^http://v\.ku6\.com"
 6         };
 7 
 8         static string[] rgVideoUrlArr = new string[] 
 9         {
10             @"http://player\.youku\.com/player\.php/sid/[\w-]{13}/v\.swf", //优酷的文件地址正则
11             @"icode\:\s?[\'\042][\w-]{11}[\'\042]",                        //土豆的itemCode正则
12             @"http://player\.ku6\.com/refer/[\w\.-]{16,24}/v\.swf"         //酷6的文件地址正则
13         };
14             
15         /// <summary>
16         /// 获取视频文件地址
17         /// </summary>
18         /// <param name="url">视频地址</param>
19         /// <returns></returns>
20         public static string GetVideoUrlFromWebPage(string url) 
21         {
22             string htmlContent = GetWebContent(url);
23             if (htmlContent == null)
24                 return null;
25 
26             if (new Regex(rgUrlArr[0]).IsMatch(url))              //判断是否为优酷
27             {
28                 if (new Regex(rgVideoUrlArr[0]).IsMatch(htmlContent))
29                     return new Regex(rgVideoUrlArr[0]).Match(htmlContent).ToString();                  
30             }
31             else if (new Regex(rgUrlArr[1]).IsMatch(url))         //判断是否为土豆  
32             {
33                 if (new Regex(rgVideoUrlArr[1]).IsMatch(htmlContent)) 
34                 {
35                     string code = new Regex(rgVideoUrlArr[1]).Match(htmlContent).ToString();
36                     code = code.Substring(code.Length - 12, 11);
37                     return @"http://www.tudou.com/v/" + code + "/v.swf";
38                 }
39             }
40             else if (new Regex(rgUrlArr[2]).IsMatch(url))         //判断是否为酷6  
41             {
42                 if (new Regex(rgVideoUrlArr[2]).IsMatch(htmlContent))
43                     return new Regex(rgVideoUrlArr[2]).Match(htmlContent).ToString();
44             }  
45          
46             return null;
47         }
48 
49         /// <summary>
50         /// 获取页面源码
51         /// </summary>
52         /// <param name="url">地址url</param>
53         /// <returns></returns>
54         private static string GetWebContent(string url) 
55         {
56             string htmlContent = "";
57             try
58             {
59                 HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
60                 request.Headers.Set("Pragma", "no-cache");
61                 request.Timeout = 90000;
62 
63                 HttpWebResponse response = (HttpWebResponse)request.GetResponse();
64                 Stream streamReceive = response.GetResponseStream();
65                 Encoding encode = Encoding.GetEncoding(response.CharacterSet);
66                 StreamReader sr = new StreamReader(streamReceive, encode);
67                 htmlContent = sr.ReadToEnd();
68             }
69             catch 
70             {
71                 return null;
72             }
73 
74             return htmlContent;
75         }
76 
77         /// <summary>
78         /// 获取视频标题
79         /// </summary>
80         /// <param name="url">视频地址</param>
81         /// <returns></returns>
82         public static string GetVideoTitle(string url) 
83         {
84             string htmlContent = GetWebContent(url);
85             if (htmlContent == null) 
86             {
87                 return null;
88             }
89 
90             Regex rg = new Regex(@"\<title\>[^\f\n]{1,100}\<\/title\>"); ;
91             if (rg.IsMatch(htmlContent))
92             {
93                 string title = rg.Match(htmlContent).ToString();
94                 return title.Substring(7, title.Length - 15);
95             }
96             return null;
97         }

 

posted on 2013-01-18 14:56  nqllin  阅读(554)  评论(0)    收藏  举报

导航