DHL 快递跟踪查询

 

思路描述:先请求 DHL 的跟踪页面,再主要使用正则表达式从返回的 HTML 中解析出 <ol> 时间线里的各个 <li> 条目。

返回一个跟踪步骤列表(每个步骤包含时间和描述);页面中找不到时间线时返回空列表。

  /// <summary>
  /// A single step in a shipment's tracking history.
  /// </summary>
  public class TrackingData
  {
      private string _time;
      private string _context;

      /// <summary>
      /// Text describing when the step happened.
      /// </summary>
      public string time
      {
          get { return _time; }
          set { _time = value; }
      }

      /// <summary>
      /// Free-form text describing the step.
      /// </summary>
      public string context
      {
          get { return _context; }
          set { _context = value; }
      }
  }
  6.  
  /// <summary>
  /// Downloads the DHL Global Mail tracking page for a tracking number and
  /// extracts the tracking timeline from its HTML with regular expressions.
  /// </summary>
  public class DHLExpressTrackingHelper
  {
      private static readonly string urlFormat = "http://webtrack.dhlglobalmail.com/?trackingnumber={0}";

      // The patterns are fixed, so each Regex is built once and reused.
      // Singleline lets '.' span line breaks inside the captured HTML.
      private static readonly Regex OlTagRegex = new Regex(
          "<ol.*?>(?<olvalue>.*?)</ol>",
          RegexOptions.Multiline | RegexOptions.Singleline);

      private static readonly Regex LiTagRegex = new Regex(
          "<li.*? class=\"(?<className>.*?)\".*?>(?<content>.*?)</li>",
          RegexOptions.Singleline | RegexOptions.Multiline | RegexOptions.IgnoreCase);

      private static readonly Regex DivTagRegex = new Regex(
          "<div.*?class=\"(?<divClassName>.*?)\".*?>(?<divContent>.*?)</div>",
          RegexOptions.Singleline | RegexOptions.Multiline | RegexOptions.IgnoreCase);

      private static readonly Regex HtmlTagRegex = new Regex("<[^<]*>");

      /// <summary>
      /// Fetches the tracking page for <paramref name="trackCode"/> and returns its tracking steps.
      /// </summary>
      /// <param name="trackCode">The tracking number to look up. A null value is treated as an empty string.</param>
      /// <returns>
      /// The parsed tracking steps in page order, or an empty list when the response is empty
      /// or contains no <c>&lt;ol&gt;</c> element. Never null.
      /// </returns>
      public static List<TrackingData> GetTrackingData(string trackCode)
      {
          // Escape the tracking number so characters such as '&', '#' or spaces
          // cannot break or extend the query string.
          var escapedCode = System.Uri.EscapeDataString(trackCode ?? string.Empty);
          var url = string.Format(urlFormat, escapedCode);

          WebMocker dhlMocker = new WebMocker();
          string html = dhlMocker.Get(url);

          // Regex.Match throws on a null input, so treat a missing response as "no data".
          if (string.IsNullOrEmpty(html))
          {
              return new List<TrackingData>();
          }

          var match = OlTagRegex.Match(html);
          if (!match.Success)
          {
              return new List<TrackingData>();
          }

          return ParseTrackingData(match.Groups["olvalue"].Value);
      }

      /// <summary>
      /// Walks the <c>&lt;li&gt;</c> items inside the timeline list. Items whose class is exactly
      /// <c>timeline-date</c> set the date prefix for the following events; items whose class
      /// contains <c>timeline-event</c> become tracking steps.
      /// </summary>
      /// <param name="olValue">The inner HTML of the timeline <c>&lt;ol&gt;</c> element.</param>
      /// <returns>The tracking steps found, in document order. Contains no null entries.</returns>
      private static List<TrackingData> ParseTrackingData(string olValue)
      {
          var list = new List<TrackingData>();

          string datePrefix = null;

          foreach (Match item in LiTagRegex.Matches(olValue))
          {
              var className = item.Groups["className"].Value;
              var content = item.Groups["content"].Value;

              if (className == "timeline-date")
              {
                  datePrefix = content;
              }
              else if (className.Contains("timeline-event"))
              {
                  var trackingData = ParseTimelineEvent(content, datePrefix);

                  // An event without any recognised div yields nothing; skip it
                  // instead of handing callers a null entry.
                  if (trackingData != null)
                  {
                      list.Add(trackingData);
                  }
              }
          }

          return list;
      }

      /// <summary>
      /// Builds one tracking step from the <c>&lt;div&gt;</c> elements of a single timeline event.
      /// </summary>
      /// <param name="content">The inner HTML of the event's <c>&lt;li&gt;</c> element.</param>
      /// <param name="datePrefix">The most recent date heading, or null if none has been seen yet.</param>
      /// <returns>The tracking step, or null when the event holds no time, location or description div.</returns>
      private static TrackingData ParseTimelineEvent(string content, string datePrefix)
      {
          TrackingData trackingData = null;

          foreach (Match subItem in DivTagRegex.Matches(content))
          {
              var divClassName = subItem.Groups["divClassName"].Value.Trim();
              var subContent = subItem.Groups["divContent"].Value.Trim();

              if (divClassName == "timeline-time")
              {
                  // Start a fresh step unless one was already created by an earlier
                  // location/description div that is still waiting for its time.
                  if (trackingData == null || trackingData.time != null)
                  {
                      trackingData = new TrackingData();
                  }

                  // The time cell may wrap its text in nested tags; keep the text only.
                  subContent = HtmlTagRegex.Replace(subContent, "");
                  trackingData.time = datePrefix + " " + subContent;
              }
              else if (divClassName == "timeline-location" || divClassName == "timeline-description")
              {
                  // These divs can precede the time div, so create the step on demand
                  // rather than dereferencing a null reference.
                  if (trackingData == null)
                  {
                      trackingData = new TrackingData();
                  }

                  trackingData.context += subContent + " ";
              }
          }

          return trackingData;
      }
  }
posted @ 2015-09-23 16:55  _DN  阅读(659)  评论(0)  编辑  收藏  举报