扩展 Request.Browser.Crawler 支持的搜索引擎 (search engines)

扩展 Crawler 属性所支持搜索引擎的方法
ASP.NET (running under IIS) uses the data in the <browserCaps> section of machine.config or web.config to determine whether the client browser is a crawler. By default the crawler filter information is blank, which is why Request.Browser.Crawler always returns false.
To fix this problem, change machine.config by adding the following crawler filters to the <browserCaps> section. If you only want to apply the change to a specific website, add the <browserCaps> section with these crawler filters inside the <system.web> section of that site's web.config instead.

<browserCaps>
  <filter>
    <!--
      SEARCH ENGINES GROUP

      Each <case> tests the client's User-Agent string against a .NET
      regular expression and, on a match, assigns the capability values
      listed in its body. Setting crawler=true is what makes
      Request.Browser.Crawler report true.

      Order matters: the cases are listed most-specific first (for example
      the versioned Googlebot pattern precedes the generic one), because
      only the first matching <case> in a <filter> is expected to apply.

      Patterns are case-sensitive unless they opt in to case-insensitive
      matching with the inline (?i) option.
    -->

    <!-- Googlebot / Googlebot-Image with a leading "name/major.minor" token;
         captures the version parts (Yahoo uses this as well). -->
    <case match="^Googlebot(\-Image)?/(?'version'(?'major'\d+)(?'minor'\.\d+)).*">
      browser=Google
      version=${version}
      majorversion=${major}
      minorversion=${minor}
      crawler=true
    </case>

    <!-- Fallback: any other User-Agent that contains "Googlebot". -->
    <case match="Googlebot">
      browser=Googlebot
      crawler=true
    </case>

    <!-- AltaVista (Scooter), "Scooter/x.y" or "Scooter-x.y"; captures the version. -->
    <case match="^Scooter(/|-)(?'version'(?'major'\d+)(?'minor'\.\d+)).*">
      browser=AltaVista
      version=${version}
      majorversion=${major}
      minorversion=${minor}
      crawler=true
    </case>

    <!-- AltaVista (Mercator) -->
    <case match="Mercator">
      browser=AltaVista
      crawler=true
    </case>

    <!-- Slurp (Yahoo uses this as well) -->
    <case match="Slurp">
      browser=Slurp
      crawler=true
    </case>

    <!-- MSN. Matched case-insensitively so that both "MSNBOT" and the
         lowercase "msnbot/..." form of the User-Agent are recognised. -->
    <case match="(?i)msnbot">
      browser=MSN
      crawler=true
    </case>

    <!-- Northern Light (Gulliver); captures the version. -->
    <case match="^Gulliver/(?'version'(?'major'\d+)(?'minor'\.\d+)).*">
      browser=NorthernLight
      version=${version}
      majorversion=${major}
      minorversion=${minor}
      crawler=true
    </case>

    <!-- Excite -->
    <case match="ArchitextSpider">
      browser=Excite
      crawler=true
    </case>

    <!-- Lycos -->
    <case match="Lycos_Spider">
      browser=Lycos
      crawler=true
    </case>

    <!-- Ask Jeeves. NOTE(review): the browser value keeps its historical
         spelling "AskJeaves" so code that already compares against it
         keeps working. -->
    <case match="Ask Jeeves">
      browser=AskJeaves
      crawler=true
    </case>

    <!-- FAST WebCrawler; captures the version. -->
    <case match="^FAST-WebCrawler/(?'version'(?'major'\d+)(?'minor'\.\d+)).*">
      browser=Fast
      version=${version}
      majorversion=${major}
      minorversion=${minor}
      crawler=true
    </case>

    <!-- IBM Research Web Crawler, identified by the URL embedded in its
         User-Agent. Every dot in the host name is escaped so it matches a
         literal "." rather than any character. -->
    <case match="http\:\/\/www\.almaden\.ibm\.com\/cs\/crawler">
      browser=IBMResearchWebCrawler
      crawler=true
    </case>

  </filter>
</browserCaps>
posted @ 2008-08-29 11:04  小树爸爸  阅读(632)  评论(0)  编辑  收藏  举报