常用配置

 

1. 反爬的配置（Nginx，可放在 server 或 location 块内）

# Reject requests whose User-Agent contains Scrapy, HttpClient or Crawlers.
# "~*" makes the match case-insensitive; a match is answered with 403 Forbidden.
# curl is deliberately absent from this list, so this rule by itself lets curl through.
# NOTE(review): a separate case-sensitive User-Agent rule in this configuration
# lists "curl", which still rejects curl's default User-Agent -- confirm which is intended.
if ($http_user_agent ~* (Scrapy|HttpClient|Crawlers)) {
     return 403;
}

# Reject requests whose User-Agent contains any of the listed substrings (403 Forbidden).
# "~" is a case-sensitive match, so e.g. "curl" matches "curl/8.0" but not "Curl".
# Besides scrapers and headless browsers, the list also covers curl and search-engine
# crawlers (Googlebot, Baiduspider, Sogou, YandexBot, Bytespider), so those get 403 too.
# NOTE(review): "lightDeckReports Bot" looks like a truncated "FlightDeckReports Bot"
# (already covered by "FlightDeckReports"), and "Googlebot-Image" is already covered
# by "Googlebot"; both alternatives appear redundant.
if ($http_user_agent ~ "HeadlessChrome|Phantom|DotBot|SemrushBot|YisouSpider|WinHttp|WebZIP|FetchURL|node-superagent|FeedDemon|Jullo|JikeSpider|Indy Library|Alexa Toolbar|AskTbFXTV|AhrefsBot|CrawlDaddy|Feedly|Apache-HttpAsyncClient|UniversalFeedParser|ApacheBench|Microsoft URL Control|Swiftbot|ZmEu|oBot|jaunty|Python-urllib|lightDeckReports Bot|YYSpider|DigExt|MJ12bot|heritrix|EasouSpider|Ezooms|BOT/0.1|YandexBot|FlightDeckReports|Crawlers|Linguee Bot|ExtLinksBot|curl|Googlebot-Image|Bytespider|Baiduspider|Sogou|Googlebot|google|The Knowledge AI|Apache-HttpClient|SM-G900P" ) {
     return 403;
}
# Reject any request method other than GET, HEAD, POST, PUT, DELETE or OPTIONS
# (for example TRACE, CONNECT or PATCH) with 403 Forbidden.
# "!~" is a negated, case-sensitive match; ^...$ anchors it to the whole method name.
if ($request_method !~ ^(GET|HEAD|POST|PUT|DELETE|OPTIONS)$) {
    return 403;
}

 

posted @ 2024-09-05 09:34  羊脂玉净瓶  阅读(13)  评论(0)    收藏  举报