firecrawl docker 部署

1.先把项目拉取下来

# Fetch the Firecrawl sources and switch into the repository root.
git clone https://github.com/mendableai/firecrawl.git
cd firecrawl

2.备份原始的 docker-compose.yaml 配置文件

 # Keep an untouched copy of the original compose file before editing it.
 cp docker-compose.yaml docker-compose1.yaml

3.修改docker-compose.yaml文件(里面有的镜像拉取不下来)

version: '3.8'

# Compose project name.
name: firecrawl

# Shared service settings, merged into services with `<<: *common-service`.
x-common-service: &common-service
  # Huawei Cloud SWR mirror of ghcr.io/mendableai/firecrawl.
  image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/ghcr.io/mendableai/firecrawl:latest
  networks:
    - backend
  extra_hosts:
    # Maps host.docker.internal to the Docker host gateway.
    - 'host.docker.internal:host-gateway'
  # Soft and hard limits for open file descriptors.
  ulimits:
    nofile:
      soft: 65535
      hard: 65535

# Shared environment, merged into services with `<<: *common-env`.
# Values come from the shell / .env file; only the first three have defaults.
x-common-env: &common-env
  # Internal service endpoints. Note that REDIS_RATE_LIMIT_URL is derived
  # from REDIS_URL, not from a variable of its own name.
  REDIS_URL: '${REDIS_URL:-redis://redis:6379}'
  REDIS_RATE_LIMIT_URL: '${REDIS_URL:-redis://redis:6379}'
  PLAYWRIGHT_MICROSERVICE_URL: '${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000/scrape}'

  # Authentication, keys and Supabase.
  USE_DB_AUTHENTICATION: '${USE_DB_AUTHENTICATION}'
  BULL_AUTH_KEY: '${BULL_AUTH_KEY}'
  TEST_API_KEY: '${TEST_API_KEY}'
  SUPABASE_URL: '${SUPABASE_URL}'
  SUPABASE_ANON_TOKEN: '${SUPABASE_ANON_TOKEN}'
  SUPABASE_SERVICE_TOKEN: '${SUPABASE_SERVICE_TOKEN}'

  # Model providers.
  OPENAI_API_KEY: '${OPENAI_API_KEY}'
  OPENAI_BASE_URL: '${OPENAI_BASE_URL}'
  OLLAMA_BASE_URL: '${OLLAMA_BASE_URL}'
  MODEL_NAME: '${MODEL_NAME}'
  MODEL_EMBEDDING_NAME: '${MODEL_EMBEDDING_NAME}'

  # Search providers.
  SERPER_API_KEY: '${SERPER_API_KEY}'
  SEARCHAPI_API_KEY: '${SEARCHAPI_API_KEY}'
  SEARXNG_ENDPOINT: '${SEARXNG_ENDPOINT}'
  SEARXNG_ENGINES: '${SEARXNG_ENGINES}'
  SEARXNG_CATEGORIES: '${SEARXNG_CATEGORIES}'

  # Proxy settings.
  PROXY_SERVER: '${PROXY_SERVER}'
  PROXY_USERNAME: '${PROXY_USERNAME}'
  PROXY_PASSWORD: '${PROXY_PASSWORD}'

  # Webhooks, logging and analytics.
  SLACK_WEBHOOK_URL: '${SLACK_WEBHOOK_URL}'
  SELF_HOSTED_WEBHOOK_URL: '${SELF_HOSTED_WEBHOOK_URL}'
  LOGGING_LEVEL: '${LOGGING_LEVEL}'
  POSTHOG_API_KEY: '${POSTHOG_API_KEY}'
  POSTHOG_HOST: '${POSTHOG_HOST}'

services:
  # Playwright service; listens on port 3000 inside the backend network.
  playwright-service:
    # Huawei Cloud SWR mirror of the upstream image
    # ghcr.io/mendableai/playwright-service:latest.
    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/ghcr.io/mendableai/playwright-service:latest
    networks:
      - backend
    environment:
      PORT: '3000'
      BLOCK_MEDIA: '${BLOCK_MEDIA}'
      PROXY_SERVER: '${PROXY_SERVER}'
      PROXY_USERNAME: '${PROXY_USERNAME}'
      PROXY_PASSWORD: '${PROXY_PASSWORD}'

  # API process (FLY_PROCESS_GROUP=app); the only service published on the host.
  api:
    <<: *common-service
    command:
      - pnpm
      - run
      - 'start:production'
    ports:
      # Host port ${PORT} -> container port ${INTERNAL_PORT}; both default to 3002.
      - '${PORT:-3002}:${INTERNAL_PORT:-3002}'
    depends_on:
      - redis
      - playwright-service
    environment:
      <<: *common-env
      ENV: local
      FLY_PROCESS_GROUP: app
      HOST: '0.0.0.0'
      PORT: '${INTERNAL_PORT:-3002}'

  # Worker process (FLY_PROCESS_GROUP=worker); started after the api service.
  worker:
    <<: *common-service
    command:
      - pnpm
      - run
      - workers
    depends_on:
      - redis
      - playwright-service
      - api
    environment:
      <<: *common-env
      ENV: local
      FLY_PROCESS_GROUP: worker

  # Redis 7.0.12 from a Huawei Cloud SWR mirror; no host port is published.
  redis:
    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/library/redis:7.0.12
    # Bind to all interfaces inside the container.
    command:
      - redis-server
      - '--bind'
      - '0.0.0.0'
    networks:
      - backend

# Bridge network that playwright-service, api, worker and redis all attach to.
networks:
  backend:
    driver: bridge

3.1 配置 .env 环境变量文件,根据需要修改

# Run this from the firecrawl repository root (step 1 already did `cd firecrawl`).
# Staying in the root keeps the later `docker compose` commands next to
# docker-compose.yaml.
# Copy the sample env file to the repository root as .env, then edit it as needed.
cp ./apps/api/.env.example ./.env

 

 

 

4.启动docker

docker compose build
# Compose v2 plugin syntax. With the standalone binary, run
# `docker-compose up -d` instead.
docker compose up -d

 

 

5.测试

import logging
from firecrawl import FirecrawlApp

# Configure root logging at INFO level and create a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def get(url='https://www.kujiale.com/',
        api_url="http://10.101.92.218:3002",
        api_key="xxxx"):
    """Scrape one page as markdown through a Firecrawl API endpoint.

    Calling ``get()`` with no arguments performs the same request as before.

    Args:
        url: Page to scrape.
        api_url: Base URL of the Firecrawl API to call.
        api_key: Key handed to ``FirecrawlApp``; the default looks like a
            placeholder -- replace it if your deployment checks keys.

    Returns:
        The value returned by ``FirecrawlApp.scrape_url``, or ``None`` when
        any exception was raised (the error is logged, not re-raised).
    """
    try:
        app = FirecrawlApp(api_key=api_key, api_url=api_url)
        params = {'formats': ['markdown']}
        logger.info("开始抓取网页...")
        scrape_status = app.scrape_url(url, params=params)
        logger.info("抓取结果:")
        print(scrape_status)
        return scrape_status
    except Exception as e:
        # logger.exception keeps the same message but also records the
        # traceback, which the previous logger.error(str(e)) call discarded.
        logger.exception("抓取过程中发生错误:%s", e)
        return None





if __name__ == "__main__":
    # Run the smoke test only when executed as a script, not on import.
    get()

 

posted @ 2025-07-10 20:56  阿布_alone  阅读(263)  评论(0)    收藏  举报
TOP