firecrawl docker 部署
1.先把项目拉取下来
# Clone the repository, then enter the checkout. These are two separate
# commands; on one line `cd firecrawl` would be passed to `git clone` as
# extra arguments.
git clone https://github.com/mendableai/firecrawl.git
cd firecrawl
2.备份原始的docker-compose.yaml配置文件
# Keep an untouched copy of the compose file before editing it.
cp docker-compose.yaml docker-compose1.yaml
3.修改docker-compose.yaml文件(里面有的镜像拉取不下来)
version: '3.8'
# Compose project for a self-hosted Firecrawl stack.
# Every image is referenced through the swr.cn-north-4.myhuaweicloud.com
# registry instead of ghcr.io / docker.io, because some of the original
# images could not be pulled.
name: firecrawl

# Settings shared by the api and worker services (merged in with `<<`).
x-common-service: &common-service
  image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/ghcr.io/mendableai/firecrawl:latest
  ulimits:
    nofile:
      soft: 65535
      hard: 65535
  networks:
    - backend
  extra_hosts:
    - "host.docker.internal:host-gateway"

# Environment shared by the api and worker services. Values without a
# `:-default` are taken from the shell / .env file as-is.
x-common-env: &common-env
  REDIS_URL: ${REDIS_URL:-redis://redis:6379}
  # NOTE(review): this reads REDIS_URL, not REDIS_RATE_LIMIT_URL, so both
  # settings always point at the same Redis instance - confirm intended.
  REDIS_RATE_LIMIT_URL: ${REDIS_URL:-redis://redis:6379}
  PLAYWRIGHT_MICROSERVICE_URL: ${PLAYWRIGHT_MICROSERVICE_URL:-http://playwright-service:3000/scrape}
  USE_DB_AUTHENTICATION: ${USE_DB_AUTHENTICATION}
  OPENAI_API_KEY: ${OPENAI_API_KEY}
  OPENAI_BASE_URL: ${OPENAI_BASE_URL}
  MODEL_NAME: ${MODEL_NAME}
  MODEL_EMBEDDING_NAME: ${MODEL_EMBEDDING_NAME}
  OLLAMA_BASE_URL: ${OLLAMA_BASE_URL}
  SLACK_WEBHOOK_URL: ${SLACK_WEBHOOK_URL}
  BULL_AUTH_KEY: ${BULL_AUTH_KEY}
  TEST_API_KEY: ${TEST_API_KEY}
  POSTHOG_API_KEY: ${POSTHOG_API_KEY}
  POSTHOG_HOST: ${POSTHOG_HOST}
  SUPABASE_ANON_TOKEN: ${SUPABASE_ANON_TOKEN}
  SUPABASE_URL: ${SUPABASE_URL}
  SUPABASE_SERVICE_TOKEN: ${SUPABASE_SERVICE_TOKEN}
  SELF_HOSTED_WEBHOOK_URL: ${SELF_HOSTED_WEBHOOK_URL}
  SERPER_API_KEY: ${SERPER_API_KEY}
  SEARCHAPI_API_KEY: ${SEARCHAPI_API_KEY}
  LOGGING_LEVEL: ${LOGGING_LEVEL}
  PROXY_SERVER: ${PROXY_SERVER}
  PROXY_USERNAME: ${PROXY_USERNAME}
  PROXY_PASSWORD: ${PROXY_PASSWORD}
  SEARXNG_ENDPOINT: ${SEARXNG_ENDPOINT}
  SEARXNG_ENGINES: ${SEARXNG_ENGINES}
  SEARXNG_CATEGORIES: ${SEARXNG_CATEGORIES}

services:
  playwright-service:
    # Original upstream image, kept for reference:
    # image: ghcr.io/mendableai/playwright-service:latest
    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/ghcr.io/mendableai/playwright-service:latest
    environment:
      # Must match the port used in PLAYWRIGHT_MICROSERVICE_URL above.
      PORT: 3000
      PROXY_SERVER: ${PROXY_SERVER}
      PROXY_USERNAME: ${PROXY_USERNAME}
      PROXY_PASSWORD: ${PROXY_PASSWORD}
      BLOCK_MEDIA: ${BLOCK_MEDIA}
    networks:
      - backend

  api:
    <<: *common-service
    environment:
      <<: *common-env
      HOST: "0.0.0.0"
      PORT: ${INTERNAL_PORT:-3002}
      FLY_PROCESS_GROUP: app
      ENV: local
    depends_on:
      - redis
      - playwright-service
    ports:
      # host port : container port
      - "${PORT:-3002}:${INTERNAL_PORT:-3002}"
    command: [ "pnpm", "run", "start:production" ]

  worker:
    <<: *common-service
    environment:
      <<: *common-env
      FLY_PROCESS_GROUP: worker
      ENV: local
    depends_on:
      - redis
      - playwright-service
      - api
    command: [ "pnpm", "run", "workers" ]

  redis:
    image: swr.cn-north-4.myhuaweicloud.com/ddn-k8s/docker.io/library/redis:7.0.12
    networks:
      - backend
    command: redis-server --bind 0.0.0.0

networks:
  backend:
    driver: bridge
3.配置环境变量(.env),根据需要修改
# Run from the repository root (the firecrawl directory that holds
# docker-compose.yaml). Copy the example env file to the repository root,
# then edit .env as needed. Staying in the root keeps the working directory
# correct for the docker compose commands that follow.
cp ./apps/api/.env.example ./.env
4.启动docker
# Run from the repository root, where docker-compose.yaml lives.
# NOTE(review): the compose file from step 3 only references prebuilt images
# (no `build:` keys), so `build` is likely a no-op - confirm.
docker compose build
docker compose up -d
# With the legacy standalone Compose v1 binary, run this instead of the
# previous line:
# docker-compose up -d
5.测试
# Smoke test: scrape one page through the self-hosted Firecrawl API and
# print the result.
import logging

from firecrawl import FirecrawlApp

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def get():
    """Scrape a single URL as markdown and print the raw result.

    Any exception raised while creating the client or scraping is caught
    and logged at ERROR level instead of being propagated.
    """
    try:
        # api_key is a placeholder value; api_url targets port 3002, the
        # default port published by the api service in the compose file.
        app = FirecrawlApp(api_key="xxxx", api_url="http://10.101.92.218:3002")
        params = {
            'formats': ['markdown'],
        }
        logger.info("开始抓取网页...")
        scrape_status = app.scrape_url('https://www.kujiale.com/', params=params)
        logger.info("抓取结果:")
        print(scrape_status)
    except Exception as e:
        logger.error(f"抓取过程中发生错误:{str(e)}")


get()