DAG 之间的依赖关系可以通过 ExternalTaskSensor 来实现:在传感器中用 external_dag_id 指定所依赖的 DAG,用 external_task_id 指定所依赖的 task(不指定时等待整个 DAG 运行完成)。

1、execution_date_fn

 vim test.py

# coding: utf-8
from airflow import DAG
from airflow_clickhouse_plugin.operators.clickhouse_operator import ClickHouseOperator
from airflow_clickhouse_plugin.sensors.clickhouse_sql_sensor import ClickHouseSqlSensor
from airflow_clickhouse_plugin.hooks.clickhouse_hook import ClickHouseHook
from airflow.sensors.external_task import ExternalTaskSensor
from airflow.operators.python_operator import PythonOperator
from airflow.utils.task_group import TaskGroup
from airflow.models.dag import get_last_dagrun
from airflow.models import DagRun
from airflow.utils.db import provide_session
from datetime import datetime, timedelta
import time
import os
import sys
import pendulum

# Make the project packages that live under $AIRFLOW_HOME importable from
# this DAG file.
airflow_home = os.environ.get("AIRFLOW_HOME")
# Only extend sys.path when the variable is actually set (otherwise None would
# be appended) and the entry is not already there (avoids duplicate entries
# when this module is executed more than once in the same process).
if airflow_home and airflow_home not in sys.path:
    sys.path.append(airflow_home)
from src.label.main.label import LabelCommon

# Time zone applied to every timezone-aware date in this file.
local_tz = pendulum.timezone("Asia/Shanghai")

# Default arguments handed to the DAG via ``default_args=``.
default_args = dict(
    # Name of the owner.
    owner='airflow',
    # Date from which scheduling starts.
    start_date=datetime(2021, 12, 5, tzinfo=local_tz),
    # Number of retries after a failure.
    retries=3,
    # Pause between two retries.
    retry_delay=timedelta(seconds=5),
    # When True, a run is only allowed to proceed if the previous run
    # succeeded; disabled here.
    depends_on_past=False,
)

# Define the DAG.
dag = DAG(
    # Identity and presentation.
    dag_id='dag_label_person_security',  # must be globally unique
    description="xxxxxxxxxxxxxx",
    tags=["人员", "小场景"],
    # Defaults shared by the tasks of this DAG.
    default_args=default_args,
    # Overrides the start_date from default_args. This is not the moment of
    # the first execution: the first run really starts one schedule interval
    # after start_date.
    start_date=datetime(2021, 12, 28, 1, 0, tzinfo=local_tz),
    # Cron expression: minute 05, hour 02, every day.
    # (A plain interval such as timedelta(days=1) would be the alternative.)
    schedule_interval="05 02 * * *",
    # False: do not backfill. True would compute the missed runs starting one
    # interval after start_date; choose according to the actual need.
    catchup=False,
    # Limit the number of simultaneously active runs; 1 is recommended for
    # resource-hungry DAGs.
    max_active_runs=1,
    is_paused_upon_creation=False,
)

@provide_session
def _get_execution_date_of_dag(exec_date, session=None, **kwargs):
    """Return the execution date of the latest ``calculate_security`` DAG run.

    Used as ``execution_date_fn`` of the ``wait_for_dag_a`` sensor so that the
    sensor watches whatever run of the upstream DAG is the most recent one,
    instead of a run with a matching schedule.

    Args:
        exec_date: Date supplied by the sensor; not used by this function.
        session: Database session, injected by ``provide_session`` when the
            caller does not pass one.
        **kwargs: Extra context passed by the caller; ignored.

    Returns:
        The ``execution_date`` of the last ``calculate_security`` DAG run.

    Raises:
        RuntimeError: If no run of ``calculate_security`` can be found even
            after all retries.
    """
    upstream_dag_id = 'calculate_security'
    max_retries = 4
    retry_wait_seconds = 10

    # NOTE(review): get_last_dagrun may skip externally/manually triggered
    # runs unless told otherwise -- confirm against the Airflow version in use.
    dag_a_last_run = get_last_dagrun(upstream_dag_id, session)
    for attempt in range(max_retries):
        print(attempt)
        print(dag_a_last_run)
        if dag_a_last_run is not None:
            break
        # The upstream DAG has no recorded run yet: wait and look again.
        time.sleep(retry_wait_seconds)
        dag_a_last_run = get_last_dagrun(upstream_dag_id, session)

    if dag_a_last_run is None:
        # Fail with an explicit message instead of an AttributeError on None.
        raise RuntimeError(
            f"No DAG run found for '{upstream_dag_id}' "
            f"after {max_retries} retries"
        )

    print(f"EXEC DATE: {dag_a_last_run.execution_date}")
    return dag_a_last_run.execution_date


# Sensor that holds back the downstream tasks until the most recent run of the
# 'calculate_security' DAG has reached one of the allowed states.
wait_for_dag_a = ExternalTaskSensor(
    task_id='wait_for_dag_a',
    # No external_task_id is given, so the sensor watches the DAG run itself.
    external_dag_id='calculate_security',
    # NOTE(review): "failed" is accepted too, so downstream tasks also start
    # when the upstream DAG failed -- confirm this is intended.
    allowed_states=["success", "failed"],
    # Resolve which upstream run to watch (its latest one).
    execution_date_fn=_get_execution_date_of_dag,
    mode='reschedule',
    timeout=600,
    # Attach the sensor to the DAG defined above; without this argument (and
    # without a ``with dag:`` block) the task belongs to no DAG at all.
    dag=dag,
)

# Placeholder for the downstream task: the real operator definition was left
# out of this example, so t1 is currently just an empty tuple.
t1 =(

)

# Declare t1 as downstream of the sensor.
wait_for_dag_a >> t1

2、execution_delta

import airflow
from airflow.models import DAG
from airflow.sensors.external_task_sensor import ExternalTaskSensor
from datetime import timedelta

# DAG that contains the sensor; the remaining DAG arguments are omitted in
# this example.
dag = DAG(
    dag_id='dag2',  # must be a string; the bare name dag2 is not defined
)

# Sensor in dag2 that waits for a run of dag1.
dag1_check_task = ExternalTaskSensor(
    # Id of this sensor task; this is the task name shown in the Airflow UI.
    task_id="dag1_check_task",
    # Id of the DAG this task depends on.
    external_dag_id='dag1',
    # Task inside that DAG to wait for; None waits for the whole DAG run.
    external_task_id=None,
    # States that let the sensor pass; the default is success.
    allowed_states=['success'],
    # Time difference to the external run to look at: the sensor checks the
    # dag1 run whose execution date is exactly 8 hours before this run's
    # execution date (not any run within the last 8 hours).
    execution_delta=timedelta(hours=8),
    dag=dag,
)

# Actual work of dag2; the operator arguments are elided in this example.
# The variable name must match the one used in the dependency chain below.
dag2_task_2 = BashOperator(...)
# NOTE(review): `start`, `end` and the BashOperator import are not shown in
# this excerpt and must be defined before this line.
start >> dag1_check_task >> dag2_task_2 >> end
 posted on 2022-12-26 10:06  xibuhaohao  阅读(830)  评论(0)    收藏  举报