卷积引擎实现(p2):综合

P2:综合与实现流程

综合约束

####################################################################################
# Convolution engine timing constraints file
# Target device: xczu9eg
# Created: 2025/9/14
####################################################################################

# ===========================================
# 1. Clock definitions
# ===========================================

# Main processing clock - 200 MHz (5 ns period), defined on top-level port "clk"
create_clock -period 5.000 -name clk_main [get_ports clk]
set_property IOSTANDARD LVCMOS18 [get_ports clk]
# NOTE(review): the physical constraints in this article target the ZCU102 board;
# confirm that AL8 is a single-ended, LVCMOS18-capable clock input on that board
# (it may belong to a differential clock pair) before relying on this pin.
set_property PACKAGE_PIN AL8 [get_ports clk]

# AXI clock (if it is an independent clock) - 100 MHz
# create_clock -period 10.000 -name axi_clk [get_ports axi_clk]

# ===========================================
# 2. Clock uncertainty and jitter
# ===========================================

# Clock uncertainty for clk_main: 0.200 ns for setup analysis, 0.100 ns for hold
set_clock_uncertainty -setup 0.200 [get_clocks clk_main]
set_clock_uncertainty -hold 0.100 [get_clocks clk_main]

# Input jitter on clk_main (0.100 ns)
set_input_jitter [get_clocks clk_main] 0.100

# ===========================================
# 3. Input/output delay constraints
# ===========================================

# Virtual clock (10 ns period, no source port) used as the reference for the
# AXI-Lite I/O constraints.
create_clock -period 10.000 -name virtual_io_clk

# The s_axi_* wildcard matches ports of both directions. An input delay is only
# meaningful on input ports and an output delay only on output ports, so each
# command is restricted by port DIRECTION; otherwise Vivado reports the
# wrong-direction ports as invalid constraint targets.
set_input_delay -clock virtual_io_clk -max 2.000 [get_ports -filter {DIRECTION == IN} {s_axi_*}]
set_input_delay -clock virtual_io_clk -min 0.500 [get_ports -filter {DIRECTION == IN} {s_axi_*}]

set_output_delay -clock virtual_io_clk -max 2.000 [get_ports -filter {DIRECTION == OUT} {s_axi_*}]
set_output_delay -clock virtual_io_clk -min 0.500 [get_ports -filter {DIRECTION == OUT} {s_axi_*}]

# AXI-Stream interface constraints (relative to clk_main).
# Forward direction: slave-side inputs and master-side outputs.
set_input_delay -clock clk_main -max 1.000 [get_ports -filter {DIRECTION == IN} {s_axis_*}]
set_input_delay -clock clk_main -min 0.200 [get_ports -filter {DIRECTION == IN} {s_axis_*}]

set_output_delay -clock clk_main -max 1.000 [get_ports -filter {DIRECTION == OUT} {m_axis_*}]
set_output_delay -clock clk_main -min 0.200 [get_ports -filter {DIRECTION == OUT} {m_axis_*}]

# Reverse-direction handshake ports (e.g. a tready output on the slave side and
# a tready input on the master side), which the wildcards above leave
# unconstrained once the direction filter is applied. -quiet keeps the file
# clean when the design has no such ports.
set_output_delay -clock clk_main -max 1.000 [get_ports -quiet -filter {DIRECTION == OUT} {s_axis_*}]
set_output_delay -clock clk_main -min 0.200 [get_ports -quiet -filter {DIRECTION == OUT} {s_axis_*}]

set_input_delay -clock clk_main -max 1.000 [get_ports -quiet -filter {DIRECTION == IN} {m_axis_*}]
set_input_delay -clock clk_main -min 0.200 [get_ports -quiet -filter {DIRECTION == IN} {m_axis_*}]

# ===========================================
# 4. Multicycle path constraints
# ===========================================

# Weight-loading paths are allowed two cycles: setup multiplier 2, with the
# matching hold multiplier 1 on the same from/to pins.
# NOTE(review): these get_pins patterns are not -hierarchical; confirm they match
# the registers at the hierarchy depth used in the real design.
set_multicycle_path -setup 2 -from [get_pins */weight_buffer_reg*/C] -to [get_pins */weight_reg*/D]
set_multicycle_path -hold 1 -from [get_pins */weight_buffer_reg*/C] -to [get_pins */weight_reg*/D]

# Accumulator-to-partial-sum paths: same setup 2 / hold 1 relaxation
set_multicycle_path -setup 2 -from [get_pins */accumulator_reg*/C] -to [get_pins */partial_sum_out_reg*/D]
set_multicycle_path -hold 1 -from [get_pins */accumulator_reg*/C] -to [get_pins */partial_sum_out_reg*/D]

# ===========================================
# 5. False path definitions
# ===========================================

# The reset signal is treated as asynchronous: no timing on paths from rst_n
set_false_path -from [get_ports rst_n]

# Paths from the configuration registers (ctrl_reg_reg*) into any cell under sa_inst
set_false_path -from [get_cells -hierarchical -filter {NAME =~ */ctrl_reg_reg*}] \
               -to [get_cells -hierarchical -filter {NAME =~ */sa_inst/*}]

# Status read-back paths: performance counter registers to the s_axi_rdata ports
set_false_path -from [get_cells -hierarchical -filter {NAME =~ */performance_counter_reg*}] \
               -to [get_ports {s_axi_rdata[*]}]

# ===========================================
# 6. Maximum delay constraints (critical paths)
# ===========================================

# DSP chain path: weight register to partial-sum output register inside pe_inst,
# limited to 4.500 ns
set_max_delay 4.500 -from [get_pins */pe_inst/weight_reg_reg*/C] \
              -to [get_pins */pe_inst/partial_sum_out_reg*/D]

# Systolic-array data propagation path: activation_in to activation_out registers,
# limited to 4.000 ns
set_max_delay 4.000 -from [get_pins */activation_in_reg*/C] \
              -to [get_pins */activation_out_reg*/D]

# ===========================================
# 7. Fanout constraints
# ===========================================

# Fanout limits on control signals.
# Vivado does not support the SDC command set_max_fanout; the limit is expressed
# through the MAX_FANOUT property on the net instead. -quiet avoids an error on
# an empty object list if a net name is optimized away.
set_property -quiet MAX_FANOUT 32 [get_nets -quiet -hierarchical -filter {NAME =~ */sa_enable}]
set_property -quiet MAX_FANOUT 32 [get_nets -quiet -hierarchical -filter {NAME =~ */sa_compute}]
set_property -quiet MAX_FANOUT 16 [get_nets -quiet -hierarchical -filter {NAME =~ */weight_load}]

# ===========================================
# 8. Physical constraints (area constraints)
# ===========================================

# Pblock for the systolic array: every cell under sa_inst, placed in the given
# SLICE range.
create_pblock pblock_systolic_array
add_cells_to_pblock pblock_systolic_array [get_cells -hierarchical -filter {NAME =~ */sa_inst/*}]
resize_pblock pblock_systolic_array -add {SLICE_X40Y120:SLICE_X79Y179}

# DSP48E2 placement range added to the same pblock
resize_pblock pblock_systolic_array -add {DSP48E2_X8Y48:DSP48E2_X15Y71}

# Pblock for the memories: every cell whose name contains "buffer", placed in the
# given RAMB36 range.
# NOTE(review): the */*buffer* pattern can also match cells under sa_inst that
# are already assigned to pblock_systolic_array; confirm there is no overlap.
create_pblock pblock_memory
add_cells_to_pblock pblock_memory [get_cells -hierarchical -filter {NAME =~ */*buffer*}]
resize_pblock pblock_memory -add {RAMB36_X4Y24:RAMB36_X7Y35}

# URAM placement: the target device named in this file's header (xczu9eg) has no
# UltraRAM blocks, so a URAM288 site range cannot be resolved there. Enable the
# line below only when retargeting to a device that provides URAM.
# resize_pblock pblock_memory -add {URAM288_X2Y32:URAM288_X3Y39}

# ===========================================
# 9. Clock domain crossing (CDC) constraints
# ===========================================

# If the design has several clock domains, declare them as asynchronous groups
# set_clock_groups -asynchronous \
#     -group [get_clocks clk_main] \
#     -group [get_clocks axi_clk]

# ===========================================
# 10. Power optimization constraints
# ===========================================

# Default switching activity used for power estimation
set_switching_activity -default_toggle_rate 12.5 -default_static_probability 0.5

# Explicit values for a low-activity signal (the weight_load nets)
set_switching_activity -toggle_rate 2.0 -static_probability 0.1 \
    [get_nets -hierarchical -filter {NAME =~ */weight_load}]

# ===========================================
# 11. Run-level synthesis / implementation strategy
# ===========================================

# The settings below act on project runs (get_runs). Project commands such as
# get_runs are not accepted inside an XDC constraints file, so they are kept
# here as documentation only. Execute them from the Vivado Tcl console or from a
# project setup script before launching the runs.
#
# Synthesis run options:
#   set_property STEPS.SYNTH_DESIGN.ARGS.DIRECTIVE AlternateRoutability [get_runs synth_1]
#   set_property STEPS.SYNTH_DESIGN.ARGS.RETIMING true [get_runs synth_1]
#   set_property STEPS.SYNTH_DESIGN.ARGS.FSM_EXTRACTION auto [get_runs synth_1]
#   set_property STEPS.SYNTH_DESIGN.ARGS.KEEP_EQUIVALENT_REGISTERS true [get_runs synth_1]
#
# Implementation run options:
#   set_property STEPS.OPT_DESIGN.ARGS.DIRECTIVE ExploreWithRemap [get_runs impl_1]
#   set_property STEPS.PLACE_DESIGN.ARGS.DIRECTIVE ExtraNetDelay_high [get_runs impl_1]
#   set_property STEPS.ROUTE_DESIGN.ARGS.DIRECTIVE AggressiveExplore [get_runs impl_1]

# ===========================================
# 12. DRC waivers (if needed)
# ===========================================

# Create a waiver for a specific DRC check
# create_waiver -type DRC -id {TIMING-17} -user "conv_engine" \
#     -desc "The clock pin is driven by combinatorial logic acceptable for this design"

####################################################################################
# 约束文件结束
####################################################################################

物理约束

创建文件 conv_engine_physical.xdc

####################################################################################
# Physical implementation constraints file
####################################################################################

# ===========================================
# 1. IO location constraints (adjust for the ZCU102 board)
# ===========================================

# System reset (push button SW19)
set_property PACKAGE_PIN AM13 [get_ports rst_n]
set_property IOSTANDARD LVCMOS33 [get_ports rst_n]

# LED indicators driven by conv_busy / conv_done
set_property PACKAGE_PIN AG14 [get_ports conv_busy]
set_property PACKAGE_PIN AF13 [get_ports conv_done]
set_property IOSTANDARD LVCMOS33 [get_ports {conv_busy conv_done}]

# ===========================================
# 2. Configuration constraints
# ===========================================

# Configuration bank voltage settings
# NOTE(review): confirm that CFGBVS / CONFIG_VOLTAGE / CONFIG_MODE apply to the
# target device family; on a Zynq MPSoC the PL is normally configured by the PS.
set_property CFGBVS GND [current_design]
set_property CONFIG_VOLTAGE 1.8 [current_design]

# Configuration mode: quad SPI, 4-bit bus width in the bitstream settings
set_property CONFIG_MODE SPIx4 [current_design]
set_property BITSTREAM.CONFIG.SPI_BUSWIDTH 4 [current_design]

# ===========================================
# 3. Performance optimization settings
# ===========================================

# Incremental compile: this is a property of the implementation run. get_runs is
# a project command and is not accepted inside an XDC file, so run the following
# from the Vivado Tcl console or a project script instead:
#   set_property INCREMENTAL_CHECKPOINT ./conv_engine_routed.dcp [get_runs impl_1]

# Preserve the hierarchy boundary of the systolic array instance
set_property KEEP_HIERARCHY true [get_cells -hierarchical -filter {NAME =~ */sa_inst}]

# ===========================================
# 4. Critical path group definitions
# ===========================================

# Register-to-register paths inside the systolic array
group_path -name SYSTOLIC_ARRAY -from [get_pins */sa_inst/*/C] -to [get_pins */sa_inst/*/D]

# Port-to-port groups: only input ports are valid path startpoints and only
# output ports are valid endpoints, so the wildcards are filtered by direction
# instead of passing every matching port to both -from and -to.
group_path -name AXI_INTERFACE \
    -from [get_ports -filter {DIRECTION == IN} s_axi_*] \
    -to [get_ports -filter {DIRECTION == OUT} s_axi_*]
group_path -name STREAM_INTERFACE \
    -from [get_ports -filter {DIRECTION == IN} s_axis_*] \
    -to [get_ports -filter {DIRECTION == OUT} m_axis_*]

综合

配置综合

配置综合选项

# Select the synthesis strategy FIRST: assigning a strategy resets the step
# arguments to that strategy's defaults, so it must precede the per-option
# overrides below. Performance_ExploreWithRemap is an implementation strategy
# and is not valid for a synthesis run; Flow_PerfOptimized_high is the
# performance-oriented synthesis strategy.
set_property strategy Flow_PerfOptimized_high [get_runs synth_1]

# Per-option overrides, using the dedicated run properties rather than packing
# them into "MORE OPTIONS" (which would duplicate -directive / -retiming if the
# dedicated properties are also set).
set_property STEPS.SYNTH_DESIGN.ARGS.DIRECTIVE AlternateRoutability [get_runs synth_1]
set_property STEPS.SYNTH_DESIGN.ARGS.RETIMING true [get_runs synth_1]
set_property STEPS.SYNTH_DESIGN.ARGS.NO_LC true [get_runs synth_1]
set_property STEPS.SYNTH_DESIGN.ARGS.SHREG_MIN_SIZE 10 [get_runs synth_1]

# Parallel job count is not a synthesis step argument; it is given when the run
# is launched:  launch_runs synth_1 -jobs 8

# Enable automatic incremental-synthesis checkpoints for the run
set_property AUTO_INCREMENTAL_CHECKPOINT 1 [get_runs synth_1]

在GUI中设置
more options: -shreg_min_size 10 -keep_equivalent_registers

启动

  1. 保存文件

    # Save a copy of the project as conv_engine_synthesis; -force overwrites an existing copy
    save_project_as conv_engine_synthesis ./conv_engine_synthesis -force
    
  2. 运行综合

    # Reset the synthesis run, then launch it with 8 parallel jobs
    reset_run synth_1
    launch_runs synth_1 -jobs 8
    
  3. 监控进度

    • 查看右上角的 Design Runs 窗口
    • 或在TCL Console执行:
    # Block until the synth_1 run finishes
    wait_on_run synth_1
    
  4. 完成后打开

    # Open the synthesized design of run synth_1 under the design name synth_1
    open_run synth_1 -name synth_1
    

分析

资源利用报告

  1. 生成详细利用率报告
# Make sure the report directory exists before any -file report is written
# (report commands do not create missing directories).
file mkdir ./reports

# Write a hierarchical utilization report, four hierarchy levels deep
report_utilization -file ./reports/post_synth_utilization.rpt -hierarchical -hierarchical_depth 4

# Print the hierarchical summary to the console
report_utilization -hierarchical

# Utilization of one specific module (the sa_inst instance)
report_utilization -cells [get_cells sa_inst]
  2. 创建自动分析脚本

analyze_synth_results.tcl

###############################################
# 综合结果分析脚本
###############################################

# Read a statistic property from a run object.
# Returns "" instead of raising when the property is missing or unreadable.
proc _synth_run_stat {run prop} {
    if {[catch {get_property -quiet $prop $run} value]} {
        return ""
    }
    return $value
}

# Build one "label used / capacity (percent%)" line.
# Tolerates a non-numeric count (prints n/a) and a zero capacity (no percentage).
proc _synth_usage_line {label used capacity} {
    if {![string is double -strict $used]} {
        return "  $label n/a"
    }
    if {$capacity <= 0} {
        return "  $label $used / $capacity"
    }
    set pct [format "%.2f" [expr {$used * 100.0 / $capacity}]]
    return "  $label $used / $capacity (${pct}%)"
}

# Print a post-synthesis summary for the open synthesized design:
# resource usage, WNS/TNS, worst paths, DSP and memory inference, per-module
# LUT/FF counts, and finally write the report files under ./reports.
# Takes no arguments and returns nothing; output goes to the Tcl console.
proc analyze_synthesis_results {} {
    puts "\n================================================"
    puts "      卷积引擎综合结果分析"
    puts "================================================\n"

    # 1. Resource utilization
    puts "1. 资源利用率:"
    puts "----------------"

    # current_run without options refers to the implementation run, so the
    # synthesis run is requested explicitly.
    # NOTE(review): verify with report_property that these STATS.* names exist
    # on run objects in the Vivado version in use; missing values print as n/a.
    set synth_run [current_run -synthesis]

    # Capacities are for the xczu9eg; that device has no UltraRAM, hence 0.
    puts [_synth_usage_line "LUT使用:  " [_synth_run_stat $synth_run STATS.LUT]    274080]
    puts [_synth_usage_line "FF使用:   " [_synth_run_stat $synth_run STATS.FDRE]   548160]
    puts [_synth_usage_line "BRAM使用: " [_synth_run_stat $synth_run STATS.RAMB36] 912]
    puts [_synth_usage_line "URAM使用: " [_synth_run_stat $synth_run STATS.URAM]   0]
    puts [_synth_usage_line "DSP使用:  " [_synth_run_stat $synth_run STATS.DSP]    2520]

    # 2. Timing summary
    puts "\n2. 时序摘要:"
    puts "-------------"

    set timing_summary [report_timing_summary -return_string -max_paths 10]

    # The Design Timing Summary is a column table: a header row beginning with
    # "WNS(ns)  TNS(ns)", a dashed rule, then the value row. Capture the first
    # two numbers of the value row. The parentheses must be escaped, and
    # [^\n] is used because "." also matches newlines in Tcl regexps.
    set wns_tns_re {WNS\(ns\)\s+TNS\(ns\)[^\n]*\n[^\n]*\n\s*(-?[0-9.]+)\s+(-?[0-9.]+)}
    if {[regexp $wns_tns_re $timing_summary -> wns tns]} {
        puts "  最差负时序裕量(WNS): ${wns} ns"
        if {$wns < 0} {
            puts "  ⚠️ 警告:时序未满足!"
        } else {
            puts "  ✓ 时序满足"
        }
        puts "  总负时序裕量(TNS): ${tns} ns"
    } else {
        # e.g. an unconstrained design reports NA instead of numbers
        puts "  WNS/TNS: n/a"
    }

    # 3. Critical path analysis: the five worst setup paths
    puts "\n3. 关键路径:"
    puts "------------"

    report_timing -max_paths 5 -nworst 1 -delay_type max -sort_by slack

    # 4. DSP inference
    puts "\n4. DSP使用详情:"
    puts "---------------"

    set dsp_cells [get_cells -quiet -hierarchical -filter {PRIMITIVE_SUBGROUP == DSP}]
    set dsp_count [llength $dsp_cells]
    puts "  推断的DSP48E2数量: $dsp_count"

    if {$dsp_count > 0} {
        puts "  DSP实例列表(前10个):"
        # Only walk the first ten entries instead of the whole collection
        foreach cell [lrange $dsp_cells 0 9] {
            puts "    - $cell"
        }
    }

    # 5. BRAM/URAM inference
    puts "\n5. 存储器使用详情:"
    puts "------------------"

    set bram_cells [get_cells -quiet -hierarchical -filter {PRIMITIVE_SUBGROUP == BRAM}]
    set uram_cells [get_cells -quiet -hierarchical -filter {PRIMITIVE_SUBGROUP == URAM}]

    puts "  BRAM实例: [llength $bram_cells]"
    puts "  URAM实例: [llength $uram_cells]"

    # 6. Per-module resource breakdown
    puts "\n6. 模块级资源分配:"
    puts "------------------"

    foreach module {sa_inst controller_inst} {
        if {[llength [get_cells -quiet $module]] > 0} {
            puts "\n  模块 $module:"
            # Match on the hierarchical name prefix so primitives nested in
            # sub-instances are counted too; "PARENT == $module" only sees
            # direct children. Flip-flops are selected by REF_NAME (FDRE, FDCE,
            # ...) because FLOP_LATCH is not a PRIMITIVE_SUBGROUP value.
            set module_luts [llength [get_cells -quiet -hierarchical \
                -filter "NAME =~ ${module}/* && IS_PRIMITIVE && PRIMITIVE_SUBGROUP == LUT"]]
            set module_ffs [llength [get_cells -quiet -hierarchical \
                -filter "NAME =~ ${module}/* && IS_PRIMITIVE && REF_NAME =~ FD*"]]
            puts "    LUTs: $module_luts"
            puts "    FFs:  $module_ffs"
        }
    }

    # 7. Report files
    puts "\n7. 生成报告文件..."
    puts "-----------------"

    # Create the report directory before writing into it
    file mkdir ./reports

    report_utilization -file ./reports/utilization_hierarchical.rpt -hierarchical
    report_timing_summary -file ./reports/timing_summary.rpt
    report_power -file ./reports/power_estimate.rpt
    report_clock_utilization -file ./reports/clock_utilization.rpt

    puts "  ✓ 报告已保存到 ./reports/ 目录"

    puts "\n================================================"
    puts "            分析完成"
    puts "================================================\n"
}

# Run the analysis
analyze_synthesis_results
  3. 运行分析脚本
    # Load the script; it defines analyze_synthesis_results and calls it at the end
    source analyze_synth_results.tcl
    

时序分析和优化

  1. 查看时序报告
# Make sure the report directory exists before any -file report is written
file mkdir ./reports

# Detailed timing summary (min and max analysis, including unconstrained paths)
report_timing_summary -delay_type min_max -report_unconstrained \
    -check_timing_verbose -max_paths 10 -input_pins -routable_nets \
    -file ./reports/timing_detail.rpt

# Setup analysis: 20 worst paths, sorted by path group
report_timing -setup -delay_type max -max_paths 20 \
    -sort_by group -file ./reports/setup_timing.rpt

# Hold analysis: 20 worst paths, sorted by path group
report_timing -hold -delay_type min -max_paths 20 \
    -sort_by group -file ./reports/hold_timing.rpt

# Relationships between clocks
report_clock_interaction -delay_type min_max \
    -file ./reports/clock_interaction.rpt
  2. 如果时序不满足,创建优化脚本

timing_optimization.tcl

###############################################
# 时序优化脚本
###############################################

# Inspect the ten worst setup paths of the open design, print suggestions for
# each violating path, adjust the synth_1 run options accordingly, raise the
# priority of near-miss paths, and optionally re-run synthesis.
#
# rerun - "ask" (default) prompts on stdin as before; "yes" / "no" answer the
#         prompt non-interactively (stdin is not usable from the Vivado GUI
#         console or from batch scripts).
proc optimize_timing_violations {{rerun ask}} {
    puts "开始时序优化..."

    # 1. Identify the critical paths
    set critical_paths [get_timing_paths -max_paths 10 -nworst 1 \
                       -delay_type max -sort_by slack]

    # Decisions are collected in the loop and applied once afterwards, so the
    # strategy can be set before the individual step arguments.
    set need_retiming 0
    set need_fanout_limit 0
    set need_perf_strategy 0
    set near_miss_endpoints {}

    foreach path $critical_paths {
        set slack [get_property SLACK $path]
        # Unconstrained paths have an empty slack; comparing "" with 0 would be
        # a string comparison and count them as violations.
        if {![string is double -strict $slack] || $slack >= 0} {
            continue
        }

        puts "发现时序违例: Slack = $slack ns"
        set startpoint [get_property STARTPOINT_PIN $path]
        set endpoint [get_property ENDPOINT_PIN $path]
        puts "  起点: $startpoint"
        puts "  终点: $endpoint"

        # Strategy 1: pipeline / retiming for large violations
        if {$slack < -2.0} {
            puts "  建议: 在该路径插入流水线寄存器"
            set need_retiming 1
        }

        # Strategy 2: replicate high-fanout drivers.
        # The startpoint pin is normally the register clock pin, whose net is
        # the clock net; the relevant fanout is that of the nets driven by the
        # startpoint cell's outputs. Fanout is taken as the net's flat pin
        # count minus the driver.
        set max_fanout 0
        set start_cell [get_cells -quiet -of_objects [get_pins -quiet $startpoint]]
        if {[llength $start_cell] > 0} {
            set out_pins [get_pins -quiet -of_objects $start_cell -filter {DIRECTION == OUT}]
            foreach net [get_nets -quiet -of_objects $out_pins] {
                set pin_count [get_property -quiet FLAT_PIN_COUNT $net]
                if {[string is integer -strict $pin_count] && $pin_count - 1 > $max_fanout} {
                    set max_fanout [expr {$pin_count - 1}]
                }
            }
        }
        if {$max_fanout > 32} {
            puts "  建议: 复制高扇出网络"
            set need_fanout_limit 1
        }

        # Strategy 3: switch to a performance-oriented synthesis strategy.
        # Performance_ExploreWithRemap is an implementation strategy and cannot
        # be assigned to a synthesis run.
        if {$slack < -1.0} {
            puts "  建议: 使用Flow_PerfOptimized_high策略"
            set need_perf_strategy 1
        }

        # Near-miss paths (violating by less than 1 ns) get a higher weight below
        if {$slack > -1.0} {
            lappend near_miss_endpoints $endpoint
        }
    }

    # Apply the synthesis-run changes. The strategy goes first because assigning
    # a strategy resets the step arguments to that strategy's defaults.
    set synth_run [get_runs synth_1]
    if {$need_perf_strategy} {
        set_property strategy Flow_PerfOptimized_high $synth_run
    }
    if {$need_retiming} {
        set_property STEPS.SYNTH_DESIGN.ARGS.RETIMING true $synth_run
    }
    if {$need_fanout_limit} {
        # The synthesis step argument for fanout is FANOUT_LIMIT
        set_property STEPS.SYNTH_DESIGN.ARGS.FANOUT_LIMIT 16 $synth_run
    }

    # 2. Physical optimization hints
    puts "\n应用物理优化选项..."

    # Nets have no CRITICALITY property; path priority is expressed with a
    # weighted path group on the near-miss endpoints instead.
    set near_miss_pins [get_pins -quiet $near_miss_endpoints]
    if {[llength $near_miss_pins] > 0} {
        group_path -name NEAR_MISS_PATHS -weight 2 -to $near_miss_pins
    }

    # 3. Re-run synthesis if requested
    switch -- $rerun {
        yes { set answer y }
        no  { set answer n }
        default {
            puts "\n是否需要重新运行综合?(y/n)"
            flush stdout
            gets stdin answer
        }
    }
    if {$answer eq "y"} {
        reset_run synth_1
        launch_runs synth_1 -jobs 8
        wait_on_run synth_1
    }

    puts "时序优化完成!"
}

# Run the optimization
optimize_timing_violations

布局布线

策略

  1. 设置实现选项
# Configure the impl_1 run in one call per group: optimization, placement,
# physical optimization and routing directives.
set_property -dict {
    STEPS.OPT_DESIGN.ARGS.DIRECTIVE       Default
    STEPS.PLACE_DESIGN.ARGS.DIRECTIVE     ExtraNetDelay_high
    STEPS.PHYS_OPT_DESIGN.IS_ENABLED      true
    STEPS.PHYS_OPT_DESIGN.ARGS.DIRECTIVE  AggressiveExplore
    STEPS.ROUTE_DESIGN.ARGS.DIRECTIVE     AggressiveExplore
} [get_runs impl_1]

# Extra physical optimization after routing, for designs with timing problems
set_property -dict {
    STEPS.POST_ROUTE_PHYS_OPT_DESIGN.IS_ENABLED      true
    STEPS.POST_ROUTE_PHYS_OPT_DESIGN.ARGS.DIRECTIVE  Explore
} [get_runs impl_1]
  2. 运行实现
# Launch implementation
launch_runs impl_1 -jobs 8

# Wait for it to finish
wait_on_run impl_1

# Check the implementation result
set impl_status [get_property STATUS [get_runs impl_1]]
set impl_progress [get_property PROGRESS [get_runs impl_1]]
puts "实现状态: $impl_status"

# The STATUS text names the last step that ran, so it is not
# "route_design Complete!" when post-route phys_opt_design is enabled.
# PROGRESS reaches 100% only when every enabled step completed.
if {$impl_progress eq "100%"} {
    puts "实现成功完成!"
    open_run impl_1
} else {
    puts "实现失败!请检查错误信息"
}

分析实现结果

创建脚本 analyze_implementation.tcl

###############################################
# 实现结果分析脚本
###############################################

# Read a statistic property from a run object.
# Returns "n/a" when the property is missing, unreadable or empty.
proc _impl_run_stat {run prop} {
    if {[catch {get_property -quiet $prop $run} value] || $value eq ""} {
        return "n/a"
    }
    return $value
}

# Print a post-implementation summary for run impl_1: utilization, timing,
# power, route status, DRC, clocking, IO timing and run statistics, writing the
# detailed reports under ./reports, then optionally generate the bitstream.
#
# gen_bitstream - "ask" (default) prompts on stdin as before; "yes" / "no"
#                 answer the prompt non-interactively.
proc analyze_implementation_results {{gen_bitstream ask}} {
    puts "\n================================================"
    puts "        实现结果详细分析"
    puts "================================================\n"

    # Make sure the implemented design is the current design; reuse it when it
    # is already open instead of opening it a second time.
    if {[llength [get_designs -quiet impl_1]] > 0} {
        current_design impl_1
    } else {
        open_run impl_1
    }

    # Report commands do not create missing directories
    file mkdir ./reports

    set impl_run [current_run -implementation]

    # 1. Final resource utilization
    puts "1. 最终资源利用率:"
    puts "------------------"
    report_utilization

    # 2. Final timing result
    puts "\n2. 时序结果:"
    puts "------------"

    set wns [_impl_run_stat $impl_run STATS.WNS]
    # A missing WNS is treated as "timing not met" rather than compared as text
    set timing_met [expr {[string is double -strict $wns] && $wns >= 0}]

    if {$timing_met} {
        puts "✓ 时序约束满足!"
        puts "  WNS: $wns ns"
        puts "  WHS: [_impl_run_stat $impl_run STATS.WHS] ns"
    } else {
        puts "✗ 时序约束未满足!"
        puts "  WNS: $wns ns"
        puts "  TNS: [_impl_run_stat $impl_run STATS.TNS] ns"

        # Show the failing paths
        puts "\n  失败的时序路径:"
        report_timing -setup -max_paths 5
    }

    # 3. Power estimate
    puts "\n3. 功耗估算:"
    puts "-----------"
    report_power -file ./reports/power_impl.rpt

    # The power summary is a table of "| <name> (W) | <value> |" rows; the
    # parentheses must be escaped and the column separator matched.
    set power_report [report_power -return_string]
    set total_power "n/a"
    set dynamic_power "n/a"
    set static_power "n/a"
    set have_power [regexp {Total On-Chip Power \(W\)\s*\|\s*([\d.]+)} $power_report -> total_power]
    regexp {\|\s*Dynamic \(W\)\s*\|\s*([\d.]+)} $power_report -> dynamic_power
    regexp {\|\s*Device Static \(W\)\s*\|\s*([\d.]+)} $power_report -> static_power

    if {$have_power} {
        puts "  总功耗: ${total_power} W"
        puts "  动态功耗: ${dynamic_power} W"
        puts "  静态功耗: ${static_power} W"
    }

    # 4. Routing status
    puts "\n4. 布线质量:"
    puts "------------"
    report_route_status -file ./reports/route_status.rpt

    # 5. Design rule check (DRC)
    puts "\n5. DRC检查:"
    puts "-----------"
    report_drc -file ./reports/drc_report.rpt

    # Counters default to 0 so later tests never read an unset variable.
    set infos 0
    set warnings 0
    set critical_warnings 0
    set errors 0
    set drc_report [report_drc -return_string]
    set have_drc_counts [regexp \
        {(\d+) Infos, (\d+) Warnings, (\d+) Critical Warnings and (\d+) Errors} \
        $drc_report -> infos warnings critical_warnings errors]

    if {!$have_drc_counts} {
        # Fallback: sum the per-rule rows of the report table,
        # "| <rule> | <severity> | <description> | <violations> |".
        set row_re {^[ \t]*\|[^|\n]*\|\s*(Error|Critical Warning|Warning|Advisory)\s*\|[^|\n]*\|\s*(\d+)\s*\|}
        foreach {row severity count} [regexp -all -inline -line $row_re $drc_report] {
            switch -- $severity {
                "Error"            { incr errors $count }
                "Critical Warning" { incr critical_warnings $count }
                "Warning"          { incr warnings $count }
                default            { incr infos $count }
            }
            set have_drc_counts 1
        }
    }

    if {$errors > 0} {
        puts "  ✗ 发现 $errors 个错误!"
        puts "  请查看 ./reports/drc_report.rpt 获取详情"
    } else {
        puts "  ✓ 无DRC错误"
        if {$have_drc_counts} {
            puts "  ⚠ $warnings 个警告, $critical_warnings 个关键警告"
        }
    }

    # 6. Clocking analysis
    puts "\n6. 时钟域分析:"
    puts "--------------"
    report_clock_utilization -file ./reports/clock_util_impl.rpt
    report_clock_networks -file ./reports/clock_networks.rpt

    # 7. IO timing
    puts "\n7. IO时序:"
    puts "----------"
    report_io_timing -file ./reports/io_timing.rpt

    # 8. Implementation statistics
    puts "\n8. 实现统计:"
    puts "------------"

    # NOTE(review): verify with report_property that these STATS.* names exist
    # on run objects in the Vivado version in use; missing values print as n/a.
    set route_nets [_impl_run_stat $impl_run STATS.ROUTE_NETS]
    set route_insts [_impl_run_stat $impl_run STATS.ROUTE_INSTS]

    puts "  布线网络数: $route_nets"
    puts "  布线实例数: $route_insts"

    puts "\n================================================"
    puts "            分析完成"
    puts "================================================\n"

    # Decide whether to generate the bitstream
    if {$timing_met && $errors == 0} {
        puts "设计准备好生成比特流!"
        switch -- $gen_bitstream {
            yes { set answer y }
            no  { set answer n }
            default {
                puts "是否生成比特流?(y/n)"
                flush stdout
                gets stdin answer
            }
        }
        if {$answer eq "y"} {
            launch_runs impl_1 -to_step write_bitstream -jobs 8
            wait_on_run impl_1
            puts "比特流生成完成!"
        }
    } else {
        puts "⚠ 设计存在问题,建议先解决后再生成比特流"
    }
}

# Run the analysis
analyze_implementation_results
posted @ 2025-09-15 19:40  李白的白  阅读(11)  评论(0)    收藏  举报