时序分析案例1
// -----------------------------------------------------------------------------
// frame_split_to_fifo_8bit
//
// Buffers one incoming byte-stream frame, validates its length/trailer, then
// re-emits it to an 8-bit FIFO as a sequence of 9-byte records:
//     { header, 6 payload bytes of item k, 8'h00, 8'h00 }   for k = 0..item_cnt-1
//
// Expected input frame layout (as enforced by the RX_CHECK tests below):
//     byte 0              : header
//     bytes 1..length-1   : (length-1)/6 items of 6 bytes each
//     last 2 bytes        : 8'h00, 8'h00 (not counted in frame_length_i)
//
// frame_length_i is sampled together with the first byte of the frame.
//
// Change vs. the previous revision: the RX_CHECK branch decision used
// length_err_r / item_cnt_r in the same cycle in which they were being
// assigned (non-blocking), i.e. it branched on stale values. RX_CHECK is now
// held for two cycles (check_done_r) so the branch sees the registered
// results. frame_done_o therefore arrives one clock later than before.
//
// NOTE(review): the modulo/divide on length_r and the multiply feeding the
// LUTRAM read address are kept functionally as-is; they are wide
// combinational operators and are likely candidates for pipelining if timing
// closure is a problem.
// -----------------------------------------------------------------------------
module frame_split_to_fifo_8bit #(
    parameter integer MAX_FRAME_BYTES = 256   // maximum number of buffered input bytes
)(
    input  wire        sys_clk        ,       // system clock
    input  wire        rst_n          ,       // synchronous reset, active low
    input  wire        frame_valid_i  ,       // upstream byte valid
    input  wire [7:0]  frame_data_i   ,       // upstream byte data
    input  wire        frame_last_i   ,       // upstream last-byte marker
    input  wire [15:0] frame_length_i ,       // upstream length, excluding the 2 trailing zero bytes
    output reg         fifo_wr_en_o   ,       // FIFO write enable (registered)
    output reg  [7:0]  fifo_wr_data_o ,       // FIFO write data, 8 bit (registered)
    input  wire        fifo_full_i    ,       // FIFO full
    output reg         busy_o         ,       // module busy (next state is not IDLE)
    output reg         frame_done_o   ,       // one-cycle pulse: frame processing finished
    output reg         frame_err_o            // valid with frame_done_o: length/trailer error or overflow
);

    // ------------------------------------------------------------------
    // One-hot state encoding
    // ------------------------------------------------------------------
    localparam [4:0] IDLE       = 5'b00001;   // waiting for the first byte
    localparam [4:0] RX_FRAME   = 5'b00010;   // receiving the remaining bytes
    localparam [4:0] RX_CHECK   = 5'b00100;   // validate length/trailer (2 cycles)
    localparam [4:0] FIFO_WRITE = 5'b01000;   // emit 9-byte records to the FIFO
    localparam [4:0] DONE       = 5'b10000;   // report completion

    reg [4:0]  state;                         // current state
    reg [4:0]  next_state;                    // next state (combinational)

    (* ram_style = "distributed" *)
    reg [7:0]  frame_buf [0:MAX_FRAME_BYTES-1]; // input frame buffer

    reg [7:0]  hdr_r;                         // frame header (byte 0)
    reg [15:0] rx_cnt_r;                      // number of bytes actually stored
    reg [15:0] length_r;                      // latched frame_length_i
    reg [15:0] item_cnt_r;                    // number of 6-byte items in the frame
    reg [15:0] item_idx_r;                    // index of the item being emitted
    reg [3:0]  tx_byte_idx_r;                 // byte position inside the 9-byte output record
    reg        overflow_r;                    // more bytes received than the buffer holds
    reg        length_err_r;                  // length / trailer check failed
    reg        check_done_r;                  // RX_CHECK results have been registered

    // Buffer address of the first payload byte of the current item.
    wire [15:0] item_base_w;
    assign item_base_w = 16'd1 + item_idx_r * 16'd6;

    // ------------------------------------------------------------------
    // Next-state logic
    // ------------------------------------------------------------------
    always @(*) begin
        next_state = state;
        case (state)
            IDLE: begin
                // A single-byte frame (valid + last) skips RX_FRAME.
                if (frame_valid_i)
                    next_state = frame_last_i ? RX_CHECK : RX_FRAME;
            end
            RX_FRAME: begin
                if (frame_valid_i && frame_last_i)
                    next_state = RX_CHECK;
            end
            RX_CHECK: begin
                // First cycle: length_err_r / item_cnt_r are being computed,
                // so stay here. Second cycle: branch on the registered values.
                if (!check_done_r)
                    next_state = RX_CHECK;
                else if (overflow_r || length_err_r)
                    next_state = DONE;
                else if (item_cnt_r == 16'd0)
                    next_state = DONE;
                else
                    next_state = FIFO_WRITE;
            end
            FIFO_WRITE: begin
                // Leave once the last byte of the last item has been accepted.
                if (!fifo_full_i && (item_idx_r == item_cnt_r - 1'b1) && (tx_byte_idx_r == 4'd8))
                    next_state = DONE;
            end
            DONE: begin
                next_state = IDLE;
            end
            default: begin
                next_state = IDLE;
            end
        endcase
    end

    // ------------------------------------------------------------------
    // State register
    // ------------------------------------------------------------------
    always @(posedge sys_clk) begin
        if (!rst_n)
            state <= IDLE;
        else
            state <= next_state;
    end

    // ------------------------------------------------------------------
    // Datapath and outputs
    // ------------------------------------------------------------------
    always @(posedge sys_clk) begin
        if (!rst_n) begin
            fifo_wr_en_o   <= 1'b0;
            fifo_wr_data_o <= 8'd0;
            busy_o         <= 1'b0;
            frame_done_o   <= 1'b0;
            frame_err_o    <= 1'b0;
            hdr_r          <= 8'd0;
            rx_cnt_r       <= 16'd0;
            length_r       <= 16'd0;
            item_cnt_r     <= 16'd0;
            item_idx_r     <= 16'd0;
            tx_byte_idx_r  <= 4'd0;
            overflow_r     <= 1'b0;
            length_err_r   <= 1'b0;
            check_done_r   <= 1'b0;
        end
        else begin
            // Defaults: strobes are single-cycle pulses.
            fifo_wr_en_o <= 1'b0;
            frame_done_o <= 1'b0;
            frame_err_o  <= 1'b0;
            busy_o       <= (next_state != IDLE);

            case (state)
                IDLE: begin
                    // Clear per-frame bookkeeping while idle.
                    rx_cnt_r      <= 16'd0;
                    item_idx_r    <= 16'd0;
                    tx_byte_idx_r <= 4'd0;
                    overflow_r    <= 1'b0;
                    length_err_r  <= 1'b0;
                    check_done_r  <= 1'b0;
                    if (frame_valid_i) begin
                        // First byte is the header; length is sampled with it.
                        frame_buf[0] <= frame_data_i;
                        hdr_r        <= frame_data_i;
                        length_r     <= frame_length_i;
                        rx_cnt_r     <= 16'd1;   // overrides the clear above
                    end
                end

                RX_FRAME: begin
                    if (frame_valid_i) begin
                        if (rx_cnt_r < MAX_FRAME_BYTES) begin
                            frame_buf[rx_cnt_r] <= frame_data_i;
                            rx_cnt_r            <= rx_cnt_r + 1'b1;
                        end
                        else begin
                            // Buffer exhausted: drop the byte and flag it.
                            overflow_r <= 1'b1;
                        end
                    end
                end

                RX_CHECK: begin
                    tx_byte_idx_r <= 4'd0;
                    item_idx_r    <= 16'd0;

                    // Checks are evaluated in priority order. The inputs do not
                    // change while in RX_CHECK, so re-evaluating them in the
                    // second cycle yields the same results.
                    if (length_r < 16'd7)
                        length_err_r <= 1'b1;            // needs header + at least one item
                    else if (((length_r - 16'd1) % 16'd6) != 16'd0)
                        length_err_r <= 1'b1;            // payload is not a multiple of 6
                    else if (rx_cnt_r != length_r + 16'd2)
                        length_err_r <= 1'b1;            // received count != length + 2 trailer bytes
                    else if ((frame_buf[rx_cnt_r-2] != 8'h00) || (frame_buf[rx_cnt_r-1] != 8'h00))
                        length_err_r <= 1'b1;            // trailer bytes must both be zero
                    else
                        length_err_r <= 1'b0;

                    item_cnt_r   <= (length_r - 16'd1) / 16'd6;
                    check_done_r <= 1'b1;                // results valid from the next cycle
                end

                FIFO_WRITE: begin
                    // NOTE(review): fifo_wr_en_o is registered, so the write
                    // reaches the FIFO one cycle after fifo_full_i was sampled
                    // low - confirm the FIFO tolerates this.
                    if (!fifo_full_i) begin
                        fifo_wr_en_o <= 1'b1;
                        case (tx_byte_idx_r)
                            4'd0:    fifo_wr_data_o <= hdr_r;
                            4'd1:    fifo_wr_data_o <= frame_buf[item_base_w + 16'd0];
                            4'd2:    fifo_wr_data_o <= frame_buf[item_base_w + 16'd1];
                            4'd3:    fifo_wr_data_o <= frame_buf[item_base_w + 16'd2];
                            4'd4:    fifo_wr_data_o <= frame_buf[item_base_w + 16'd3];
                            4'd5:    fifo_wr_data_o <= frame_buf[item_base_w + 16'd4];
                            4'd6:    fifo_wr_data_o <= frame_buf[item_base_w + 16'd5];
                            4'd7:    fifo_wr_data_o <= 8'h00;
                            4'd8:    fifo_wr_data_o <= 8'h00;
                            default: fifo_wr_data_o <= 8'h00;
                        endcase

                        if (tx_byte_idx_r == 4'd8) begin
                            // Record complete: move on to the next item.
                            tx_byte_idx_r <= 4'd0;
                            item_idx_r    <= item_idx_r + 1'b1;
                        end
                        else begin
                            tx_byte_idx_r <= tx_byte_idx_r + 1'b1;
                        end
                    end
                end

                DONE: begin
                    frame_done_o <= 1'b1;
                    frame_err_o  <= overflow_r | length_err_r;
                end

                default: begin
                end
            endcase
        end
    end

endmodule
分析:
看代码,违例主要不是 FIFO,而是这几处把除法/取模/乘法/动态 RAM 地址塞进组合路径了。
最明显:
assign item_base_w = 16'd1 + item_idx_r * 16'd6;
以及:
else if (((length_r - 16'd1) % 16'd6) != 16'd0)
...
item_cnt_r <= (length_r - 16'd1) / 16'd6;
你时序报告里 From 是:
length_r_reg[4]/C
To 是:
state_reg[x]/D
对应这里:
RX_CHECK: begin
if (overflow_r || length_err_r)
next_state = DONE;
else if (item_cnt_r == 16'd0)
next_state = DONE;
else
next_state = FIFO_WRITE;
end
表面看 next_state 在 RX_CHECK 里只判断 overflow_r、length_err_r 和 item_cnt_r 这几个寄存器,但 item_cnt_r 在时序块里由:
item_cnt_r <= (length_r - 16'd1) / 16'd6;
算出来。Vivado 很可能把 length_r → item_cnt_r 相关逻辑 → next_state 优化/重定时(retiming)到同一条组合路径上,导致状态跳转路径很长;具体是否发生重定时,需要对照时序报告中的路径逻辑层级确认。
还有一个大雷:
frame_buf[item_base_w + 16'd0]
item_base_w = item_idx_r * 6 + 1,这是动态地址读 distributed RAM。
LUTRAM 动态读本来就是组合读,再叠加乘法地址计算,路径会很长。
1. 不要 %6、/6
这里长度应该是固定协议,简单判断即可:
length_ok_r <= (length_r == 16'd7) ||
(length_r == 16'd13) ||
(length_r == 16'd19) ||
...
或者更简单:接收时直接按 6 计数,不做除法取模。
2. item_base_w 不要乘法
加一个地址寄存器递增:
reg [15:0] rd_addr_r; // 当前读地址
// 每条指令开始 rd_addr_r = 1 + item_idx*6
// 不要算乘法,直接每读完6字节后自然递增
FIFO_WRITE 里不要:
frame_buf[item_base_w + n]
改成(rd_addr_r 在进入 FIFO_WRITE 之前初始化为 1;只在 tx_byte_idx_r 为 1~6 且 FIFO 未满时读取并递增,帧头和末尾两个 0 字节不动地址):
fifo_wr_data_o <= frame_buf[rd_addr_r];
rd_addr_r <= rd_addr_r + 1'b1;
3. RX_CHECK 拆成多拍
现在 RX_CHECK 一拍做太多:
length判断 + %6 + rx_cnt比较 + frame_buf尾字节读 + /6
建议拆:
RX_CHECK_LEN
RX_CHECK_TAIL
RX_CALC_ITEM
FIFO_WRITE
200MHz 下别把这些都压一拍。
length_r 参与了取模/除法/长度判断,又影响状态跳转;item_idx 还参与乘法动态寻址 LUTRAM,导致组合逻辑和布线都太长。

浙公网安备 33010602011771号