HPM6450/HPM6750 在 FreeRTOS 下 lwIP 输入输出数据优化

修改输入部分代码:

/**
 * Fetch one received Ethernet frame from the RX DMA descriptors and copy it
 * into a freshly allocated lwIP pbuf chain.
 *
 * The frame bytes are always copied into the pbuf's own payload storage
 * before the descriptors are handed back to the DMA. The pbuf payload
 * pointers are never redirected into the DMA buffers, so the returned data
 * stays valid after the DMA reuses those buffers.
 *
 * @param netif  lwIP network interface (not used by this function).
 * @return pbuf chain holding the frame, or NULL when no frame was available
 *         or no pbuf could be allocated (the frame is dropped in that case,
 *         its descriptors are still released).
 */
static struct pbuf *low_level_input(struct netif *netif)
{
    struct pbuf *p = NULL, *q;
    u32_t len;
    uint8_t *buffer;
    enet_frame_t frame = {0, 0, 0};
    enet_rx_desc_t *dma_rx_desc;
    uint32_t buffer_offset = 0;
    uint32_t payload_offset = 0;
    uint32_t bytes_left_to_copy = 0;
    uint32_t chunk = 0;
    uint32_t i = 0;

    (void)netif;

    /* Check and get a received frame */
    #if (defined(__ENABLE_ENET_RECEIVE_INTERRUPT) && __ENABLE_ENET_RECEIVE_INTERRUPT) || !NO_SYS
        frame = enet_get_received_frame_interrupt(&desc.rx_desc_list_cur, &desc.rx_frame_info, ENET_RX_BUFF_COUNT);
    #else
        if (enet_check_received_frame(&desc.rx_desc_list_cur, &desc.rx_frame_info) == 1) {
            frame = enet_get_received_frame(&desc.rx_desc_list_cur, &desc.rx_frame_info);
        }
    #endif

    /* Obtain the size of the packet and put it into the "len" variable. */
    len = frame.length;

    if (len > 0) {
        /* Allocate a pbuf chain of pbufs from the lwIP buffer pool */
        p = pbuf_alloc(PBUF_RAW, len, PBUF_POOL);

        if (p != NULL) {
            dma_rx_desc = frame.rx_desc;

            /* Read the DMA buffer through the address returned by
             * sys_address_to_core_local_mem(), as the original code did. */
            buffer = (uint8_t *)sys_address_to_core_local_mem(BOARD_RUNNING_CORE, (uint32_t)frame.buffer);
            buffer_offset = 0;

            for (q = p; q != NULL; q = q->next) {
                bytes_left_to_copy = q->len;
                payload_offset = 0;

                /* While this pbuf needs more bytes than remain in the current Rx buffer */
                while ((bytes_left_to_copy + buffer_offset) > ENET_RX_BUFF_SIZE) {
                    chunk = ENET_RX_BUFF_SIZE - buffer_offset;

                    /* Copy the rest of the current Rx buffer into the pbuf */
                    memcpy((uint8_t *)q->payload + payload_offset, buffer + buffer_offset, chunk);

                    /* Point to next descriptor and its buffer */
                    dma_rx_desc = (enet_rx_desc_t *)(dma_rx_desc->rdes3_bm.next_desc);
                    buffer = (uint8_t *)sys_address_to_core_local_mem(BOARD_RUNNING_CORE, (uint32_t)dma_rx_desc->rdes2_bm.buffer1);

                    bytes_left_to_copy -= chunk;
                    payload_offset += chunk;
                    buffer_offset = 0;
                }

                /* Copy the remaining bytes for this pbuf. The payload pointer
                 * is left untouched: overwriting it with the DMA buffer
                 * address would discard the bytes copied above, ignore
                 * buffer_offset for chained pbufs, and leave the pbuf
                 * pointing at memory that is returned to the DMA below. */
                memcpy((uint8_t *)q->payload + payload_offset, buffer + buffer_offset, bytes_left_to_copy);
                buffer_offset += bytes_left_to_copy;
            }

            #if defined(LWIP_PTP) && LWIP_PTP
            /* Get the received timestamp (once per frame, stored on the chain head) */
            p->time_sec  = frame.rx_desc->rdes7_bm.rtsh;
            p->time_nsec = frame.rx_desc->rdes6_bm.rtsl;
            #endif
        }

        /* Release descriptors to DMA */
        dma_rx_desc = frame.rx_desc;

        /* Set Own bit in Rx descriptors: gives the buffers back to DMA */
        for (i = 0; i < desc.rx_frame_info.seg_count; i++) {
            dma_rx_desc->rdes0_bm.own = 1;
            dma_rx_desc = (enet_rx_desc_t *)(dma_rx_desc->rdes3_bm.next_desc);
        }

        /* Clear Segment_Count */
        desc.rx_frame_info.seg_count = 0;
    }

    /* Resume Rx Process */
    enet_rx_resume(ENET);

    return p;
}

原版代码在 “pass the buffer to pbuf” 处没有复制数据,而是直接把缓冲区地址赋给 q->payload(即 q->payload = (void *)buffer),会导致部分 TCP 数据块接收异常。

同理输出部分也需要调整:

/*
 * Original "pass payload to buffer" path, kept for reference only
 * (reported as buggy and replaced by the copy below):
 *
 *   desc.tx_desc_list_cur->tdes2_bm.buffer1 =
 *       core_local_mem_to_sys_address(BOARD_RUNNING_CORE, (uint32_t)q->payload);
 */

/* Convert the TX buffer address with sys_address_to_core_local_mem() before the CPU writes to it */
buffer = (void *)sys_address_to_core_local_mem(BOARD_RUNNING_CORE, (uint32_t)buffer);

/* Copy the remaining bytes of this pbuf into the TX buffer */
memcpy((uint8_t *)buffer + buffer_offset,
       (uint8_t *)q->payload + payload_offset,
       bytes_left_to_copy);

buffer_offset = buffer_offset + bytes_left_to_copy;
frame_length = frame_length + bytes_left_to_copy;

将接收和发送缓冲区定义到 .fast 段,以加快处理:

/*
 * Ethernet receive buffers: ENET_RX_BUFF_COUNT buffers of ENET_RX_BUFF_SIZE
 * bytes each, placed in the ".fast" section. The non-cacheable placement
 * used previously is kept below, commented out.
 * NOTE(review): ".fast" comes from the linker script - confirm the ENET DMA
 * can access that memory region.
 */
//ATTR_PLACE_AT_NONCACHEABLE_WITH_ALIGNMENT(ENET_SOC_BUFF_ADDR_ALIGNMENT)
ATTR_PLACE_AT_WITH_ALIGNMENT(".fast", ENET_SOC_BUFF_ADDR_ALIGNMENT)
__RW uint8_t rx_buff[ENET_RX_BUFF_COUNT][ENET_RX_BUFF_SIZE]; /* Ethernet Receive Buffer */

/*
 * Ethernet transmit buffers: ENET_TX_BUFF_COUNT buffers of ENET_TX_BUFF_SIZE
 * bytes each, placed in the ".fast" section in the same way.
 */
//ATTR_PLACE_AT_NONCACHEABLE_WITH_ALIGNMENT(ENET_SOC_BUFF_ADDR_ALIGNMENT)
ATTR_PLACE_AT_WITH_ALIGNMENT(".fast", ENET_SOC_BUFF_ADDR_ALIGNMENT)
__RW uint8_t tx_buff[ENET_TX_BUFF_COUNT][ENET_TX_BUFF_SIZE]; /* Ethernet Transmit Buffer */

增加接收缓冲区的数量 ENET_RX_BUFF_COUNT:

/* Number of transmit buffers */
#define ENET_TX_BUFF_COUNT  (10U)
/* Number of receive buffers */
#define ENET_RX_BUFF_COUNT  (30U)
/* Each receive / transmit buffer is sized to ENET_MAX_FRAME_SIZE bytes */
#define ENET_RX_BUFF_SIZE   ENET_MAX_FRAME_SIZE
#define ENET_TX_BUFF_SIZE   ENET_MAX_FRAME_SIZE

lwip 中 lwipopts.h 内存分配采用固定块的方式,不采用自带的分配方式,否则容易产生内存碎片,这样也方便带cache的MCU按照字节对齐的方式加快处理内存,避免不必要的内存复制操作。

/*
  lwIP offers three memory allocation schemes: the C library malloc,
  lwIP's own dynamic heap, and lwIP's own dynamic memory pools.
  The C library malloc is the least recommended. When plenty of memory is
  available, pool allocation is recommended: compared with the heap, pools
  allocate and free quickly and do not fragment memory.
*/
#define MEM_LIBC_MALLOC                 0    // 0: do not use the C library malloc/free; use lwIP's own allocator
#define MEM_USE_POOLS                   1    // 1: mem_malloc() allocates from fixed-size memory pools instead of the heap
#define MEMP_USE_CUSTOM_POOLS           1    // 1: include the user-provided lwippools.h (needed when MEM_USE_POOLS is 1)
#define MEM_USE_POOLS_TRY_BIGGER_POOL   0    // 0: when a pool is exhausted, do not fall back to the next bigger pool

TCP数据块参数优化

/* MEMP_NUM_PBUF: the number of memp struct pbufs. If the application sends
 * a lot of data out of ROM (or other static memory), this should be set high. */
#define MEMP_NUM_PBUF                           32
/* MEMP_NUM_UDP_PCB: the number of UDP protocol control blocks,
 * one per active UDP "connection". */
#define MEMP_NUM_UDP_PCB                        10
/* MEMP_NUM_TCP_PCB: the number of simultaneously active TCP connections. */
#define MEMP_NUM_TCP_PCB                        20  // affects TCP data processing performance
/* MEMP_NUM_TCP_PCB_LISTEN: the number of listening TCP connections. */
#define MEMP_NUM_TCP_PCB_LISTEN                 20
/* MEMP_NUM_TCP_SEG: the number of simultaneously queued TCP segments. */
#define MEMP_NUM_TCP_SEG                        120 // previously 32; affects TCP data processing performance
/* MEMP_NUM_SYS_TIMEOUT: the number of simultaneously active timeouts. */
#define MEMP_NUM_SYS_TIMEOUT                    (LWIP_NUM_SYS_TIMEOUT_INTERNAL + 1)

需要注意加入 lwippools.h 文件的支持;对于不同的 lwIP 版本,该文件中加入的代码需要做区分:

/* OPTIONAL: Pools to replace heap allocation
 * Optional: Pools can be used instead of the heap for mem_malloc. If
 * so, these should be defined here, in increasing order according to
 * the pool element size.
 *
 * LWIP_MALLOC_MEMPOOL(number_elements, element_size)
 */
#if MEM_USE_POOLS

/* Example pool set for lwIP 2.x.x (kept for reference, not active) */
LWIP_MALLOC_MEMPOOL_START
//LWIP_MALLOC_MEMPOOL(100, 256)
//LWIP_MALLOC_MEMPOOL(50, 512)
//LWIP_MALLOC_MEMPOOL(20, 1024)
//LWIP_MALLOC_MEMPOOL(20, 1536)

/* Example pool set for lwIP 1.4.1: three pools with element sizes
 * 256, 512 and 1512 bytes (kept for reference, not active) */
//LWIP_MALLOC_MEMPOOL(20, 256)
//LWIP_MALLOC_MEMPOOL(10, 512)
//LWIP_MALLOC_MEMPOOL(5, 1512)

/* Active, user-defined pools: listed in increasing element size as required above */
LWIP_MALLOC_MEMPOOL(20, 256)
LWIP_MALLOC_MEMPOOL(10, 512)
LWIP_MALLOC_MEMPOOL(5, 1024)
LWIP_MALLOC_MEMPOOL(5, 1536)

LWIP_MALLOC_MEMPOOL_END
#endif /* MEM_USE_POOLS */

/* Optional: Your custom pools can go here if you would like to use
 * lwIP's memory pools for anything else.
 */
/* Custom pool named SYS_MBOX.
 * NOTE(review): per lwIP's LWIP_MEMPOOL(name, num, size, desc) convention
 * this is 22 elements of 100 bytes - confirm against the lwIP version in use. */
LWIP_MEMPOOL(SYS_MBOX, 22, 100, "SYS_MBOX")

查看官方代码发现,官方将 lwip 的 heap 指定到了 .fast_ram 段,相关的宏在 arch.h 文件中定义:

/** Allocates a memory buffer of specified size that is of sufficient size to align
 * its start address using LWIP_MEM_ALIGN.
 * You can declare your own version here e.g. to enforce alignment without adding
 * trailing padding bytes (see LWIP_MEM_ALIGN_BUFFER) or your own section placement
 * requirements.\n
 * e.g. if you use gcc and need 32 bit alignment:\n
 * \#define LWIP_DECLARE_MEMORY_ALIGNED(variable_name, size) u8_t variable_name[size] \_\_attribute\_\_((aligned(4)))\n
 * or more portable:\n
 * \#define LWIP_DECLARE_MEMORY_ALIGNED(variable_name, size) u32_t variable_name[(size + sizeof(u32_t) - 1) / sizeof(u32_t)]
 *
 * The default below declares a u8_t array of LWIP_MEM_ALIGN_BUFFER(size) bytes
 * and places it in the linker section named by LWIP_MEM_SECTION.
 */
#ifndef LWIP_DECLARE_MEMORY_ALIGNED
#define LWIP_DECLARE_MEMORY_ALIGNED(variable_name, size) __attribute__ ((section(LWIP_MEM_SECTION))) u8_t variable_name[LWIP_MEM_ALIGN_BUFFER(size)]
#endif

其中 LWIP_MEM_SECTION 的定义如下:

/* Print the assertion message verbatim. The message is passed as an argument
 * to a fixed "%s" format rather than used as the format string itself, so a
 * '%' inside the message is printed literally instead of being parsed as a
 * conversion specifier. */
#define LWIP_PLATFORM_ASSERT(x) printf("%s", (x))

/* Name of the linker section used by LWIP_DECLARE_MEMORY_ALIGNED for lwIP
 * memory; define LWIP_MEM_SECTION earlier to override the default. */
#ifndef LWIP_MEM_SECTION
#define LWIP_MEM_SECTION ".fast_ram"
#endif

根据自己使用的接收任务处理能力来重新规划这个位置,同时也可自行扩展 malloc  的数量。

经过上述修改和优化后测试:长时间处理 TCP 任务时可轻松跑满百兆带宽,且不掉线。

 对 freertos 内存堆重新映射,使能 configAPPLICATION_ALLOCATED_HEAP = 1

#define configSUPPORT_STATIC_ALLOCATION                 0                       /* 1: support static memory allocation, default: 0 */
#define configSUPPORT_DYNAMIC_ALLOCATION                1                       /* 1: support dynamic memory allocation, default: 1 */
#define configAPPLICATION_ALLOCATED_HEAP                1                       /* 1: the application allocates the FreeRTOS heap (ucHeap) itself, default: 0 */
#define configSTACK_ALLOCATION_FROM_SEPARATE_HEAP       0                       /* 1: the application implements the memory allocate/free functions used when creating tasks, default: 0 */

指定分配到外部 sdram 

/* Allocate the memory for the heap. */
#if ( configAPPLICATION_ALLOCATED_HEAP == 1 )

/* The application writer has already defined the array used for the RTOS
* heap - probably so it can be placed in a special segment or address. */
    /* Here the heap array is defined in place (instead of the stock extern
     * declaration, kept commented out) and put in the ".sdram" section.
     * The ".fast" and ".axi_ram" placements are left as commented-out
     * alternatives. */
    //extern uint8_t ucHeap[ configTOTAL_HEAP_SIZE ];
    ATTR_PLACE_AT(".sdram") static uint8_t ucHeap[ configTOTAL_HEAP_SIZE ];
    //ATTR_PLACE_AT(".fast") static uint8_t ucHeap[ configTOTAL_HEAP_SIZE ];
    //ATTR_PLACE_AT(".axi_ram") static uint8_t ucHeap[ configTOTAL_HEAP_SIZE ];
#else
    /* Default: the heap array is defined here with no explicit section placement */
    PRIVILEGED_DATA static uint8_t ucHeap[ configTOTAL_HEAP_SIZE ];
#endif /* configAPPLICATION_ALLOCATED_HEAP */

 

posted @ 2023-12-07 16:48  求隐  阅读(798)  评论(0)    收藏  举报