arm Linux中dma的cache管理

作者

付汉杰 hankf@amd.com

概述

前两周有人询问DMA下的cache操作和dma-coherent。以前零碎看过代码。临时找,还没有找到。

这两天整理了调用流程,也找到了dma-coherent的用法。Linux的文档里没有详细说明dma-coherent的用法。根据代码,如果dma的设备树里有dma-coherent,Linux则认为硬件会维护cache一致性,不会在dma运行过程中执行cache操作。

dma_map_single/dma_unmap_single的使用

设备驱动里一般调用dma_map_single()/dma_unmap_single()处理cache。调用dma_map_single函数时需要指定DMA的方向,DMA_TO_DEVICE或者DMA_FROM_DEVICE。Linux会根据direction的值invalidate或者clean cache。

drivers\net\ethernet\cadence\macb_main.c的函数macb_tx_map()里,调用dma_map_single()刷新cache,macb_tx_interrupt()的macb_tx_unmap()再调用dma_unmap_single()。

代码简化后如下:

macb_tx_map( )
{
.......
	mapping = dma_map_single(&bp->pdev->dev,
			 skb->data + offset,
			 size, DMA_TO_DEVICE);
	.......		
}
			 
macb_tx_unmap( )
{
	.......				
	 
	dma_unmap_single(&bp->pdev->dev, tx_skb->mapping,
			 tx_skb->size, DMA_TO_DEVICE);
	.......		
}
	 
					 					
gem_rx( )
{
.......
	dma_unmap_single(&bp->pdev->dev, addr,
		bp->rx_buffer_size, DMA_FROM_DEVICE);
	.......		
}					
					
gem_rx_refill()
{
.......
	/* now fill corresponding descriptor entry */
	paddr = dma_map_single(&bp->pdev->dev, skb->data,
					bp->rx_buffer_size,
					DMA_FROM_DEVICE);
	.......		
}

dma_map_single/dma_unmap_single的定义

dma_map_single()和dma_unmap_single()都在include\linux\dma-mapping.h里定义。如果没有特殊情况,会调用dma_direct_map_page()、dma_direct_unmap_page()。
arm64的特殊情况包括iommu和Xen虚拟机。 iommu和Xen虚拟机都需要提供dma_map_ops,于是使用其中的map、unmap函数。iommu的dma_map_ops是drivers\iommu\dma-iommu.c中定义的iommu_dma_ops。 Xen的dma_map_ops是drivers/xen/swiotlb-xen.c中定义的xen_swiotlb_dma_ops。

/*
 * Convenience wrappers: forward (device, address, size, direction) to the
 * corresponding *_attrs variant with the attrs argument fixed to 0.
 */
#define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, 0)
#define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, 0)

/*
 * Map the buffer at virtual address @ptr (length @size) for DMA.
 *
 * The address is split into its page (virt_to_page) and the offset inside
 * that page (offset_in_page), and the request is handed to
 * dma_map_page_attrs(). Returns whatever dma_map_page_attrs() returns.
 */
static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	/* DMA-debug hook, invoked before the mapping is created. */
	debug_dma_map_single(dev, ptr, size);
	return dma_map_page_attrs(dev, virt_to_page(ptr), offset_in_page(ptr),
			size, dir, attrs);
}

/*
 * Counterpart of dma_map_single_attrs(): forwards all arguments unchanged
 * to dma_unmap_page_attrs().
 */
static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	return dma_unmap_page_attrs(dev, addr, size, dir, attrs);
}


/*
 * Map @size bytes starting at @offset inside @page for DMA in direction
 * @dir and return the resulting DMA address.
 *
 * Dispatches on the device's dma_map_ops: the direct-mapping path is used
 * when dma_is_direct() says so, otherwise the ops' map_page callback.
 */
static inline dma_addr_t dma_map_page_attrs(struct device *dev,
		struct page *page, size_t offset, size_t size,
		enum dma_data_direction dir, unsigned long attrs)
{
	const struct dma_map_ops *ops = get_dma_ops(dev);
	dma_addr_t addr;

	/* An invalid direction is treated as a fatal programming error. */
	BUG_ON(!valid_dma_direction(dir));
	if (dma_is_direct(ops))
		addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
	else
		addr = ops->map_page(dev, page, offset, size, dir, attrs);
	/* DMA-debug hook, given the address produced by either path. */
	debug_dma_map_page(dev, page, offset, size, dir, addr);

	return addr;
}

/*
 * Unmap the DMA address @addr of length @size for direction @dir.
 *
 * Mirrors dma_map_page_attrs(): uses the direct path when dma_is_direct()
 * says so, otherwise the ops' unmap_page callback.
 */
static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	const struct dma_map_ops *ops = get_dma_ops(dev);

	/* An invalid direction is treated as a fatal programming error. */
	BUG_ON(!valid_dma_direction(dir));
	if (dma_is_direct(ops))
		dma_direct_unmap_page(dev, addr, size, dir, attrs);
	/* unmap_page is checked for NULL: nothing is called if it is unset. */
	else if (ops->unmap_page)
		ops->unmap_page(dev, addr, size, dir, attrs);
	debug_dma_unmap_page(dev, addr, size, dir);
}

dma_direct_map_page/dma_direct_unmap_page的定义

dma_direct_map_page()、dma_direct_unmap_page()在kernel\dma\direct.c中定义。

/*
 * Direct-mapping implementation of page mapping.
 *
 * Computes the physical address of (@page + @offset), translates it to a
 * DMA address with phys_to_dma(), and, for devices that are not DMA
 * coherent, performs the architecture cache maintenance for the device.
 *
 * Returns the DMA address, or DMA_MAPPING_ERROR when the address cannot be
 * used directly and swiotlb_map() does not succeed either.
 */
dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
		unsigned long offset, size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	dma_addr_t dma_addr = phys_to_dma(dev, phys);

	/*
	 * swiotlb_map() is only tried when the direct address is not usable.
	 * It receives phys and dma_addr by pointer, so it can replace them
	 * (presumably with a bounce-buffer address; see swiotlb for details).
	 */
	if (unlikely(!dma_direct_possible(dev, dma_addr, size)) &&
	    !swiotlb_map(dev, &phys, &dma_addr, size, dir, attrs)) {
		report_addr(dev, dma_addr, size);
		return DMA_MAPPING_ERROR;
	}

	/*
	 * Cache maintenance is skipped when the device is DMA coherent or the
	 * caller passed DMA_ATTR_SKIP_CPU_SYNC.
	 */
	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		arch_sync_dma_for_device(dev, phys, size, dir);
	return dma_addr;
}
EXPORT_SYMBOL(dma_direct_map_page);


/*
 * Direct-mapping implementation of page unmapping.
 *
 * Unless DMA_ATTR_SKIP_CPU_SYNC is set, syncs the buffer for the CPU via
 * dma_direct_sync_single_for_cpu(); then, if the physical address lies in
 * a swiotlb buffer, releases that swiotlb mapping.
 */
void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	phys_addr_t phys = dma_to_phys(dev, addr);

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		dma_direct_sync_single_for_cpu(dev, addr, size, dir);

	if (unlikely(is_swiotlb_buffer(phys)))
		swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs);
}
EXPORT_SYMBOL(dma_direct_unmap_page);


/*
 * Make a DMA buffer safe for the CPU to access.
 *
 * For devices that are not DMA coherent this calls the architecture hooks
 * arch_sync_dma_for_cpu() and arch_sync_dma_for_cpu_all(); coherent devices
 * skip both. A swiotlb buffer is additionally synced with SYNC_FOR_CPU.
 */
void dma_direct_sync_single_for_cpu(struct device *dev,
        dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
    phys_addr_t paddr = dma_to_phys(dev, addr);
 
    /* Cache maintenance is needed only when hardware does not keep coherency. */
    if (!dev_is_dma_coherent(dev)) {
        arch_sync_dma_for_cpu(dev, paddr, size, dir);
        arch_sync_dma_for_cpu_all(dev);
    }
 
    if (unlikely(is_swiotlb_buffer(paddr)))
        swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
}

一路跟踪,dma_map_single()会最终调用到arch_sync_dma_for_device(), dma_unmap_single()会最终调用到arch_sync_dma_for_cpu(), 和arch_sync_dma_for_cpu_all()。 而arch_sync_dma_for_cpu_all()对Arm64是空函数。

arch_sync_dma_for_device/arch_sync_dma_for_cpu的定义

arch_sync_dma_for_device/arch_sync_dma_for_cpu的定义在文件arch\arm64\mm\dma-mapping.c中。

/*
 * arm64 hook run before a buffer is handed to the device: converts the
 * physical address to a virtual address with phys_to_virt() and calls
 * __dma_map_area(), which picks the cache operation from @dir.
 * @dev is not used here.
 */
void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir)
{
	__dma_map_area(phys_to_virt(paddr), size, dir);
}


/*
 * arm64 hook run before the CPU accesses a buffer again: converts the
 * physical address to a virtual address with phys_to_virt() and calls
 * __dma_unmap_area(), which picks the cache operation from @dir.
 * @dev is not used here.
 */
void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir)
{
	__dma_unmap_area(phys_to_virt(paddr), size, dir);
}

__dma_map_area/__dma_unmap_area的定义

__dma_map_area/__dma_unmap_area的定义在文件arch\arm64\mm\cache.S中。

所有汇编实现,也在文件arch\arm64\mm\cache.S中。

/*
 *	__dma_map_area(start, size, dir)
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 *
 *	Selects the cache operation for the "map" side:
 *	dir == DMA_FROM_DEVICE -> __dma_inv_area, anything else ->
 *	__dma_clean_area. Both targets are reached with a plain branch
 *	(no link), so this routine itself never executes a ret.
 */
ENTRY(__dma_map_area)
	/* w2 is compared against the direction constant */
	cmp	w2, #DMA_FROM_DEVICE
	b.eq	__dma_inv_area
	b	__dma_clean_area
ENDPIPROC(__dma_map_area)

/*
 *	__dma_unmap_area(start, size, dir)
 *	- start	- kernel virtual start address
 *	- size	- size of region
 *	- dir	- DMA direction
 *
 *	Selects the cache operation for the "unmap" side:
 *	dir != DMA_TO_DEVICE -> __dma_inv_area; for DMA_TO_DEVICE nothing
 *	is done and the routine simply returns.
 */
ENTRY(__dma_unmap_area)
	/* w2 is compared against the direction constant */
	cmp	w2, #DMA_TO_DEVICE
	b.ne	__dma_inv_area
	/* DMA_TO_DEVICE: no cache maintenance on unmap */
	ret
ENDPIPROC(__dma_unmap_area)

可以看到,map系列函数调用的__dma_map_area,方向如果是DMA_FROM_DEVICE,执行__dma_inv_area; 否则执行 __dma_clean_area。
unmap系列函数调用的__dma_unmap_area,方向如果不是DMA_TO_DEVICE,执行__dma_inv_area; 否则直接返回,不做任何cache操作。

总结如下:

Operation map unmap
DMA_FROM_DEVICE __dma_inv_area __dma_inv_area
DMA_TO_DEVICE __dma_clean_area None

__dma_inv_area 完成invalidate操作,丢弃cache数据。 它的注释是:

Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
 * 	are invalidated. Any partial lines at the ends of the interval are
 *	also cleaned to PoC to prevent data loss.

关于Invalidate, ARM的手册"Arm Architecture Reference Manual for A-profile architecture"说明如下:

Invalidate  A cache invalidate instruction ensures that updates made visible by observers that access memory 
at the point to which the invalidate is defined, are made visible to an observer that controls the cache. 
This might result in the loss of updates to the locations affected by the invalidate instruction that 
have been written by observers that access the cache, if those updates have not been cleaned from 
the cache since they were made.
If the address of an entry on which the invalidate instruction operates is Normal, Non-cacheable or 
any type of Device memory then an invalidate instruction also ensures that this address is not 
present in the cache.

__dma_clean_area 完成clean操作,把cache中的脏数据写回DDR。 它的注释是:

 Ensure that any D-cache lines for the interval [kaddr, kaddr+size)
 * 	are cleaned to the PoC.

关于Clean, ARM的手册说明如下:

Clean  A cache clean instruction ensures that updates made by an observer that controls the cache are made 
visible to other observers that can access memory at the point to which the instruction is performed. 
Once the Clean has completed, the new memory values are guaranteed to be visible to the point to 
which the instruction is performed, for example to the Point of Unification.
The cleaning of a cache entry from a cache can overwrite memory that has been written by another 
observer only if the entry contains a location that has been written to by an observer in the 
shareability domain of that memory location.

以太网发送前是 map DMA_TO_DEVICE, 执行__dma_clean_area,把数据从cache更新到DDR; 发送后是 unmap DMA_TO_DEVICE, 空操作。

以太网接收前是 map DMA_FROM_DEVICE,执行的 __dma_inv_area,丢弃cache数据; 接收后是unmap DMA_FROM_DEVICE,再次执行 __dma_inv_area,丢弃DMA期间可能被重新加载到cache的旧数据,保证CPU读到DDR中的新数据。

更新表格:

Operation map,for_device unmap,for_cpu
DMA_FROM_DEVICE 接收前,__dma_inv_area 接收后,__dma_inv_area
DMA_TO_DEVICE 发送前,__dma_clean_area 发送后,None

以前一直没有想通linux驱动里为什么要做两次cache操作。Standalone的驱动里,只需要发送前flush cache, 接收后invalidate cache;发送后和接收前不需要操作cache。根据上述代码分析,发送后的DMA_TO_DEVICE的unmap操作,实际不会操作cache,相当于空操作。

接收做两次操作还稍微可以理解,可能是别的模块导致旧数据又被加载到cache。以前也处理过一个问题,arm的预测执行会导致软件完全没有使用的ddr被读取,必须在mmu table里设置对应地址的表项完全无效,才能杜绝这种情况。也许Linux也遇到了类似问题,才改成这样的操作。

cache操作的函数名字也有点费解,先map, 再unmap。曾经猜想map/unmap函数里,更改了MMU table的设置。这两天看了一下,还是没有看到相关代码。

dma-coherent

DMA的设备树里可以配置属性"dma-coherent"。

drivers\of\address.c里的of_dma_is_coherent( )读取属性"dma-coherent"。

/*
 * Report whether the device-tree node @np, or any of its ancestors,
 * carries the boolean property "dma-coherent".
 *
 * Starts at @np and walks up through the parents with
 * of_get_next_parent() until a node with the property is found (returns
 * true) or the walk runs out of nodes (returns false).
 */
bool of_dma_is_coherent(struct device_node *np)
{
    /* Take our own reference so the walk can release nodes as it goes. */
    struct device_node *node = of_node_get(np);
 
    while (node) {
        if (of_property_read_bool(node, "dma-coherent")) {
            of_node_put(node);
            return true;
        }
        /* NOTE(review): presumably drops the reference on node and returns the parent with a reference held - confirm against of_get_next_parent(). */
        node = of_get_next_parent(node);
    }
    /* node is NULL here, since the loop only exits that way. */
    of_node_put(node);
    return false;
}

drivers\of\device.c中的of_dma_configure( )调用of_dma_is_coherent( )读取属性"dma-coherent",然后再调用arch_setup_dma_ops( ),把结果保存在变量“dev->dma_coherent”中。

/**
 * of_dma_configure - Setup DMA configuration
 * @dev:	Device to apply DMA configuration
 * @np:		Pointer to OF node having DMA configuration
 * @force_dma:  Whether device is to be set up by of_dma_configure() even if
 *		DMA capability is not explicitly described by firmware.
 *
 * Try to get devices's DMA configuration from DT and update it
 * accordingly.
 *
 * If platform code needs to use its own special DMA configuration, it
 * can use a platform bus notifier and handle BUS_NOTIFY_ADD_DEVICE events
 * to fix up DMA configuration.
 *
 * In the part shown here, returns 0 on success or -EPROBE_DEFER when the
 * IOMMU configuration reports that probing must be deferred.
 */
int of_dma_configure(struct device *dev, struct device_node *np, bool force_dma)
{
    ........

	/* Look up "dma-coherent" on @np and log the result. */
	coherent = of_dma_is_coherent(np);
	dev_dbg(dev, "device is%sdma coherent\n",
		coherent ? " " : " not ");

	/* Only -EPROBE_DEFER aborts here; other error pointers fall through. */
	iommu = of_iommu_configure(dev, np);
	if (IS_ERR(iommu) && PTR_ERR(iommu) == -EPROBE_DEFER)
		return -EPROBE_DEFER;

	dev_dbg(dev, "device is%sbehind an iommu\n",
		iommu ? " " : " not ");

	/* Pass the gathered settings, including the coherency flag, to arch code. */
	arch_setup_dma_ops(dev, dma_addr, size, iommu, coherent);

	return 0;
}

arch\arm64\mm\dma-mapping.c中的arch_setup_dma_ops( ),把设置保存在变量“dev->dma_coherent”中。

void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
			const struct iommu_ops *iommu, bool coherent)
{
	int cls = cache_line_size_of_cpu();

	WARN_TAINT(!coherent && cls > ARCH_DMA_MINALIGN,
		   TAINT_CPU_OUT_OF_SPEC,
		   "%s %s: ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
		   dev_driver_string(dev), dev_name(dev),
		   ARCH_DMA_MINALIGN, cls);

	dev->dma_coherent = coherent;
	if (iommu)
		iommu_setup_dma_ops(dev, dma_base, size);

#ifdef CONFIG_XEN
	if (xen_initial_domain())
		dev->dma_ops = &xen_swiotlb_dma_ops;
#endif

dev_is_dma_coherent的定义和使用

在dma_direct_map_page()和dma_direct_sync_single_for_cpu()中,调用dev_is_dma_coherent(),检查上述变量dev->dma_coherent,判断硬件是否维护cache一致性。如果是,则不进行cache操作。

dev_is_dma_coherent的定义在文件include\linux\dma-noncoherent.h中。

/*
 * Return the device's dma_coherent flag; callers use it to decide whether
 * cache maintenance around DMA can be skipped.
 */
static inline bool dev_is_dma_coherent(struct device *dev)
{
    return dev->dma_coherent;
}

后记

广东卫健委

2022年12月22日广东省新冠肺炎疫情情况

12月21日0-24时,全省新增本土确诊病例1325例(广州546例,深圳90例,珠海93例,汕头93例,佛山58例,韶关9例,梅州34例,惠州28例,汕尾40例,东莞8例,中山40例,江门7例,阳江47例,湛江21例,茂名86例,肇庆40例,清远14例,潮州30例,揭阳13例,云浮28例)。全省新增境外输入确诊病例47例(广州46例,佛山1例)。

全省新增出院659例(境外输入31例),目前在院10982例(境外输入163例)。

截至12月21日24时,全省累计报告新冠肺炎确诊病例64114例(境外输入6777例)。

个人感受

12月20号刚写完这篇文章,身体就觉得一股不同于以往的凉意。再过两个小时,也发烧了。当天晚上10点,体温升高到39.5度。前几天只买了小孩的退烧药美林。只好将就,喝了10ml美林。谁说的90%无症状,出来解释一下。
第二天早上去社康看发热门诊。社康门口排长龙,前面有超过50人排队。护士检查了体温,还高于38.6度,给了一盒新冠抗原。等待的时候咳嗽声此起彼伏,于是转去药店。有两个人发烧,在药店买到了4粒布洛芬胶囊。药店防囤货做得不错,每人限买两粒。买到了退烧药,赶紧回家。
第三天退烧了。但是肚子有点不舒服,身体乏力。
第四天嘴唇上长疱疹,再去社康。早上9点,刚开门半小时,排队号已经超过40人。社康门口贴了多个招牌,说没有退烧药,止咳药只剩几瓶。有一对年轻人说连续两天高烧39度,希望输液,被护士拒绝。
希望国泰民安。

posted @ 2022-12-20 16:09  HankFu  阅读(1372)  评论(0编辑  收藏  举报