cluster light Vulkan实现(cpu)
准备工作
以下代码基于开源项目vulkanExample中pbr部分
1、创建多光源场景
设置均匀分布的64个光源,并绘制光源形状
为简化操作,光源形状为球形,并设置衰减
void updateLights()
{
const float p = 15.0f; // 空间范围参数
const int gridSize = static_cast<int>(ceil(sqrt(static_cast<float>(maxnumLights)))); // 计算网格大小,例如 8x8
const float spacing = 2.0f * p / (gridSize - 1); // 每个光源之间的间距
int lightIndex = 0;
for (int y = 0; y < gridSize && lightIndex < maxnumLights; y++) {
for (int x = 0; x < gridSize && lightIndex < maxnumLights; x++) {
// 计算光源位置
float posX = -p + x * spacing; // 从 -p 到 p
float posZ = -p + y * spacing; // 从 -p 到 p
float posY = -p * 0.5f; // 固定高度
uboParams.lights[lightIndex].position = glm::vec4(posX, posY, posZ, 1.0f);
// 设置光源颜色(循环使用几种颜色)
glm::vec3 color;
switch (lightIndex % 4) {
case 0: color = glm::vec3(1.0f, 0.0f, 0.0f); break; // 红
case 1: color = glm::vec3(0.0f, 1.0f, 0.0f); break; // 绿
case 2: color = glm::vec3(0.0f, 0.0f, 1.0f); break; // 蓝
case 3: color = glm::vec3(1.0f, 1.0f, 0.0f); break; // 黄
}
uboParams.lights[lightIndex].colorAndRadius = glm::vec4(color, 15.1f);
// 设置方向(可选,指向原点)
glm::vec3 direction = glm::normalize(glm::vec3(0.0f, 0.0f, 0.0f) - glm::vec3(posX, posY, posZ));
uboParams.lights[lightIndex].direction = glm::vec4(direction, 1.0f);
// 设置截止角度(保持与原代码一致)
uboParams.lights[lightIndex].cutOff = glm::vec4(12.5f, 18.5f, 0.0f, 0.0f);
lightIndex++;
}
}
设置衰减
// Scalar contribution of a light with a finite radius: a quadratic distance
// falloff multiplied by the Lambert term max(dot(N, L), 0).
float radiance(float radius, float3 lightVec, float3 N, float3 L)
{
    float dist = length(lightVec);
    // Nothing is received outside the light's radius.
    if (dist > radius)
    {
        return 0.0;
    }
    // Falloff goes from 1 at the light to 0 at the radius, then is squared.
    float falloff = clamp(1.0 - dist / radius, 0.0, 1.0);
    float lambert = max(dot(N, L), 0.0);
    return pow(falloff, 2.0) * lambert;
}

2、确定初始渲染方式和着色模型
初始渲染方式,逐个像素遍历所有光源
着色模型 BRDF
// Specular Cook-Torrance style term built from the D_GGX, G_SchlicksmithGGX and
// F_Schlick helpers.
//   L, V, N   - light, view and normal directions
//   metallic  - forwarded to F_Schlick
//   roughness - surface roughness; clamped to >= 0.05 for the geometry term
// Returns the specular colour for a white light, or black when the surface
// faces away from the light.
float3 BRDF(float3 L, float3 V, float3 N, float metallic, float roughness)
{
    // Precalculate vectors and dot products
    float3 H = normalize(V + L);
    float dotNV = clamp(dot(N, V), 0.0, 1.0);
    float dotNL = clamp(dot(N, L), 0.0, 1.0);
    float dotNH = clamp(dot(N, H), 0.0, 1.0);
    // Light color fixed
    float3 lightColor = float3(1.0, 1.0, 1.0);
    float3 color = float3(0.0, 0.0, 0.0);
    if (dotNL > 0.0)
    {
        float rroughness = max(0.05, roughness);
        // D = Normal distribution (Distribution of the microfacets)
        // NOTE(review): uses the unclamped roughness while G uses rroughness - confirm this is intended.
        float D = D_GGX(dotNH, roughness);
        // G = Geometric shadowing term (Microfacets shadowing)
        float G = G_SchlicksmithGGX(dotNL, dotNV, rroughness);
        // F = Fresnel factor (Reflectance depending on angle of incidence)
        float3 F = F_Schlick(dotNV, metallic);
        // dotNV is clamped to [0, 1] and can be exactly 0 at grazing view angles;
        // keep the denominator away from zero so the result never becomes inf/NaN.
        float3 spec = D * F * G / max(4.0 * dotNL * dotNV, 0.0001);
        color += spec * dotNL * lightColor;
    }
    return color;
}
具体工作
3、确定cluster light要使用的数据结构并定义
3.1 场景分割
将视锥体划分为三维网格(x, y, z 维度),每个网格单元称为“集群”(Cluster)。
确定每个维度划分的精细度,并计算得到cluster的总数
// C++ side: resolution of the cluster grid.
constexpr uint32_t CLUSTER_SIZE_X = 16; // clusters across the screen width
constexpr uint32_t CLUSTER_SIZE_Y = 16; // clusters across the screen height
constexpr uint32_t CLUSTER_SIZE_Z = 16; // clusters along the depth axis
// Total number of clusters in the grid.
constexpr uint32_t TOTAL_CLUSTERS = CLUSTER_SIZE_Z * CLUSTER_SIZE_Y * CLUSTER_SIZE_X;
// GLSL side: must mirror the cluster grid resolution used by the C++ code.
#define CLUSTER_SIZE_X 16
#define CLUSTER_SIZE_Y 16
#define CLUSTER_SIZE_Z 16
3.2 cluster 光源映射缓冲区buffer
需要一个缓冲区来存储每一个cluster的光源数量和光源索引。
// C++ side: per-cluster light bookkeeping.
struct ClusterLightData {
uint32_t lightCount; // number of lights affecting this cluster
uint32_t lightOffset; // start offset of this cluster's lights in the global light index list
};
// CPU-side working set used while assigning lights to clusters.
struct ClusterData {
std::vector<ClusterLightData> clusters; // TOTAL_CLUSTERS elements
std::vector<uint32_t> lightIndexList; // global light index list
};
定义 ClusterLightData这个结构体
lightCount为cluster中的光源数量,lightoffset为全局索引列表中的起始偏移,该数据结构与Tiled-based Light Culling类似

3.3 Uniform Buffer内容数据定义
需要为cluster的数据传输做准备,考虑在cpu端完成数据的处理和存储,以Uniform Buffer更新的方式实现。为此,需要先确定数据内容的大小和格式。
由于整个过程不涉及浮点运算,所以与此前定义相同,均采用uint32_t格式。
cpp
// CPU-side mirror of the light/cluster uniform block.
struct UBOParams {
Light lights[maxnumLights]; // light array
uint32_t clusterLightCounts[TOTAL_CLUSTERS]; // number of lights per cluster
uint32_t clusterLightOffsets[TOTAL_CLUSTERS]; // per-cluster offset into lightIndexList
uint32_t lightIndexList[maxnumLights * TOTAL_CLUSTERS]; // global light index list (sized for the worst case: every light affects every cluster)
}uboParams;
hlsl
// Shader-side view of the light/cluster uniform block, bound at register b1.
// NOTE(review): the light array is sized with NUM_LIGHTS while lightIndexList uses
// maxnumLights - confirm both names are defined in the shader and have the same value.
struct UBOShared {
Light lights[NUM_LIGHTS]; // light array
uint32_t clusterLightCounts[TOTAL_CLUSTERS]; // number of lights per cluster
uint32_t clusterLightOffsets[TOTAL_CLUSTERS]; // per-cluster offset into lightIndexList
uint32_t lightIndexList[maxnumLights * TOTAL_CLUSTERS]; // global light index list (sized for the worst case: every light affects every cluster)
};
cbuffer uboParams : register(b1) { UBOShared uboParams; };
在已知光源均匀分布的情况下,此处lightIndexList的计算可能造成空间浪费,但影响不大。
要尤其注意整个部分的数据包含关系
最顶层,即第一层为uniform Buffers。
uniformBuffers 的成员:
object:存储矩阵数据(uboMatrices)。
params:存储光源数据(uboParams)。
// Top-level container holding every uniform buffer used by the example.
struct {
    vks::Buffer object; // transformation matrices and camera position
    vks::Buffer params; // light properties
} uniformBuffers;
第二层为uniformBuffers.object和uniformBuffers.params
分别为
// Matrix data stored in uniformBuffers.object.
struct UBOMatrices {
glm::mat4 projection; // projection matrix
glm::mat4 model; // model matrix
glm::mat4 view; // view matrix
glm::vec3 camPos; // camera position
} uboMatrices;
// One light as stored in the uniform buffer; every member is a 16-byte vec4.
struct Light {
glm::vec4 position; // light position (16 bytes)
glm::vec4 colorAndRadius;// xyz = colour, w = radius of influence
glm::vec4 direction;
glm::vec4 cutOff; // outer cutoff, cutoff, minimum, pow (component order as noted by the author - TODO confirm)
};
// Light data stored in uniformBuffers.params (variant without per-cluster offsets).
struct UBOParams {
Light lights[maxnumLights]; // light array
uint32_t clusterLightCounts[TOTAL_CLUSTERS]; // number of lights per cluster
uint32_t lightIndexList[maxnumLights * TOTAL_CLUSTERS]; // global light index list (sized for the worst case: every light affects every cluster)
}uboParams;
3.4 更新描述符集并更新
此处可以在原有light属性上增加,但考虑到cluster相关数据的数据量较大,使用uniform buffer可能造成性能问题,可以考虑使用SSBO
// Creates the descriptor pool, the descriptor set layout and the single descriptor
// set of the example, then points bindings 0 and 1 at the two uniform buffers.
void setupDescriptors()
{
// 1 Pool: create the descriptor pool
/* Parameters
VkDescriptorPoolSize: number of descriptors of one type the pool provides.
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: the descriptor type is uniform buffer.
4: the pool can hand out 4 uniform buffer descriptors.
*/
std::vector<VkDescriptorPoolSize> poolSizes = {
vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 4),
};
/*
VkDescriptorPoolCreateInfo: creation info of the descriptor pool.
poolSizes: sizes and types available in the pool.
2: number of descriptor sets that can be allocated from the pool.
*/
VkDescriptorPoolCreateInfo descriptorPoolInfo = vks::initializers::descriptorPoolCreateInfo(poolSizes, 2);
// Create the descriptor pool
VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolInfo, nullptr, &descriptorPool));
// 2 Layout: create the descriptor set layout
// VkDescriptorSetLayoutBinding: describes one binding point of the layout.
std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings = {
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0),
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT, 1),
};
/*
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: the descriptor type is uniform buffer.
VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT: visible to the vertex and fragment shaders.
0 / 1: binding point index.
The descriptor count per binding (1) and the immutable samplers (nullptr) are not passed here;
presumably the helper fills them in with those defaults - confirm against vks::initializers.
*/
VkDescriptorSetLayoutCreateInfo descriptorLayout = vks::initializers::descriptorSetLayoutCreateInfo(setLayoutBindings);
VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &descriptorSetLayout));
// 3 Set: allocate the descriptor set
VkDescriptorSetAllocateInfo allocInfo = vks::initializers::descriptorSetAllocateInfo(descriptorPool, &descriptorSetLayout, 1);
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet));
// device is the logical device (the GPU handle used for all calls here)
/*
VkDescriptorSetAllocateInfo: allocation info for descriptor sets.
descriptorPool: pool to allocate from.
&descriptorSetLayout: layout of the set.
1: allocate one descriptor set.
vkAllocateDescriptorSets: allocates the descriptor set from the pool.
*/
// 4 Update: write the buffer descriptors into the set
std::vector<VkWriteDescriptorSet> writeDescriptorSets = {
vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0, &uniformBuffers.object.descriptor),
vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, &uniformBuffers.params.descriptor),
};
vkUpdateDescriptorSets(device, static_cast<uint32_t>(writeDescriptorSets.size()), writeDescriptorSets.data(), 0, NULL);
/*
VkWriteDescriptorSet: one descriptor set update.
descriptorSet: the set being updated.
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: descriptor type.
0 or 1: binding point index.
&uniformBuffers.object.descriptor or &uniformBuffers.params.descriptor: buffer info of the descriptor.
vkUpdateDescriptorSets: applies the updates.
*/
}
4、
4.1 创建Uniform Buffer
在prepareUniformBuffers中扩展uniformBuffers.params,加入cluster内容数据的存储。
// Create the host-visible uniform buffer that backs uboParams.
VK_CHECK_RESULT(vulkanDevice->createBuffer(
    VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
    &uniformBuffers.params,
    sizeof(uboParams)
));
// Map the buffer so that later updates can memcpy into uniformBuffers.params.mapped.
// (This call used to sit inside the comment below and was therefore never executed.)
VK_CHECK_RESULT(uniformBuffers.params.map());
/*
vulkanDevice->createBuffer: helper from the example framework that wraps vkCreateBuffer
(buffer creation) and vkAllocateMemory (memory allocation).
Parameter 1: VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT - the buffer is used as a uniform buffer,
i.e. its contents are handed to shaders.
Parameter 2: VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT:
  HOST_VISIBLE: the CPU can map and access the memory directly.
  HOST_COHERENT: CPU writes become visible to the GPU without manual flushes.
Parameter 3: &uniformBuffers.object or &uniformBuffers.params - the buffer object that
receives the handle and metadata.
Parameter 4: sizeof(uboMatrices) or sizeof(uboParams) - the buffer is sized from the
struct; note that sizeof(uboParams) grows with the cluster arrays added to UBOParams.
VK_CHECK_RESULT: macro that checks whether the Vulkan call succeeded.
*/
此处可借用原本uboParams的相关定义,确保后续数据对齐即可。
4.2 加载光源数据,更新uniform buffer
创建函数updateLightsCluster,处理并存储cluster信息
void updateLights() {
ClusterData clusterData;
clusterData.clusters.resize(TOTAL_CLUSTERS);
clusterData.lightIndexList.clear();
// 视锥体参数
glm::mat4 invViewProj = glm::inverse(uboMatrices.projection * uboMatrices.view);
float zNear = 0.1f;
float zFar = 256.0f;
for (int i = 0; i < maxnumLights; i++) {
glm::vec4 lightPos = uboParams.lights[i].position;
float radius = uboParams.lights[i].colorAndRadius.w;
// 转换到 NDC 空间
glm::vec4 clipPos = uboMatrices.projection * uboMatrices.view * lightPos;
clipPos /= clipPos.w;
// 转换为集群坐标
uint32_t clusterX = static_cast<uint32_t>((clipPos.x * 0.5f + 0.5f) * CLUSTER_SIZE_X);
uint32_t clusterY = static_cast<uint32_t>((clipPos.y * 0.5f + 0.5f) * CLUSTER_SIZE_Y);
float depth = (clipPos.z * 0.5f + 0.5f) * (zFar - zNear) + zNear;
uint32_t clusterZ = static_cast<uint32_t>((log(depth / zNear) / log(zFar / zNear)) * CLUSTER_SIZE_Z);
// 限制范围
clusterX = glm::clamp(clusterX, 0u, CLUSTER_SIZE_X - 1);
clusterY = glm::clamp(clusterY, 0u, CLUSTER_SIZE_Y - 1);
clusterZ = glm::clamp(clusterZ, 0u, CLUSTER_SIZE_Z - 1);
// 添加到集群
uint32_t clusterIdx = clusterZ * CLUSTER_SIZE_X * CLUSTER_SIZE_Y + clusterY * CLUSTER_SIZE_X + clusterX;
clusterData.clusters[clusterIdx].lightCount++;
clusterData.clusters[clusterIdx].lightOffset = static_cast<uint32_t>(clusterData.lightIndexList.size());
clusterData.lightIndexList.push_back(i);
}
// 更新 UBOParams
for (uint32_t i = 0; i < TOTAL_CLUSTERS; i++) {
uboParams.clusterLightCounts[i] = clusterData.clusters[i].lightCount;
}
memcpy(uboParams.lightIndexList, clusterData.lightIndexList.data(), clusterData.lightIndexList.size() * sizeof(uint32_t));
memcpy(uniformBuffers.params.mapped, &uboParams, sizeof(uboParams));
}
在updateLights后运行
// Per-frame entry point: refreshes the uniform data and draws one frame.
virtual void render()
{
    // Nothing to do until all resources have been prepared.
    if (!prepared) {
        return;
    }
    updateUniformBuffers(); // refresh the matrix buffer
    if (!paused) {
        updateLights(); // light updates stop while paused
    }
    // The cluster assignment reads the view/projection matrices refreshed above, so it
    // has to run every frame; otherwise moving the camera while paused leaves stale clusters.
    updateLightsCluster();
    draw(); // draw the frame
}
4.3 应用cluster数据着色
踩坑记录
1、数据对齐
在vulkan中,数据对齐的主要体现是内存对齐。根据硬件(通常是gpu,受显存位宽等因素影响)的不同,某些数据必须存储在16字节,64字节的整数倍地址上。
数据没有正确对齐时,可能会导致性能下降、硬件错误,某些地址无法访问、数据一致性被破坏
在vulkan中内存对齐主要在以下几个场景
(1)缓冲区(Buffer):存储顶点、索引数据、统一缓冲区(UBO)等
(2)图像(Image):纹理数据的存储和访问
(3)描述符集(Descriptor Sets):着色器资源绑定
(4)推送常量(Push Constants):快速传递少量数据到着色器
(5)内存分配:通过vkAllocateMemory分配的设备内存
1.1 缓冲区对齐(Buffer Alignment)
创建缓冲区时的对齐:
当使用vkCreateBuffer创建缓冲区时,Vulkan会返回VkMemoryRequirements结构体,其中包含alignment字段,指定缓冲区内存的起始地址必须满足的对齐要求。
例如,alignment可能是64字节(常见于统一缓冲区)或256字节(某些专用硬件)。
开发者必须确保分配的内存满足这个对齐要求,通常通过vkAllocateMemory或内存分配器(如VMA)来处理。
偏移对齐:
如果一个缓冲区被划分为多个子区域(如动态统一缓冲区),每个子区域的偏移量(offset)必须满足VkPhysicalDeviceLimits::minUniformBufferOffsetAlignment或minStorageBufferOffsetAlignment。
例如,如果minUniformBufferOffsetAlignment是64字节,那么统一缓冲区的每个动态绑定偏移量必须是64的倍数。
1.2 统一缓冲区(Uniform Buffer)和存储缓冲区(Storage Buffer)
标量对齐(Scalar Alignment):
Vulkan遵循std140或std430布局规则(由GLSL/HLSL定义),具体取决于缓冲区类型。
std140(常用于统一缓冲区):
每个标量(如float、int)对齐到4字节。
向量:vec2对齐到8字节,vec3和vec4对齐到16字节(vec3本身只占用12字节,其后剩余的4字节可以放下一个标量)。
数组的每个元素的步长(stride)向上取整到16字节的倍数。例如,float[4]数组的每个元素占用16字节(而不是4字节),vec4[4]数组的每个元素同样占用16字节。
结构体对齐到其最大成员的对齐边界(通常是16字节)。
std430(常用于存储缓冲区):
比std140更紧凑:数组元素和结构体的对齐不再强制向上取整到16字节,例如float[4]数组的元素步长为4字节。
注意vec3仍然按16字节对齐,这一点与std140相同;减少的只是数组和结构体的填充。
glsl中,可以对布局的对齐规则进行显式声明
// Example of an explicitly declared std140 uniform block at binding 0.
layout(std140, binding = 0) uniform UBO {
vec4 data1; // offset 0, 16 bytes
vec3 data2; // offset 16 (vec3 is 16-byte aligned), 12 bytes
float data3; // offset 28, fills the 4 bytes left after data2
} ubo;
动态统一缓冲区:
动态统一缓冲区允许多个对象共享一个缓冲区,但每个对象的偏移量必须满足minUniformBufferOffsetAlignment。
例如,如果硬件要求64字节对齐,开发者需要手动在缓冲区中添加填充。
在软件RenderDoc中,可以截取某一帧,查看cpu和gpu端的数据传输情况

可以借此判断数据传输是否符合要求,同时可以查看相关变量的值是否正确。
起初,我在定义并更新相关数据后,在数据对齐上出现了问题,却误以为是程序逻辑有误,反复检查无果。
(1)对于单个数据而言,满足16字节对齐即可,例如:
// Example: every member already has a size that is a multiple of 16 bytes.
struct UBO {
glm::mat4 model; // 64 bytes
glm::vec4 color; // 16 bytes
// total size: 80 bytes
};
(2)对于大型连续数组而言,单个数组也需要满足16字节,不能在定义完成后发现数据没有对齐,直接padding补全所有空缺内存。
此过程,可使用结构体嵌套,通过两层结构体完成定义。因此,本部分关于cluster数据的相关定义如下
// Corrected cluster data structure: one 16-byte record per cluster.
struct ClusterCountsandOffsets {
struct Cluster {
uint32_t count; // 4 bytes
uint32_t offset; // 4 bytes
float padding[2]; // 8 bytes, pads the record to 16 bytes
};
Cluster cluster[TOTAL_CLUSTERS];
};
// Global light index list with one index per 16-byte record.
struct ClusterIndexList {
struct Indices {
uint32_t clusterIndexList; // 4 bytes
float padding[3]; // 12 bytes, pads the record to 16 bytes
};
Indices indices[maxnumLights * TOTAL_CLUSTERS]; // global light index list
};
确保单个传输最小单位的大小为16字节的整数倍即可。但是ClusterIndexList 的定义浪费了大量内存。
如果进一步追求性能,可以考虑
struct ClusterIndexList{
struct Indices{
unit32_t clusterIndexList[4]//4+4=16字节
}
Indices indices[(maxnumLights*TOTAL_CLUSTERS+3)/4];//向上取整
}
也可以尝试使用alignas(16)强制对齐,但效果未知,结构体嵌套时可能不起作用,且内存布局不够直观,不建议使用
// One light index forced onto a 16-byte boundary.
// alignas has to follow the struct keyword to apply to the type itself.
struct alignas(16) Indices {
    uint32_t clusterIndexList; // 4 bytes of payload
    // Because of alignas(16), sizeof(Indices) is rounded up to 16 bytes.
};
打包方式(每个Indices存放四个索引)的传输效率比之前高很多,不过访问时需要多一步下标转换。为了实现简单,此次代码不使用这种方式;
大坑
// PITFALL - do not use for a uniform buffer: the elements are tightly packed
// (4 bytes each) on the CPU, which does not match the 16-byte element spacing
// described in the surrounding notes, so the transferred data is misread.
struct ClusterIndexList {
uint32_t clusterIndexList[maxnumLights * TOTAL_CLUSTERS];
};
实际情况中,这种方式会使数组中每个4字节的元素在GPU端按16字节步长读取,与CPU端紧密排列的布局不一致,导致传输后的数据错位。即使事后在末尾用padding补足总大小,数组内部的布局仍然不匹配,读到的数值依然无法使用。
关于传输cluster相关变量的总结:
(1)定义合适的结构体数据
// New ClusterIndexList: one index per 16-byte record.
struct ClusterIndexList {
struct Indices {
uint32_t clusterIndexList; // 4 bytes
float padding[3]; // 12 bytes (3 floats of 4 bytes each)
// total size: 4 + 12 = 16 bytes, correctly aligned
};
Indices indices[maxnumLights * TOTAL_CLUSTERS]; // global light index list
};
(2)更新类成员变量
class VulkanExample:public VulkanExampleBase{
public:
struct{
vks::Buffer object;
vks::Buffer params;
vks::Buffer clusterIndexList;//加入成员变量
}uniformBuffers;
UBOParams uboParams;
ClusterIndexList clusrerIndexList;//创建类实例,用于后续绑定操作
}
(3)更新析构函数
// Releases the pipeline objects and all uniform buffers owned by the example.
~VulkanExample() {
    // Without a logical device there is nothing to release.
    if (!device) {
        return;
    }

    vkDestroyPipeline(device, pipeline, nullptr);
    vkDestroyPipelineLayout(device, pipelineLayout, nullptr);
    vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr);

    uniformBuffers.object.destroy();
    uniformBuffers.params.destroy();
    uniformBuffers.clusterData.destroy();
    uniformBuffers.clusterIndexList.destroy();
}
(4)更新prepareUniformBuffers()
调整缓冲区创建和映射:
void prepareUniformBuffers() {
VkPhysicalDeviceProperties properties;
vkGetPhysicalDeviceProperties(physicalDevice, &properties);
VkDeviceSize minAlignment = properties.limits.minUniformBufferOffsetAlignment;
VkDeviceSize alignedSizeClusterIndexList = ((sizeof(clusterIndexList) + minAlignment - 1) / minAlignment) * minAlignment; // 更新变量名
VK_CHECK_RESULT(vulkanDevice->createBuffer(
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&uniformBuffers.object,
sizeof(uboMatrices)));
VK_CHECK_RESULT(vulkanDevice->createBuffer(
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&uniformBuffers.params,
sizeof(uboParams)));
VK_CHECK_RESULT(vulkanDevice->createBuffer(
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&uniformBuffers.clusterData,
sizeof(clusterData)));
VK_CHECK_RESULT(vulkanDevice->createBuffer(
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&uniformBuffers.clusterIndexList, // 更新名称
alignedSizeClusterIndexList)); // 更新大小变量
VK_CHECK_RESULT(uniformBuffers.object.map());
VK_CHECK_RESULT(uniformBuffers.params.map());
VK_CHECK_RESULT(uniformBuffers.clusterData.map());
VK_CHECK_RESULT(uniformBuffers.clusterIndexList.map()); // 更新名称
}
其中VkDeviceSize alignedSizeClusterIndexList = ((sizeof(clusterIndexList) + minAlignment - 1) / minAlignment) * minAlignment;可以实现硬件级的缓冲区对齐,
例如,假设最小内存块minAlignment 为64字节,sizeof clusterIndexList为4000字节。那么alignedSizeClusterIndexList = ((4000 + 64 - 1) / 64) * 64 = (4063 / 64) * 64 = 63 * 64 = 4032 字节(整数除法向下取整)
但是,这并不能解决元素级别的对齐问题。alignedSizeClusterIndexList 的计算 无法解决 std140 的元素对齐问题,因为它只影响缓冲区总大小,而不改变 ClusterIndexList 内部的内存布局(仍为 4 字节/元素)。
(5)更新setupDescriptors()
选择合适的绑定点,更新缓冲区名称
void setupDescriptors() {
std::vector<VkDescriptorPoolSize> poolSizes = {
vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 4),
};
VkDescriptorPoolCreateInfo descriptorPoolInfo = vks::initializers::descriptorPoolCreateInfo(poolSizes, 2);
VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolInfo, nullptr, &descriptorPool));
std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings = {
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0),
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT, 1),
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT, 2),
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT, 3),
};
VkDescriptorSetLayoutCreateInfo descriptorLayout = vks::initializers::descriptorSetLayoutCreateInfo(setLayoutBindings);
VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &descriptorSetLayout));
VkDescriptorSetAllocateInfo allocInfo = vks::initializers::descriptorSetAllocateInfo(descriptorPool, &descriptorSetLayout, 1);
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet));
std::vector<VkWriteDescriptorSet> writeDescriptorSets = {
vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0, &uniformBuffers.object.descriptor),
vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, &uniformBuffers.params.descriptor),
vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2, &uniformBuffers.clusterIndexList.descriptor), // 更新名称
vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 3, &uniformBuffers.clusterData.descriptor),
};
vkUpdateDescriptorSets(device, static_cast<uint32_t>(writeDescriptorSets.size()), writeDescriptorSets.data(), 0, NULL);
}
上述代码中,uniform buffer数量为四个,除了绑定点0的矩阵数据需要在顶点和片段着色器中均可见外,其他三个只需在片段着色器中可见即可。
2:
表示 maxSets,即描述符池可以分配的最大描述符集(Descriptor Set)数量。
描述符集是将多个描述符绑定到着色器资源(如缓冲区、纹理)的容器。maxSets = 2 意味着池子最多支持 2 个描述符集。
(6)更新updateLightsCluster()
修改对索引列表的访问,使用新的 clusterIndexList 结构:
2.缓冲区
1. 为什么需要创建临时缓冲区(暂存缓冲区)?
1.1. GPU和CPU内存的差异
CPU内存(主机内存):CPU可以直接访问,但GPU访问主机内存的效率较低。
GPU内存(设备内存):GPU可以高效访问,但CPU直接访问设备内存的效率较低。
因此,直接将数据从CPU内存传输到GPU内存是不现实的,因为这会导致性能瓶颈。
1.2. 暂存缓冲区的作用
暂存缓冲区(staging buffer)是一种特殊的缓冲区,用于在CPU和GPU之间高效地传输数据。传输过程涉及两种内存属性:
主机可见(Host Visible):暂存缓冲区使用的内存,CPU可以直接访问和写入数据。
设备本地(Device Local):目标缓冲区使用的内存,GPU可以高效访问,但CPU通常不能直接写入。
暂存缓冲区的典型使用流程如下:
创建暂存缓冲区:在主机可见的内存中创建一个缓冲区,用于存储要传输的数据。
将数据从CPU复制到暂存缓冲区:CPU将数据写入暂存缓冲区。
将数据从暂存缓冲区复制到设备本地缓冲区:使用命令缓冲区将数据从暂存缓冲区复制到设备本地的顶点缓冲区或索引缓冲区。
销毁暂存缓冲区:数据传输完成后,暂存缓冲区不再需要,可以销毁以释放内存。
2. 为什么用完后要销毁暂存缓冲区?
2.1. 内存管理
避免内存泄漏:如果不销毁暂存缓冲区,内存将一直被占用,导致内存泄漏,影响程序的性能和稳定性。
资源回收:销毁暂存缓冲区可以释放内存资源,供其他操作使用。
2.2. 性能优化
减少内存占用:暂存缓冲区通常只在数据传输过程中需要,传输完成后不再使用。及时销毁可以减少内存占用,提高程序的性能。
避免不必要的开销:保留不再需要的缓冲区会增加内存管理的复杂性,可能导致不必要的性能开销。
3. 代码示例和解释
cpp
复制
// Create the staging buffer
VK_CHECK_RESULT(vulkanDevice->createBuffer(
VK_BUFFER_USAGE_TRANSFER_SRC_BIT, // usage: transfer source
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, // memory: host visible, host coherent
&stagingVertexBuffer, // staging buffer object
vertexBufferSize, // buffer size
vertices.data() // source data
));
// Create the device-local buffer (vertex buffer)
VK_CHECK_RESULT(vulkanDevice->createBuffer(
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, // usage: vertex buffer, transfer destination
VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, // memory: device local
&uniformBuffers.sphereVertex, // vertex buffer object
vertexBufferSize // buffer size
));
// Copy the data from the staging buffer to the device-local buffer
vulkanDevice->copyBuffer(&stagingVertexBuffer, &uniformBuffers.sphereVertex, queue);
// Destroy the staging buffer to release its memory
stagingVertexBuffer.destroy();
4. 总结
创建和销毁暂存缓冲区是Vulkan中数据传输的一个重要步骤:
创建暂存缓冲区:用于将数据从CPU高效地传输到GPU。
销毁暂存缓冲区:释放不再需要的内存资源,避免内存泄漏和性能问题。
在Vulkan中,暂存缓冲区(staging buffer)的内存分配取决于其内存属性。暂存缓冲区通常具有以下内存属性:
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT:表示该内存对主机(CPU)可见,主机可以直接访问。
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT:表示主机和设备之间的内存访问是缓存一致的,不需要显式地刷新或无效化缓存。
暂存缓冲区的内存来源
暂存缓冲区的内存通常从 系统内存(主机内存) 中分配,而不是从显存(设备内存)中分配。这是因为:
主机可见性:暂存缓冲区需要主机可以直接访问,因此必须分配在主机可见的内存中。
数据传输:暂存缓冲区用于将数据从主机传输到设备,因此它需要能够高效地与主机交互。
内存堆(Memory Heaps)
在Vulkan中,内存分配是从内存堆(memory heaps)中进行的。内存堆可以分为:
主机可见堆(Host-Visible Heap):通常对应系统内存,主机可以直接访问。
设备本地堆(Device-Local Heap):通常对应显存,设备可以高效访问。
暂存缓冲区的内存通常从 主机可见堆 中分配,因为它需要主机可以直接访问。
内存类型(Memory Types)
Vulkan 提供了多种内存类型,每种内存类型具有不同的属性。在创建缓冲区时,需要选择一个满足所需属性的内存类型。例如:
主机可见(Host-Visible):内存类型支持主机直接访问。
主机缓存一致(Host-Coherent):主机和设备之间的内存访问是缓存一致的。
暂存缓冲区的内存类型选择需要满足 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT 和 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT 属性。
暂存缓冲区的创建流程
以下是暂存缓冲区的创建和使用流程:
创建缓冲区对象:
指定缓冲区的使用标志(如 VK_BUFFER_USAGE_TRANSFER_SRC_BIT)。
指定内存属性(如 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)。
分配内存并绑定到缓冲区。
将数据从主机复制到暂存缓冲区:
使用 vkMapMemory 将内存映射到主机地址空间。
将数据复制到映射的内存区域。
使用 vkUnmapMemory 解除映射。
将数据从暂存缓冲区复制到设备本地缓冲区:
使用命令缓冲区和 vkCmdCopyBuffer 将数据从暂存缓冲区复制到设备本地缓冲区。
销毁暂存缓冲区:
数据传输完成后,销毁暂存缓冲区以释放内存。


完整c++代码为
/*
* Vulkan 示例 - 基于物理的着色基础
* 参考:http://graphicrants.blogspot.de/2013/08/specular-brdf-reference.html
* 版权所有 (C) 2017-2024 Sascha Willems - www.saschawillems.de
* MIT 许可证
*/
#include "vulkanexamplebase.h"
#include "VulkanglTFModel.h"
constexpr int maxnumLights = 64; // maximum number of lights
// Cluster grid dimensions
constexpr uint32_t CLUSTER_SIZE_X = 4; // clusters across the screen width
constexpr uint32_t CLUSTER_SIZE_Y = 4; // clusters across the screen height
constexpr uint32_t CLUSTER_SIZE_Z = 2; // clusters along the depth axis
// Total number of clusters in the grid.
constexpr uint32_t TOTAL_CLUSTERS = CLUSTER_SIZE_Z * CLUSTER_SIZE_Y * CLUSTER_SIZE_X;
// Capacity of the global light index list: every light in every cluster.
constexpr uint32_t lightIndexListnum = TOTAL_CLUSTERS * maxnumLights;
// Material definition: a display name plus the values pushed to the shader.
struct Material {
    // Block sent as push constants; the member order is part of the shader interface.
    struct PushBlock {
        float roughness; // surface roughness
        float metallic;  // metalness
        float r, g, b;   // RGB colour
    } params{};
    std::string name;

    Material() {}

    // n: display name, c: RGB colour, r: roughness, m: metallic
    Material(std::string n, glm::vec3 c, float r, float m)
        : params{ r, m, c.r, c.g, c.b }, name(n) {}
};
// Light structure
struct Light {
glm::vec4 position; // position
glm::vec4 colorAndRadius; // colour and radius
glm::vec4 direction; // direction
glm::vec4 cutOff; // cut-off angles and related parameters
};
// Corrected cluster data structure: one 16-byte record per cluster.
struct ClusterCountsandOffsets {
struct Cluster {
uint32_t count; // 4 bytes
uint32_t offset; // 4 bytes
float padding[2]; // 8 bytes, pads the record to 16 bytes
};
Cluster cluster[TOTAL_CLUSTERS];
};
// Uniform buffer data after splitting the cluster arrays into their own buffers.
struct UBOParams {
Light lights[maxnumLights]; // light array
};
// Global light index list with one index per 16-byte record.
struct ClusterIndexList {
struct Indices {
uint32_t clusterIndexList; // 4 bytes
float padding[3]; // 12 bytes, pads the record to 16 bytes
};
Indices indices[maxnumLights * TOTAL_CLUSTERS]; // global light index list
};
class VulkanExample : public VulkanExampleBase {
public:
// Loaded models and the index of the one currently drawn.
struct Meshes {
std::vector<vkglTF::Model> objects;
int32_t objectIndex = 0;
} models;
// Uniform buffers bound to the descriptor set.
struct {
vks::Buffer object; // transformation matrices and camera position
vks::Buffer params; // light data
vks::Buffer clusterData; // per-cluster counts and offsets
vks::Buffer clusterIndexList; // global light index list
} uniformBuffers;
// CPU copy of the matrix uniform block.
struct UBOMatrices {
glm::mat4 projection; // projection matrix
glm::mat4 model; // model matrix
glm::mat4 view; // view matrix
glm::vec3 camPos; // camera position
float padding; // pads camPos to 16 bytes
} uboMatrices;
// CPU copies of the remaining uniform blocks.
UBOParams uboParams;
ClusterCountsandOffsets clusterData;
ClusterIndexList clusterIndexList;
// Pipeline and descriptor objects.
VkPipelineLayout pipelineLayout{ VK_NULL_HANDLE };
VkPipeline pipeline{ VK_NULL_HANDLE };
VkDescriptorSetLayout descriptorSetLayout{ VK_NULL_HANDLE };
VkDescriptorSet descriptorSet{ VK_NULL_HANDLE };
// Selectable materials and the names of the materials and objects.
std::vector<Material> materials;
int32_t materialIndex = 0;
std::vector<std::string> materialNames;
std::vector<std::string> objectNames;
// Sets up the window title, the camera and the list of selectable materials and objects.
VulkanExample() : VulkanExampleBase() {
    title = "Physical based shading basics";
    camera.type = Camera::CameraType::firstperson;
    camera.setPosition(glm::vec3(10.0f, 13.0f, 1.8f));
    camera.setRotation(glm::vec3(-62.5f, 90.0f, 0.0f));
    camera.movementSpeed = 4.0f;
    camera.setPerspective(60.0f, (float)width / (float)height, 0.1f, 256.0f);
    camera.rotationSpeed = 0.25f;
    timerSpeed *= 0.25f;
    // Initialise the materials (name, colour, roughness, metallic).
    materials.push_back(Material("Gold", glm::vec3(1.0f, 0.765557f, 0.336057f), 0.1f, 1.0f));
    materials.push_back(Material("Copper", glm::vec3(0.955008f, 0.637427f, 0.538163f), 0.1f, 1.0f));
    materials.push_back(Material("Chromium", glm::vec3(0.549585f, 0.556114f, 0.554256f), 0.1f, 1.0f));
    materials.push_back(Material("Nickel", glm::vec3(0.659777f, 0.608679f, 0.525649f), 0.1f, 1.0f));
    materials.push_back(Material("Titanium", glm::vec3(0.541931f, 0.496791f, 0.449419f), 0.1f, 1.0f));
    materials.push_back(Material("Cobalt", glm::vec3(0.662124f, 0.654864f, 0.633732f), 0.1f, 1.0f));
    materials.push_back(Material("Platinum", glm::vec3(0.672411f, 0.637331f, 0.585456f), 0.1f, 1.0f));
    materials.push_back(Material("planematerial", glm::vec3(0.955008f, 0.654864f, 0.336057f), 0.1f, 1.0f));
    materials.push_back(Material("White", glm::vec3(1.0f), 0.1f, 1.0f));
    materials.push_back(Material("Red", glm::vec3(1.0f, 0.0f, 0.0f), 0.1f, 1.0f));
    materials.push_back(Material("Blue", glm::vec3(0.0f, 0.0f, 1.0f), 0.1f, 1.0f));
    materials.push_back(Material("Black", glm::vec3(0.0f), 0.1f, 1.0f));
    // Iterate by const reference: a by-value loop variable would copy every Material
    // (including its std::string) just to read the name.
    for (const auto& material : materials) {
        materialNames.push_back(material.name);
    }
    // NOTE(review): seven names (the last one empty) versus the six model files listed
    // in loadAssets - confirm the empty entry can never be selected as objectIndex.
    objectNames = { "Sphere", "Teapot", "Torusknot", "Venus", "plane", "plane_circle", "" };
    materialIndex = 0;
}
// Releases the pipeline objects and all uniform buffers owned by the example.
~VulkanExample() {
    // Without a logical device there is nothing to release.
    if (!device) {
        return;
    }

    vkDestroyPipeline(device, pipeline, nullptr);
    vkDestroyPipelineLayout(device, pipelineLayout, nullptr);
    vkDestroyDescriptorSetLayout(device, descriptorSetLayout, nullptr);

    uniformBuffers.object.destroy();
    uniformBuffers.params.destroy();
    uniformBuffers.clusterData.destroy();
    uniformBuffers.clusterIndexList.destroy();
}
void buildCommandBuffers() {
VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo();
VkClearValue clearValues[2];
clearValues[0].color = defaultClearColor;
clearValues[1].depthStencil = { 1.0f, 0 };
VkRenderPassBeginInfo renderPassBeginInfo = vks::initializers::renderPassBeginInfo();
renderPassBeginInfo.renderPass = renderPass;
renderPassBeginInfo.renderArea.offset.x = 0;
renderPassBeginInfo.renderArea.offset.y = 0;
renderPassBeginInfo.renderArea.extent.width = width;
renderPassBeginInfo.renderArea.extent.height = height;
renderPassBeginInfo.clearValueCount = 2;
renderPassBeginInfo.pClearValues = clearValues;
for (int32_t i = 0; i < drawCmdBuffers.size(); ++i) {
renderPassBeginInfo.framebuffer = frameBuffers[i];
VK_CHECK_RESULT(vkBeginCommandBuffer(drawCmdBuffers[i], &cmdBufInfo));
vkCmdBeginRenderPass(drawCmdBuffers[i], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
VkViewport viewport = vks::initializers::viewport((float)width, (float)height, 0.0f, 1.0f);
vkCmdSetViewport(drawCmdBuffers[i], 0, 1, &viewport);
VkRect2D scissor = vks::initializers::rect2D(width, height, 0, 0);
vkCmdSetScissor(drawCmdBuffers[i], 0, 1, &scissor);
vkCmdBindPipeline(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
vkCmdBindDescriptorSets(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &descriptorSet, 0, NULL);
Material mat = materials[materialIndex];
const uint32_t gridSize = 7;
for (uint32_t y = 0; y < gridSize; y++) {
for (uint32_t x = 0; x < gridSize; x++) {
glm::vec3 pos = glm::vec3(float(x - (gridSize / 2.0f)) * 2.5f, 0.0f, float(y - (gridSize / 2.0f)) * 2.5f);
vkCmdPushConstants(drawCmdBuffers[i], pipelineLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(glm::vec3), &pos);
vkCmdPushConstants(drawCmdBuffers[i], pipelineLayout, VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(glm::vec3), sizeof(Material::PushBlock), &mat);
models.objects[models.objectIndex].draw(drawCmdBuffers[i]);
}
}
drawUI(drawCmdBuffers[i]);
vkCmdEndRenderPass(drawCmdBuffers[i]);
VK_CHECK_RESULT(vkEndCommandBuffer(drawCmdBuffers[i]));
}
}
// Loads every glTF model that can be selected, in the listed order.
void loadAssets() {
    const std::vector<std::string> modelFiles = {
        "sphere.gltf", "teapot.gltf", "torusknot.gltf", "venus.gltf", "plane.gltf", "plane_circle.gltf"
    };
    models.objects.resize(modelFiles.size());

    size_t slot = 0;
    for (const std::string& file : modelFiles) {
        // Models live in the "models/" folder below the asset path.
        models.objects[slot].loadFromFile(getAssetPath() + "models/" + file, vulkanDevice, queue,
            vkglTF::FileLoadingFlags::PreTransformVertices | vkglTF::FileLoadingFlags::FlipY);
        ++slot;
    }
}
void setupDescriptors() {
std::vector<VkDescriptorPoolSize> poolSizes = {
vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 4),
};
VkDescriptorPoolCreateInfo descriptorPoolInfo = vks::initializers::descriptorPoolCreateInfo(poolSizes, 2);
VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolInfo, nullptr, &descriptorPool));
std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings = {
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 0),
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT, 1),
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT, 2),
vks::initializers::descriptorSetLayoutBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT, 3),
};
VkDescriptorSetLayoutCreateInfo descriptorLayout = vks::initializers::descriptorSetLayoutCreateInfo(setLayoutBindings);
VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &descriptorSetLayout));
VkDescriptorSetAllocateInfo allocInfo = vks::initializers::descriptorSetAllocateInfo(descriptorPool, &descriptorSetLayout, 1);
VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &descriptorSet));
std::vector<VkWriteDescriptorSet> writeDescriptorSets = {
vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 0, &uniformBuffers.object.descriptor),
vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 1, &uniformBuffers.params.descriptor),
vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2, &uniformBuffers.clusterIndexList.descriptor), // 更新名称
vks::initializers::writeDescriptorSet(descriptorSet, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 3, &uniformBuffers.clusterData.descriptor),
};
vkUpdateDescriptorSets(device, static_cast<uint32_t>(writeDescriptorSets.size()), writeDescriptorSets.data(), 0, NULL);
}
void preparePipelines() {
VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = vks::initializers::pipelineLayoutCreateInfo(&descriptorSetLayout, 1);
std::vector<VkPushConstantRange> pushConstantRanges = {
vks::initializers::pushConstantRange(VK_SHADER_STAGE_VERTEX_BIT, sizeof(glm::vec3), 0),
vks::initializers::pushConstantRange(VK_SHADER_STAGE_FRAGMENT_BIT, sizeof(Material::PushBlock), sizeof(glm::vec3)),
};
pipelineLayoutCreateInfo.pushConstantRangeCount = 2;
pipelineLayoutCreateInfo.pPushConstantRanges = pushConstantRanges.data();
VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, nullptr, &pipelineLayout));
VkPipelineInputAssemblyStateCreateInfo inputAssemblyState = vks::initializers::pipelineInputAssemblyStateCreateInfo(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, 0, VK_FALSE);
VkPipelineRasterizationStateCreateInfo rasterizationState = vks::initializers::pipelineRasterizationStateCreateInfo(VK_POLYGON_MODE_FILL, VK_CULL_MODE_BACK_BIT, VK_FRONT_FACE_COUNTER_CLOCKWISE);
VkPipelineColorBlendAttachmentState blendAttachmentState = vks::initializers::pipelineColorBlendAttachmentState(0xf, VK_FALSE);
VkPipelineColorBlendStateCreateInfo colorBlendState = vks::initializers::pipelineColorBlendStateCreateInfo(1, &blendAttachmentState);
VkPipelineDepthStencilStateCreateInfo depthStencilState = vks::initializers::pipelineDepthStencilStateCreateInfo(VK_FALSE, VK_FALSE, VK_COMPARE_OP_LESS_OR_EQUAL);
VkPipelineViewportStateCreateInfo viewportState = vks::initializers::pipelineViewportStateCreateInfo(1, 1);
VkPipelineMultisampleStateCreateInfo multisampleState = vks::initializers::pipelineMultisampleStateCreateInfo(VK_SAMPLE_COUNT_1_BIT);
std::vector<VkDynamicState> dynamicStateEnables = { VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR };
VkPipelineDynamicStateCreateInfo dynamicState = vks::initializers::pipelineDynamicStateCreateInfo(dynamicStateEnables);
VkGraphicsPipelineCreateInfo pipelineCI = vks::initializers::pipelineCreateInfo(pipelineLayout, renderPass);
std::array<VkPipelineShaderStageCreateInfo, 2> shaderStages;
pipelineCI.pInputAssemblyState = &inputAssemblyState;
pipelineCI.pRasterizationState = &rasterizationState;
pipelineCI.pColorBlendState = &colorBlendState;
pipelineCI.pMultisampleState = &multisampleState;
pipelineCI.pViewportState = &viewportState;
pipelineCI.pDepthStencilState = &depthStencilState;
pipelineCI.pDynamicState = &dynamicState;
pipelineCI.stageCount = static_cast<uint32_t>(shaderStages.size());
pipelineCI.pStages = shaderStages.data();
pipelineCI.pVertexInputState = vkglTF::Vertex::getPipelineVertexInputState({ vkglTF::VertexComponent::Position, vkglTF::VertexComponent::Normal });
shaderStages[0] = loadShader(getShadersPath() + "pbrbasic/pbr.vert.spv", VK_SHADER_STAGE_VERTEX_BIT);
shaderStages[1] = loadShader(getShadersPath() + "pbrbasic/pbr.frag.spv", VK_SHADER_STAGE_FRAGMENT_BIT);
depthStencilState.depthWriteEnable = VK_TRUE;
depthStencilState.depthTestEnable = VK_TRUE;
VK_CHECK_RESULT(vkCreateGraphicsPipelines(device, pipelineCache, 1, &pipelineCI, nullptr, &pipeline));
}
void prepareUniformBuffers() {
VkPhysicalDeviceProperties properties;
vkGetPhysicalDeviceProperties(physicalDevice, &properties);
VkDeviceSize minAlignment = properties.limits.minUniformBufferOffsetAlignment;
VkDeviceSize alignedSizeClusterIndexList = ((sizeof(clusterIndexList) + minAlignment - 1) / minAlignment) * minAlignment; // 更新变量名
VK_CHECK_RESULT(vulkanDevice->createBuffer(
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&uniformBuffers.object,
sizeof(uboMatrices)));
VK_CHECK_RESULT(vulkanDevice->createBuffer(
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&uniformBuffers.params,
sizeof(uboParams)));
VK_CHECK_RESULT(vulkanDevice->createBuffer(
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&uniformBuffers.clusterData,
sizeof(clusterData)));
VK_CHECK_RESULT(vulkanDevice->createBuffer(
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
&uniformBuffers.clusterIndexList, // 更新名称
alignedSizeClusterIndexList)); // 更新大小变量
VK_CHECK_RESULT(uniformBuffers.object.map());
VK_CHECK_RESULT(uniformBuffers.params.map());
VK_CHECK_RESULT(uniformBuffers.clusterData.map());
VK_CHECK_RESULT(uniformBuffers.clusterIndexList.map()); // 更新名称
}
void updateUniformBuffers() {
uboMatrices.projection = camera.matrices.perspective;
uboMatrices.view = camera.matrices.view;
uboMatrices.model = glm::rotate(glm::mat4(1.0f), glm::radians(-90.0f + (models.objectIndex == 1 ? 45.0f : 0.0f)), glm::vec3(0.0f, 1.0f, 0.0f));
uboMatrices.camPos = camera.position * -1.0f;
memcpy(uniformBuffers.object.mapped, &uboMatrices, sizeof(uboMatrices));
}
void updateLights() {
const float p = 15.0f;
const int gridSize = static_cast<int>(ceil(sqrt(static_cast<float>(maxnumLights))));
const float spacing = 2.0f * p / (gridSize - 1);
int lightIndex = 0;
for (int y = 0; y < gridSize && lightIndex < maxnumLights; y++) {
for (int x = 0; x < gridSize && lightIndex < maxnumLights; x++) {
float posX = -p + x * spacing;
float posZ = -p + y * spacing;
float posY = -p*0.5f;
uboParams.lights[lightIndex].position = glm::vec4(posX, posY, posZ, 1.0f);
glm::vec3 color;
switch (lightIndex % 4) {
case 0: color = glm::vec3(1.0f, 0.0f, 0.0f); break;
case 1: color = glm::vec3(0.0f, 1.0f, 0.0f); break;
case 2: color = glm::vec3(0.0f, 0.0f, 1.0f); break;
case 3: color = glm::vec3(1.0f, 1.0f, 0.0f); break;
}
uboParams.lights[lightIndex].colorAndRadius = glm::vec4(color, 15.0f);
glm::vec3 direction = glm::normalize(glm::vec3(0.0f, 0.0f, 0.0f) - glm::vec3(posX, posY, posZ));
uboParams.lights[lightIndex].direction = glm::vec4(direction, 1.0f);
uboParams.lights[lightIndex].cutOff = glm::vec4(12.5f, 18.5f, 0.0f, 0.0f);
lightIndex++;
}
}
if (!paused) {
for (int i = 0; i < maxnumLights; i++) {
uboParams.lights[i].position.x += sin(glm::radians(timer * 360.0f)) * 0.1f;
uboParams.lights[i].position.z += cos(glm::radians(timer * 360.0f)) * 0.1f;
}
}
memcpy(uniformBuffers.params.mapped, &uboParams, sizeof(uboParams));
}
void updateLightsCluster() {
for (uint32_t i = 0; i < TOTAL_CLUSTERS; i++) {
clusterData.cluster[i].count = 0;
clusterData.cluster[i].offset = 0;
}
memset(clusterIndexList.indices, 0, sizeof(clusterIndexList.indices)); // 清零整个结构,包括 padding
glm::mat4 viewProj = uboMatrices.projection * uboMatrices.view;
float zNear = 0.1f;
float zFar = 256.0f;
std::vector<std::vector<bool>> assignedLights(TOTAL_CLUSTERS, std::vector<bool>(maxnumLights, false));
for (int lightIdx = 0; lightIdx < maxnumLights; lightIdx++) {
Light& light = uboParams.lights[lightIdx];
float radius = light.colorAndRadius.w;
glm::vec4 clipPos = viewProj * light.position;
if (clipPos.w <= 0.0f) continue;
float ndcX = clipPos.x / clipPos.w;
float ndcY = clipPos.y / clipPos.w;
float ndcZ = clipPos.z / clipPos.w;
float radiusNDC = radius / clipPos.w;
radiusNDC = glm::min(radiusNDC, 0.5f);
//if (ndcX < -1.0f - radiusNDC || ndcX > 1.0f + radiusNDC ||
// ndcY < -1.0f - radiusNDC || ndcY > 1.0f + radiusNDC ||
// ndcZ < 0.0f - radiusNDC || ndcZ > 1.0f + radiusNDC) {
// continue;
//}
float minX = glm::clamp(ndcX - radiusNDC, -1.0f, 1.0f);
float maxX = glm::clamp(ndcX + radiusNDC, -1.0f, 1.0f);
float minY = glm::clamp(ndcY - radiusNDC, -1.0f, 1.0f);
float maxY = glm::clamp(ndcY + radiusNDC, -1.0f, 1.0f);
float minZ = glm::clamp(ndcZ - radiusNDC, 0.0f, 1.0f);
float maxZ = glm::clamp(ndcZ + radiusNDC, 0.0f, 1.0f);
minZ = glm::max(minZ, 0.0001f);
maxZ = glm::max(maxZ, 0.0001f);
uint32_t minClusterX = static_cast<uint32_t>((minX * 0.5f + 0.5f) * CLUSTER_SIZE_X);
uint32_t maxClusterX = static_cast<uint32_t>((maxX * 0.5f + 0.5f) * CLUSTER_SIZE_X);
uint32_t minClusterY = static_cast<uint32_t>((minY * 0.5f + 0.5f) * CLUSTER_SIZE_Y);
uint32_t maxClusterY = static_cast<uint32_t>((maxY * 0.5f + 0.5f) * CLUSTER_SIZE_Y);
uint32_t minClusterZ = static_cast<uint32_t>((log(minZ * (zFar - zNear) + zNear) / log(zFar / zNear)) * CLUSTER_SIZE_Z);
uint32_t maxClusterZ = static_cast<uint32_t>((log(maxZ * (zFar - zNear) + zNear) / log(zFar / zNear)) * CLUSTER_SIZE_Z);
minClusterX = glm::clamp(minClusterX, 0u, CLUSTER_SIZE_X - 1);
maxClusterX = glm::clamp(maxClusterX, 0u, CLUSTER_SIZE_X - 1);
minClusterY = glm::clamp(minClusterY, 0u, CLUSTER_SIZE_Y - 1);
maxClusterY = glm::clamp(maxClusterY, 0u, CLUSTER_SIZE_Y - 1);
minClusterZ = glm::clamp(minClusterZ, 0u, CLUSTER_SIZE_Z - 1);
maxClusterZ = glm::clamp(maxClusterZ, 0u, CLUSTER_SIZE_Z - 1);
for (uint32_t z = minClusterZ; z <= maxClusterZ; ++z) {
for (uint32_t y = minClusterY; y <= maxClusterY; ++y) {
for (uint32_t x = minClusterX; x <= maxClusterX; ++x) {
uint32_t clusterIdx = z * CLUSTER_SIZE_X * CLUSTER_SIZE_Y + y * CLUSTER_SIZE_X + x;
if (!assignedLights[clusterIdx][lightIdx] && clusterData.cluster[clusterIdx].count < maxnumLights) {
clusterData.cluster[clusterIdx].count++;
assignedLights[clusterIdx][lightIdx] = true;
}
}
}
}
}
uint32_t runningSum = 0;
for (uint32_t i = 0; i < TOTAL_CLUSTERS; i++) {
clusterData.cluster[i].offset = runningSum;
runningSum += clusterData.cluster[i].count;
}
std::vector<uint32_t> tempOffsets(TOTAL_CLUSTERS, 0);
for (int lightIdx = 0; lightIdx < maxnumLights; lightIdx++) {
Light& light = uboParams.lights[lightIdx];
float radius = light.colorAndRadius.w;
glm::vec4 clipPos = viewProj * light.position;
if (clipPos.w <= 0.0f) continue;
float ndcX = clipPos.x / clipPos.w;
float ndcY = clipPos.y / clipPos.w;
float ndcZ = clipPos.z / clipPos.w;
float radiusNDC = radius / clipPos.w;
radiusNDC = glm::min(radiusNDC, 0.5f);
if (ndcX < -1.0f - radiusNDC || ndcX > 1.0f + radiusNDC ||
ndcY < -1.0f - radiusNDC || ndcY > 1.0f + radiusNDC ||
ndcZ < 0.0f - radiusNDC || ndcZ > 1.0f + radiusNDC) {
continue;
}
float minX = glm::clamp(ndcX - radiusNDC, -1.0f, 1.0f);
float maxX = glm::clamp(ndcX + radiusNDC, -1.0f, 1.0f);
float minY = glm::clamp(ndcY - radiusNDC, -1.0f, 1.0f);
float maxY = glm::clamp(ndcY + radiusNDC, -1.0f, 1.0f);
float minZ = glm::clamp(ndcZ - radiusNDC, 0.0f, 1.0f);
float maxZ = glm::clamp(ndcZ + radiusNDC, 0.0f, 1.0f);
minZ = glm::max(minZ, 0.0001f);
maxZ = glm::max(maxZ, 0.0001f);
uint32_t minClusterX = static_cast<uint32_t>((minX * 0.5f + 0.5f) * CLUSTER_SIZE_X);
uint32_t maxClusterX = static_cast<uint32_t>((maxX * 0.5f + 0.5f) * CLUSTER_SIZE_X);
uint32_t minClusterY = static_cast<uint32_t>((minY * 0.5f + 0.5f) * CLUSTER_SIZE_Y);
uint32_t maxClusterY = static_cast<uint32_t>((maxY * 0.5f + 0.5f) * CLUSTER_SIZE_Y);
uint32_t minClusterZ = static_cast<uint32_t>((log(minZ * (zFar - zNear) + zNear) / log(zFar / zNear)) * CLUSTER_SIZE_Z);
uint32_t maxClusterZ = static_cast<uint32_t>((log(maxZ * (zFar - zNear) + zNear) / log(zFar / zNear)) * CLUSTER_SIZE_Z);
minClusterX = glm::clamp(minClusterX, 0u, CLUSTER_SIZE_X - 1);
maxClusterX = glm::clamp(maxClusterX, 0u, CLUSTER_SIZE_X - 1);
minClusterY = glm::clamp(minClusterY, 0u, CLUSTER_SIZE_Y - 1);
maxClusterY = glm::clamp(maxClusterY, 0u, CLUSTER_SIZE_Y - 1);
minClusterZ = glm::clamp(minClusterZ, 0u, CLUSTER_SIZE_Z - 1);
maxClusterZ = glm::clamp(maxClusterZ, 0u, CLUSTER_SIZE_Z - 1);
for (uint32_t z = minClusterZ; z <= maxClusterZ; ++z) {
for (uint32_t y = minClusterY; y <= maxClusterY; ++y) {
for (uint32_t x = minClusterX; x <= maxClusterX; ++x) {
uint32_t clusterIdx = z * CLUSTER_SIZE_X * CLUSTER_SIZE_Y + y * CLUSTER_SIZE_X + x;
uint32_t offset = clusterData.cluster[clusterIdx].offset + tempOffsets[clusterIdx];
if (offset < lightIndexListnum && tempOffsets[clusterIdx] < clusterData.cluster[clusterIdx].count) {
clusterIndexList.indices[offset].clusterIndexList = lightIdx; // 更新访问方式
tempOffsets[clusterIdx]++;
}
}
}
}
}
memcpy(uniformBuffers.params.mapped, &uboParams, sizeof(uboParams));
memcpy(uniformBuffers.clusterData.mapped, &clusterData, sizeof(clusterData));
memcpy(uniformBuffers.clusterIndexList.mapped, &clusterIndexList, sizeof(clusterIndexList)); // 更新名称
}
// Submits the command buffer selected by currentBuffer between the base class's
// prepareFrame() and submitFrame() calls.
void draw() {
    VulkanExampleBase::prepareFrame();

    // Exactly one command buffer is submitted per frame.
    submitInfo.pCommandBuffers = &drawCmdBuffers[currentBuffer];
    submitInfo.commandBufferCount = 1;
    VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));

    VulkanExampleBase::submitFrame();
}
// One-time setup: runs the base class preparation, then loads assets, creates the uniform
// buffers, descriptors and pipelines, records the command buffers, and finally marks the
// example as ready so render() starts doing work.
// NOTE(review): the call order presumably matters (e.g. buffers before descriptors) — keep it.
void prepare() {
VulkanExampleBase::prepare();
loadAssets();
prepareUniformBuffers();
setupDescriptors();
preparePipelines();
buildCommandBuffers();
// render() returns early until this flag is set.
prepared = true;
}
// Per-frame entry point: does nothing until prepare() has finished; otherwise refreshes the
// matrices, re-bins the lights unless paused, and draws.
virtual void render() {
    if (prepared) {
        updateUniformBuffers();
        // Lights are only animated and re-clustered while the example is running.
        const bool running = !paused;
        if (running) {
            updateLights();
            updateLightsCluster();
        }
        draw();
    }
}
// Builds the settings section of the UI overlay: a material selector and an object selector.
// Changing either one re-records the command buffers; changing the object also refreshes
// the matrices first.
virtual void OnUpdateUIOverlay(vks::UIOverlay* overlay) {
    if (!overlay->header("Seting")) {
        return;
    }

    const bool materialChanged = overlay->comboBox("Material", &materialIndex, materialNames);
    if (materialChanged) {
        buildCommandBuffers();
    }

    const bool objectChanged = overlay->comboBox("Type", &models.objectIndex, objectNames);
    if (objectChanged) {
        updateUniformBuffers();
        buildCommandBuffers();
    }
}
};
// Framework macro invoked at file scope; presumably expands to the program entry point for
// the example class above — confirm against the vulkanExample base code.
VULKAN_EXAMPLE_MAIN()
完整的片元着色器(frag)代码如下:
// 版权所有 2020 Google LLC
#define NUM_LIGHTS 64 // Number of lights
#define CLUSTER_SIZE_X 4 // Cluster count along X
#define CLUSTER_SIZE_Y 4 // Cluster count along Y
#define CLUSTER_SIZE_Z 2 // Cluster count along Z (depth slices)
// Parenthesised so the product stays intact when the macro is used inside a larger
// expression such as `x / TOTAL_CLUSTERS` or `x % TOTAL_CLUSTERS`.
#define TOTAL_CLUSTERS (CLUSTER_SIZE_X * CLUSTER_SIZE_Y * CLUSTER_SIZE_Z)
#define LIGHT_INDEX_LIST_SIZE (NUM_LIGHTS * TOTAL_CLUSTERS) // Size of the global light index list
// Interpolated per-fragment inputs received by the fragment shader.
struct VSOutput {
[[vk::location(0)]] float3 WorldPos : POSITION0; // World-space position
[[vk::location(1)]] float3 Normal : NORMAL0; // Surface normal (normalised again in main)
};
// Camera and object matrices.
struct UBO {
float4x4 projection; // Projection matrix
float4x4 model; // Model matrix
float4x4 view; // View matrix
float3 camPos; // Camera position
uint maxlightindexnum; // Maximum number of light indices (not read by this shader)
};
cbuffer ubo : register(b0) { UBO ubo; } // Bound to register b0
// One light source.
struct Light {
float4 position; // Light position
float4 colorAndRadius; // xyz = colour, w = radius
float4 direction; // Direction
float4 cutOff; // Cut-off parameters
};
cbuffer uboParams : register(b1) { // Light data buffer
Light lights[NUM_LIGHTS];
};
// One entry of the flat light index list; each cluster owns a contiguous range of entries.
struct Indices {
uint clusterIndexList; // Index into lights[]
float3 padding; // Padding to keep the entry 16-byte aligned
};
cbuffer clusterIndexList : register(b2) {
Indices indices[LIGHT_INDEX_LIST_SIZE];
};
// Per-cluster range into the light index list.
struct Cluster {
uint counts; // Number of lights in the cluster
uint offsets; // Offset of the cluster's first entry in the light index list
float2 padding; // Padding to keep the entry 16-byte aligned
};
cbuffer clusterCountsandOffsets : register(b3) { // Occupies the former b3 binding point
Cluster clusterCountsandOffsets[TOTAL_CLUSTERS];
};
// Material parameters supplied as push constants, at explicit byte offsets 12 through 28.
struct PushConsts {
[[vk::offset(12)]] float roughness; // Roughness
[[vk::offset(16)]] float metallic; // Metalness
[[vk::offset(20)]] float r; // Red component
[[vk::offset(24)]] float g; // Green component
[[vk::offset(28)]] float b; // Blue component
};
[[vk::push_constant]] PushConsts material; // Push constants
// Pi; used to normalise the GGX distribution in D_GGX.
static const float PI = 3.14159265359;
// Assembles the material colour from the r/g/b push constants.
float3 materialcolor() {
    const float3 baseColor = { material.r, material.g, material.b };
    return baseColor;
}
// Normal distribution function (GGX).
// dotNH: clamped cosine between the normal and the half vector.
float D_GGX(float dotNH, float roughness) {
    // alpha = roughness^2, and the formula uses alpha^2
    const float alphaSquared = (roughness * roughness) * (roughness * roughness);
    const float base = dotNH * dotNH * (alphaSquared - 1.0) + 1.0;
    return alphaSquared / (PI * base * base);
}
// Geometric shadowing function (Schlick-Smith GGX).
// Returns the product of the light-side and view-side terms.
float G_SchlicksmithGGX(float dotNL, float dotNV, float roughness) {
    const float remapped = (roughness + 1.0);
    const float k = (remapped * remapped) / 8.0;
    const float lightTerm = dotNL / (dotNL * (1.0 - k) + k);
    const float viewTerm = dotNV / (dotNV * (1.0 - k) + k);
    return lightTerm * viewTerm;
}
// Fresnel term (Schlick approximation).
// F0 blends from a constant 0.04 towards the material colour as metallic goes to 1.
float3 F_Schlick(float cosTheta, float metallic) {
    const float3 dielectricF0 = float3(0.04, 0.04, 0.04);
    const float3 F0 = lerp(dielectricF0, materialcolor(), metallic);
    const float grazing = pow(1.0 - cosTheta, 5.0);
    return F0 + (1.0 - F0) * grazing;
}
// Specular BRDF for one light.
// L, V, N: normalised light, view and normal vectors.
// Returns the specular term already multiplied by N.L; the light colour is applied by the caller.
float3 BRDF(float3 L, float3 V, float3 N, float metallic, float roughness) {
    const float3 H = normalize(V + L);
    const float dotNV = clamp(dot(N, V), 0.0, 1.0);
    const float dotNL = clamp(dot(N, L), 0.0, 1.0);
    const float dotNH = clamp(dot(N, H), 0.0, 1.0);

    float3 color = float3(0.0, 0.0, 0.0);
    if (dotNL > 0.0) {
        // Use the same clamped roughness for D and G; an unclamped roughness of 0 makes
        // D_GGX evaluate 0/0 at dotNH == 1.
        const float rroughness = max(0.05, roughness);
        const float D = D_GGX(dotNH, rroughness);
        const float G = G_SchlicksmithGGX(dotNL, dotNV, rroughness);
        const float3 F = F_Schlick(dotNV, metallic);
        // Keep the denominator away from zero at grazing view angles (dotNV == 0).
        const float3 spec = D * F * G / max(4.0 * dotNL * dotNV, 0.001);
        color += spec * dotNL;
    }
    return color;
}
// Light contribution factor: squared linear distance falloff times the clamped N.L.
// radius:   influence radius of the light; nothing is lit beyond it.
// lightVec: unnormalised vector from the surface point to the light.
// Returns 0 outside the radius or for a non-positive radius.
float radiance(float radius, float3 lightVec, float3 N, float3 L) {
    // A zero radius would otherwise evaluate 0/0 when the fragment sits on the light.
    if (radius <= 0.0) return 0.0;
    const float dist = length(lightVec);
    // Cut off at the light's radius
    if (dist > radius) return 0.0;
    const float falloff = clamp(1.0 - dist / radius, 0.0, 1.0);
    const float attenuation = falloff * falloff;
    const float dotNL = max(dot(N, L), 0.0);
    return attenuation * dotNL;
}
// Fragment entry point: finds the cluster containing the fragment, accumulates the
// contribution of every light listed for that cluster, adds a small ambient term and
// applies gamma correction.
float4 main(VSOutput input) : SV_TARGET {
    const float3 N = normalize(input.Normal);
    const float3 V = normalize(ubo.camPos - input.WorldPos);
    const float roughness = material.roughness;

    // Project the fragment to get its screen position in [0, 1].
    const float4 viewPos = mul(ubo.view, float4(input.WorldPos, 1.0));
    float4 clipPos = mul(ubo.projection, viewPos);
    clipPos /= clipPos.w;
    const float2 screenPos = saturate(clipPos.xy * 0.5 + 0.5);

    // Logarithmic depth slice; this mapping has to agree with the one used when the lights
    // are binned on the CPU. The depth is kept at or above zNear so the logarithm cannot go
    // negative before the conversion to uint.
    const float zNear = 0.1;
    const float zFar = 256.0;
    const float viewZ = max(-viewPos.z, zNear);
    uint clusterZ = uint(log(viewZ / zNear) / log(zFar / zNear) * CLUSTER_SIZE_Z);

    // Cluster coordinates and flat cluster index
    uint clusterX = uint(screenPos.x * CLUSTER_SIZE_X);
    uint clusterY = uint(screenPos.y * CLUSTER_SIZE_Y);
    clusterX = clamp(clusterX, 0u, CLUSTER_SIZE_X - 1);
    clusterY = clamp(clusterY, 0u, CLUSTER_SIZE_Y - 1);
    clusterZ = clamp(clusterZ, 0u, CLUSTER_SIZE_Z - 1);
    const uint clusterIdx = clusterZ * CLUSTER_SIZE_X * CLUSTER_SIZE_Y + clusterY * CLUSTER_SIZE_X + clusterX;

    // Range of this cluster inside the light index list, bounded to the list size.
    const uint listBegin = clusterCountsandOffsets[clusterIdx].offsets;
    const uint listEnd = min(listBegin + clusterCountsandOffsets[clusterIdx].counts, uint(LIGHT_INDEX_LIST_SIZE));

    float3 Lo = float3(0.0, 0.0, 0.0);
    // Unsigned counter: both bounds are uint.
    for (uint i = listBegin; i < listEnd; i++) {
        const uint lightIdx = indices[i].clusterIndexList;
        const float3 lightVec = lights[lightIdx].position.xyz - input.WorldPos;
        const float3 L = normalize(lightVec);
        const float radianceFactor = radiance(lights[lightIdx].colorAndRadius.w, lightVec, N, L);
        const float3 lightColor = lights[lightIdx].colorAndRadius.xyz;
        Lo += BRDF(L, V, N, material.metallic, roughness) * lightColor * radianceFactor;
    }
    /*
    Reference path without clustering: loop over every light.
    for (uint i = 0; i < 64; i++) {
        float3 lightVec = lights[i].position.xyz - input.WorldPos;
        float3 L = normalize(lightVec);
        float radianceFactor = radiance(lights[i].colorAndRadius.w, lightVec, N, L);
        float3 lightColor = lights[i].colorAndRadius.xyz;
        Lo += BRDF(L, V, N, material.metallic, material.roughness) * lightColor * radianceFactor;
    }
    */

    // Combine the ambient term with the accumulated light contribution
    float3 color = materialcolor() * 0.02;
    color += Lo;
    // Gamma correction
    color = pow(color, float3(0.4545, 0.4545, 0.4545));
    return float4(color, 1.0);
}

浙公网安备 33010602011771号