Introduction to my galaxy engine 9 : FFT Deep Ocean Water Simulation （计划进行中...）

目前为止DEMO效果视频

FFT512：

FFT256：

回首之前做的海面效果（主要参考的是ogre的海面例子），感觉还是有太多不足，毕竟的自己的第一个GPU程序，只能算是个试验品。一直困扰自己的一个问题就是如何解决法线贴图由于太小，用作海面上不够细致的问题。每小格delta UV太小导致一小块法线贴图铺在整个海面上则细节表现不够，反之用warp sample法线贴图，如果法线贴图不是无缝连接则有太明显的重复痕迹，就算多点采样效果也不是太好。海面法线直接影响到reflection和refraction的效果。之前demo中的refraction和reflection只是用噪音来映射和透射物体的形变，并非精确计算，打算在新demo中采用精确计算方法。海面的光照模型本身是非常复杂的，在Simulating Ocean Water[1]这篇论文中有所简单的介绍，之前demo中只使用了费内尔效果，新demo中打算尝试添加更复杂的光照模型。

今天突然发现Hydrax也添加了FFT生成海面高度的模块，正好也可以拿来研究一下。看代码才发现，它的IFFT是通过CPU来实现的_executeInverseFFT(),只有计算波的坡度法向量是通过GPU来计算的createGPUNormalMapResources()。Hydrax的它一些其他功能还有待改进，例如Caustics effects并不是精确算的，而用的是贴图。Foam effects看起来也不够真实，其他refraction and reflection effects不知道是不是用的是Perlin noise来fake的。

本人的电脑显卡比较落后，还不支持DX11，提高海面的细节用tessellation是没指望了。一个偶然机会看到用FFT动态生成实现海面顶点displacement map，看demo展示效果还不错，打算好好研究一下。

之前海面高度的模拟是通过物理建模在XZ平面的多个sin波的合成。而FFT方法则是从统计学的角度出发（Statistic based, not physics based），波浪的高度看做一个随机值，受其在水平海平面的位置和时间影响（In the statistical models,the wave height is considered a random variable of horizontal position and time）。同时，这种方法还能将波浪高度分解成很多sin,cos波形（Statistical models are also based on the ability to decompose the wave height ﬁeld as a sum of sine and cosine waves.）。此时生成的波是在频域中的，再通过IFFT变换到时域（Generate wave distribution in frequency domain, then perform inverse FFT ）。具体计算公式如下：

Lx,Lz是FFT所产生波的平面（patch）的大小，超出这个平面波形则成周期形式。M，N分别为整个X,Z方向上格子数量。

波浪高度场的坡度向量也很重要，可用于计算法向量，入射光角度等。完美的计算方法如下：

研究表明，公式19波的spatial spectrum可表示为

我们通常可以用Phillips spectrum模型来计算

L = V^2 /g; v表示风速，g为重力加速度，w为风向，a表示波幅度，k为平面上点位置。

这个模型的缺点是波的数量很小的时候，收敛属性比较差，所以需要再乘以一项：

l<<L.

波形的傅里叶高度场的幅度可表示为：

where ξr and ξi are ordinary independent draws from a gaussian random number generator, 均值为0 and 标准偏差为1。

原理图如下：

这部分的实现代码如下：

 1 void CFFTOceanShader::_Initialize()
 2 {
 3 //     m_pGaussianRandom = new complex[resolution*resolution];
 4 //      m_pPhillipsSpectrum = new complex[resolution*resolution];
 5     m_pIniWaves = new complex[resolution*resolution];
 6     m_pCurWaves = new complex[resolution*resolution];
 7     angularFrequencies = new float[resolution*resolution];
 8     m_pDx = new complex[resolution*resolution];
 9     m_pDy = new complex[resolution*resolution];
10 
11     D3DXVECTOR2 K = D3DXVECTOR2(0.0f,0.0f);
12 
13     complex* pTmpP = m_pPhillipsSpectrum;
14     //complex* pTmpG = m_pGaussianRandom;
15 
16     complex* pInitialWavesData = m_pIniWaves;
17     float* pAngularFrequenciesData = angularFrequencies;
18 
19     srand(0);// initialize random generator.
20 
21     float temp;
22     for (size_t v = 0; v < resolution; ++v)//resolution = M,N
23     {
24         //Kz
25         K.y = (resolution / -2.0f + v) * (2.0f * D3DX_PI / PhysicalResolution);//PhysicalResolution = Lx, Ly
26         //Kx
27         for (size_t u = 0; u < resolution; ++u)
28         {
29             K.x = (resolution / -2.0f + u) * (2.0f * D3DX_PI / PhysicalResolution);
30 
31              //*pTmpG = complex(_getGaussianRandomFloat(), _getGaussianRandomFloat());
32             //++pTmpG;
33 
34 //             *pTmpP = complex(_getPhillipsSpectrum(K), 0.0f);
35 //              ++pTmpP;
36             
37             temp = sqrtf(0.5f * _getPhillipsSpectrum(K));
38             *pInitialWavesData = complex(_getGaussianRandomFloat() * temp, _getGaussianRandomFloat() * temp);
39             ++pInitialWavesData;
40 
41             temp = GRAVITY * D3DXVec2Length(&K);
42             *pAngularFrequenciesData++ =sqrt(temp);//dispersion relation w(k) = sqrt(g*k)
43         }
44     }
45 
46     _calculeNoise(0);
47 }
48 
49 const float CFFTOceanShader::_getPhillipsSpectrum(const D3DXVECTOR2& K) const
50 {
51     // Compute the length of the vector
52     float ksqr = D3DXVec2LengthSq(&K);
53 
54     // To avoid division by 0
55     if (ksqr < 0.0000001f) 
56     {
57         return 0;
58     }
59     else
60     {
61         float L = powf(WindSpeed, 2.0f) / GRAVITY;
62 
63         // damp out waves with very small length w << l
64         float w = L / 1000.0f65 
66         float dot = D3DXVec2Dot(&K, &WindDirection);
67         float a = K.x * WindDirection.x + K.y * WindDirection.y;
68 
69         //A * (exp(-1/(kL)^2)/k^4) * (dot(k,w))^2 * exp(-k^2 * l^2)
70         float phillips = Amplitude * expf(-1 / (ksqr * L * L))  /  (ksqr * ksqr) * (dot * dot);
71 
72         if (dot < 0.00001f)
73             phillips *= WindDependency;
74 
75         // damp out waves with very small length w << l
76         return phillips * expf(-ksqr * w * w);
77     } 
78 }
79 
80 const float CFFTOceanShader::_getGaussianRandomFloat() const
81 {
82     float u1 = rand() / (float)RAND_MAX;
83     float u2 = rand() / (float)RAND_MAX;
84     if (u1 < 1e-6f)
85         u1 = 1e-6f;
86     return sqrtf(-2.0f * logf(u1)) * cosf(2.0f * D3DX_PI * u2);
87 }

自己实现的截图

GaussianRandom:

PhillipsSpectrum:

h0(k)

波形的傅里叶高度场在t时间的幅度为

omega = sqrtf（g*k）

有了h(k,t),2D平面上的displacement vector ﬁeld 可以通过如下公式计算：

其中

通过这个向量场，格子顶点在水平面上的位置更新为：x+λD(x,t).

h(k,t), D(x,t)实现代码如下：

 1 void CFFTOceanShader::_calculeNoise(const float &delta)
 2 {
 3     time += delta * AnimationSpeed;
 4 
 5     complex* pData = m_pCurWaves;//h(t)
 6     complex* pDx = m_pDx;
 7     complex* pDy = m_pDy;
 8 
 9     size_t u, v;
10     float wt, coswt, sinwt,    realVal, imagVal;
11     D3DXVECTOR2 K = D3DXVECTOR2(0.0f,0.0f);
12 
13     for (v = 0; v < resolution ; v++)//for each row
14     {
15         //Kz
16         K.y = (resolution / -2.0f + v) * (2.0f * D3DX_PI / PhysicalResolution);
17 
18         for (u = 0; u < resolution; u++)
19         {
20             K.x = (resolution / -2.0f + u) * (2.0f * D3DX_PI / PhysicalResolution);
21 
22             const complex& positive_h0 = m_pIniWaves[v * resolution + u];
23             const complex& negative_h0 = m_pIniWaves[(resolution-1 - v) * (resolution) + (resolution-1- u)];
24 
25             wt = angularFrequencies[v * resolution + u] * time;//w(k)*t
26 
27             coswt = cosf(wt);
28             sinwt = sinf(wt);
29 
30             realVal = positive_h0.re() * coswt - positive_h0.im() * sinwt + negative_h0.re() * coswt - (-negative_h0.im()) * (-sinwt),
31             imagVal = positive_h0.re() * sinwt + positive_h0.im() * coswt + negative_h0.re() * (-sinwt) + (-negative_h0.im()) * coswt;
32 
33             *pData++ = complex(realVal, imagVal);
34 
35             float ksqr = D3DXVec2LengthSq(&K);
36             // To avoid division by 0
37             if (ksqr < 0.0000001f) 
38             {
39                 *pDx = *pDy = complex(0.0f);
40                 ++pDx;
41                 ++pDy; 
42             }
43             else
44             {
45                 float Kx = K.x / ksqr;
46                 float Ky = K.y / ksqr;
47 
48                 *pDx = complex(-imagVal * Kx, realVal * Kx);
49                 ++pDx;
50                 *pDy = complex(-imagVal * Ky, realVal * Ky);
51                 ++pDy; 
52             }
53         }
54     }
55 
56     //make sure resolution is in the power of 2!!!!!!!!!!!!!!!!!!
57     //_executeInverseFFT();
58 }

h(k, t):

Dx:

Dy:

还有一个难点就是FFT算法的GPU实现。一般是通过Radix-X FFT Algorithms在compute shader或者pixel shader来实现。问题来的，我的显卡还不支持DX11 computer shader, 觉定暂时先在CPU上实现二维IDFT.

以下是Radix-2 FFT Algorithms示意图：

参考一维IDFT,自己实现了个二维IDFT

  1 void CFFTOceanShader::_executeInverseFFT()
  2 {
  3     //Bit reversal of each row
  4     for(size_t y = 0; y < resolution; ++y) //for each row
  5     {
  6         size_t Target = 0;
  7         //   Process all positions of input signal
  8         for (size_t Position = 0; Position < resolution; ++Position)
  9         {
 10             if (Target > Position)
 11             {
 12                 //   Swap entries
 13                 const complex Temp(m_pCurWaves[resolution * Target + y]);
 14                 m_pCurWaves[resolution * Target + y] = m_pCurWaves[resolution * Position + y];
 15                 m_pCurWaves[resolution * Position + y] = Temp;
 16             }
 17             //   Bit mask
 18             size_t Mask = resolution;
 19             //   While bit is set
 20             while (Target & (Mask >>= 1))
 21                 //   Drop bit
 22                 Target &= ~Mask;
 23             //   The current bit is 0 - set it
 24             Target |= Mask;
 25         }
 26     }
 27 
 28     //Bit reversal of each row
 29     for(size_t x = 0; x < resolution; ++x) //for each column
 30     {
 31         size_t Target = 0;
 32         //   Process all positions of input signal
 33         for (size_t Position = 0; Position < resolution; ++Position)
 34         {
 35             //   Only for not yet swapped entries
 36             if (Target > Position)
 37             {
 38                 //   Swap entries
 39                 const complex Temp(m_pCurWaves[resolution * x + Position]);
 40                 m_pCurWaves[resolution * x + Position] = m_pCurWaves[resolution * x + Target];
 41                 m_pCurWaves[resolution * x + Target] = Temp;
 42             }
 43             //   Bit mask
 44             size_t Mask = resolution;
 45             //   While bit is set
 46             while (Target & (Mask >>= 1))
 47                 //   Drop bit
 48                 Target &= ~Mask;
 49             //   The current bit is 0 - set it
 50             Target |= Mask;
 51         }
 52     }
 53 
 54      for(size_t y = 0; y < resolution; y++) //for each row
 55      {
 56          //   Iteration through dyads, quadruples, octads and so on...
 57         for (size_t Step = 1; Step < resolution; Step <<= 1)
 58         {
 59             //   Jump to the next entry of the same transform factor
 60             const size_t Jump = Step << 1;
 61             //   Angle increment
 62             const float delta = D3DX_PI / float(Step);
 63             //   Auxiliary sin(delta / 2)
 64             const float Sine = sin(delta * .5);
 65             //   Multiplier for trigonometric recurrence
 66             const complex Multiplier(-2. * Sine * Sine, sin(delta));
 67             //   Start value for transform factor (twiddle), fi = 0
 68             complex Factor(1.);
 69             //   Iteration through groups of different transform factor
 70             for (size_t Group = 0; Group < Step; ++Group)
 71             {
 72                 //   Iteration within group 
 73                 for (size_t Pair = Group; Pair < resolution; Pair += Jump)
 74                 {
 75                     //   Match position
 76                     const size_t Match = Pair + Step;
 77                     //   Second term of two-point transform
 78                     const complex Product(Factor * m_pCurWaves[resolution * Match + y]);
 79                     //   Transform for fi + pi
 80                     m_pCurWaves[resolution * Match + y] = m_pCurWaves[resolution * Pair + y] - Product;
 81                     //   Transform for fi
 82                     m_pCurWaves[resolution * Pair + y] += Product;
 83                 }
 84                 //   Successive transform factor via trigonometric recurrence
 85                 Factor = Multiplier * Factor + Factor;
 86             }
 87         }
 88      }
 89     
 90     for(size_t x = 0; x < resolution; ++x) //for each column
 91     {
 92         //   Iteration through dyads, quadruples, octads and so on...
 93         for (size_t Step = 1; Step < resolution; Step <<= 1)
 94         {
 95             //   Jump to the next entry of the same transform factor
 96             const size_t Jump = Step << 1;
 97             //   Angle increment
 98             const float delta = D3DX_PI / float(Step);
 99             //   Auxiliary sin(delta / 2)
100             const float Sine = sin(delta * .5);
101             //   Multiplier for trigonometric recurrence
102             const complex Multiplier(-2. * Sine * Sine, sin(delta));
103             //   Start value for transform factor, fi = 0
104             complex Factor(1.);
105             //   Iteration through groups of different transform factor
106             for (size_t Group = 0; Group < Step; ++Group)
107             {
108                 //   Iteration within group 
109                 for (size_t Pair = Group; Pair < resolution; Pair += Jump)
110                 {
111                     //   Match position
112                     const size_t Match = Pair + Step;
113                     //   Second term of two-point transform
114                     const complex Product(Factor * m_pCurWaves[resolution * x + Match]);
115                     //   Transform for fi + pi
116                     m_pCurWaves[resolution * x + Match] = m_pCurWaves[resolution * x + Pair] - Product;
117                     //   Transform for fi
118                     m_pCurWaves[resolution * x + Pair] += Product;
119                 }
120                 //   Successive transform factor via trigonometric recurrence
121                 Factor = Multiplier * Factor + Factor;
122             }
123         }
124     }
125 
126     //   Scaling of inverse FFT result
127     size_t NM = resolution * resolution;
128     const float Factor = 1. / float(resolution);
129     for (size_t i = 0; i < NM; ++i)
130         m_pCurWaves[i] *= Factor;
131 }

IDFT of h(k,t) or Dz:

由于我的Dx, Dy, Dz是在CPU中生成的，为了GPU中生成displacement map，我将Dx, Dy, Dz分别写入三个texture2d：

 1     for (size_t i = 0; i < resolution;  i++)
 2      {
 3          for (size_t j = 0; j < resolution; j++)
 4              m_Texture2D[i][j] = pD[i*resolution + j].re() * m_DisplacementMapScale;//add scale factor
 5      }
 6   
 7      D3D10_TEXTURE2D_DESC textureDesc;
 8      textureDesc.Height = resolution;
 9      textureDesc.Width = resolution;
10      textureDesc.MipLevels = 1;
11      textureDesc.ArraySize = 1;
12      textureDesc.Format = DXGI_FORMAT_R32_FLOAT;
13      textureDesc.SampleDesc.Count = 1;
14      textureDesc.SampleDesc.Quality = 0;
15      textureDesc.Usage = D3D10_USAGE_DEFAULT;
16      textureDesc.BindFlags = D3D10_BIND_SHADER_RESOURCE;
17      textureDesc.CPUAccessFlags = 0;
18      textureDesc.MiscFlags = 0;
19      D3D10_SUBRESOURCE_DATA InitData;
20      ZeroMemory( &InitData, sizeof( D3D10_SUBRESOURCE_DATA ) );
21      InitData.pSysMem = m_Texture2D;
22      InitData.SysMemPitch = sizeof(m_Texture2D[0][0]) * resolution;
23      HRESULT hr = m_pd3dDevice->CreateTexture2D(&textureDesc, &InitData, &m_RenderTargetTexture[RT]);
24  
25      D3D10_SHADER_RESOURCE_VIEW_DESC shaderResourceViewDesc;
26      shaderResourceViewDesc.Format = textureDesc.Format;
27      shaderResourceViewDesc.ViewDimension = D3D10_SRV_DIMENSION_TEXTURE2D;
28      shaderResourceViewDesc.Texture2D.MostDetailedMip = 0;
29      shaderResourceViewDesc.Texture2D.MipLevels = 1;
30      hr = m_pd3dDevice->CreateShaderResourceView(m_RenderTargetTexture[RT], &shaderResourceViewDesc, &m_ShaderResourceView[RT]); 
31  
32      m_pSRV[RT]->SetResource(m_ShaderResourceView[RT]);

在GPU生成displacement map过程就是将Dx, Dy, Dz分别写入displacement map的R,G,B三个分量中：

 1 //Dx, Dy, Dz -> Displacement
 2 float4 PS_GRID(PS_GRID_INPUT input) : SV_Target
 3 {
 4     float4 displacement = 0.0f;
 5     displacement.r = g_TextureDx.Sample( g_samWrap, input.TexCoord).x;
 6     displacement.g = g_TextureDy.Sample( g_samWrap, input.TexCoord).x;
 7     displacement.b = g_TextureDz.Sample( g_samWrap, input.TexCoord).x;
 8     displacement.a = 1.0f;
 9 
10     return displacement;
11 }

生成的displacement map如下图所示：

有了displacement map，接着就可以在pixel shader中生成normal map.

 1 // Sample neighbour texels
 2     float2 one_texel = float2(1.0f / g_Dim.x, 1.0f / g_Dim.y);
 3 
 4     float normalStrength = 0.5;
 5     float tl = abs(g_TextureDP.Sample(g_samWrap, In.TexCoord + one_texel * float2(-1, -1)).x);   // top left
 6     float  l = abs(g_TextureDP.Sample(g_samWrap, In.TexCoord + one_texel * float2(-1,  0)).x);   // left
 7     float bl = abs(g_TextureDP.Sample(g_samWrap, In.TexCoord + one_texel * float2(-1,  1)).x);   // bottom left
 8     float  t = abs(g_TextureDP.Sample(g_samWrap, In.TexCoord + one_texel * float2( 0, -1)).x);   // top
 9     float  b = abs(g_TextureDP.Sample(g_samWrap, In.TexCoord + one_texel * float2( 0,  1)).x);   // bottom
10     float tr = abs(g_TextureDP.Sample(g_samWrap, In.TexCoord + one_texel * float2( 1, -1)).x);   // top right
11     float  r = abs(g_TextureDP.Sample(g_samWrap, In.TexCoord + one_texel * float2( 1,  0)).x);   // right
12     float br = abs(g_TextureDP.Sample(g_samWrap, In.TexCoord + one_texel * float2( 1,  1)).x);   // bottom right
13  
14     // Compute dx using Sobel:
15     //           -1 0 1 
16     //           -2 0 2
17     //           -1 0 1
18     float dX = tr + 2*r + br -tl - 2*l - bl;
19  
20     // Compute dy using Sobel:
21     //           -1 -2 -1 
22     //            0  0  0
23     //            1  2  1
24     float dY = bl + 2*b + br -tl - 2*t - tr;
25  
26     // Build the normalized normal
27     float3 normal = float3(normalize(float3(dX, 1.0f / normalStrength, dY)));//float3(0,1,0);//

在vertex shader中根据displacement map可以修改平面顶点位置：

float3 displacement = g_TextureDP.SampleLevel(g_samWrap, uv_local.xz, 0);
output.Pos.xyz = input.Pos.xyz + displacement;//add displacement map to vertex pos

初步海面效果如下（IDFT采样精度256*256）