实现流程

  • 在C#中设置好存储粒子信息的数组(ParticleArray),并使用DrawMeshInstancedIndirect对粒子(cube)进行实例化操作。
  • 在ComputeShader中对传入的ParticleArray中的数据进行计算(主要是对位置进行偏移计算)。
  • 在Shader中根据ParticleArray中的数据对粒子(cube)的顶点进行偏移,并运用数据中的uv对贴图进行采样渲染。

粒子数据结构体

Particle Info Struct
1
2
3
4
5
6
// Per-particle record uploaded to the GPU particle buffer.
// The field order mirrors the HLSL `struct Particle` (float3, float3, float2) used by the
// compute shader, and the buffer stride is taken from Marshal.SizeOf(typeof(Particle)),
// so the two declarations have to be kept in sync.
private struct Particle
{
// Current position; initialised on a grid in [-0.5, 0.5] and advanced by the compute shader.
public Vector3 position;
// Position the particle is pulled toward during the compute shader's morph phase.
public Vector3 customPosition;
// Texture coordinate in [0, 1]; forwarded by the vertex shader for texture sampling.
public Vector2 uv;
}

初始化粒子数组

Initial Particle Array
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
// Build one particle per texel of the source texture, laid out on a unit quad centred on the
// local origin, then upload the array and bind it to both the compute shader and the material.
m_Width = texture.width;
m_Height = texture.height;
m_Amount = m_Width * m_Height;
Particle[] particleArray = new Particle[m_Amount];
for (int i = 0; i < m_Width; i++)
{
    // x depends only on the column, so it is computed once per column.
    // For a single-column texture (m_Width - 1) is 0 and 0f / 0 would yield NaN,
    // so that axis is pinned to the centre (0.5) instead.
    float x = m_Width > 1 ? (float)i / (m_Width - 1) : 0.5f;
    for (int j = 0; j < m_Height; j++)
    {
        // Same guard for a single-row texture.
        float y = m_Height > 1 ? (float)j / (m_Height - 1) : 0.5f;
        int id = i * m_Height + j;

        // Normalised [0, 1] coordinates shifted to [-0.5, 0.5]; the particle starts at rest
        // on its own morph target.
        Vector3 gridPosition = new Vector3(x - 0.5f, y - 0.5f, 0);
        particleArray[id].position = gridPosition;
        particleArray[id].customPosition = gridPosition;
        particleArray[id].uv = new Vector2(x, y);
    }
}

// ComputeBuffers hold native memory that is not garbage collected; release any buffer left over
// from a previous initialisation before replacing it.
m_ParticleBuffer?.Release();
m_ParticleBuffer = new ComputeBuffer(m_Amount, Marshal.SizeOf(typeof(Particle)));
m_ParticleBuffer.SetData(particleArray);

// The same buffer is written by the compute kernel and read by the render material.
m_KernelID = computeShader.FindKernel("CSMain");
computeShader.SetBuffer(m_KernelID, k_ParticleBuffer, m_ParticleBuffer);
material.SetBuffer(k_ParticleBuffer, m_ParticleBuffer);

初始化粒子实例数据

Initial Particle Instance Info
1
2
3
4
5
6
7
8
9
10
11
// Indirect draw arguments for DrawMeshInstancedIndirect: five uints describing sub-mesh 0 of
// the particle mesh and the number of instances to draw.
uint[] args = new uint[5];
args[0] = particleMesh.GetIndexCount(0);  // index count per instance
args[1] = (uint)m_Amount;                 // instance count
args[2] = particleMesh.GetIndexStart(0);  // start index location
args[3] = particleMesh.GetBaseVertex(0);  // base vertex location
args[4] = 0;                              // start instance location

// One element whose stride covers the whole argument array.
int argsStride = args.Length * sizeof(uint);
m_ArgsBuffer = new ComputeBuffer(1, argsStride, ComputeBufferType.IndirectArguments);
m_ArgsBuffer.SetData(args);

执行CS并实例化粒子对象

Dispatch CS && Draw Instance
1
2
3
4
// Advance the particle simulation on the GPU, then draw every particle instance.
computeShader.Dispatch(m_KernelID, m_ThreadGroupCount, 1, 1);
material.SetMatrix(k_LocalToWorldMatrix, transform.localToWorldMatrix);

// The bounds passed to DrawMeshInstancedIndirect are used for culling, while the shader places
// the instances through transform.localToWorldMatrix. Bounds fixed at the world origin are
// therefore only valid for an untransformed emitter, so centre them on the transform and size
// them to enclose the unit volume under any rotation (cube diagonal = sqrt(3)) and scale.
// NOTE(review): particles displaced far from the grid by the compute shader may still leave
// these bounds; enlarge them if culling artefacts appear.
Vector3 scale = transform.lossyScale;
float maxAxisScale = Mathf.Max(Mathf.Abs(scale.x), Mathf.Max(Mathf.Abs(scale.y), Mathf.Abs(scale.z)));
Bounds worldBounds = new Bounds(transform.position, Vector3.one * (maxAxisScale * Mathf.Sqrt(3f)));
Graphics.DrawMeshInstancedIndirect(particleMesh, 0, material, worldBounds, m_ArgsBuffer);

在多线程中移动粒子

ComputeShader
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
// Declares CSMain as a compute kernel; the C# side looks it up with FindKernel("CSMain").
#pragma kernel CSMain

// GPU-side particle record. The field order matches the C# `Particle` struct
// (Vector3, Vector3, Vector2) whose array is uploaded into the buffer below.
struct Particle
{
float3 position; // current position, updated in place by CSMain
float3 customPosition; // position approached during the morph phase
float2 uv; // texture coordinate; not modified by CSMain
};
// Read/write particle storage, one element per particle.
// NOTE(review): presumably bound from C# through k_ParticleBuffer - confirm the property name.
RWStructuredBuffer<Particle> _ParticleBuffer;

// Moves one particle per thread. While _Time is below _MorphTime the particle eases toward its
// customPosition (10% of the remaining offset per dispatch); after that, while _Time is below
// _CurlTime, it drifts along curlNoise evaluated at its current position. Otherwise it is left
// untouched.
[numthreads(256, 1, 1)]
void CSMain(uint3 id : SV_DispatchThreadID)
{
    const uint index = id.x;
    const float3 current = _ParticleBuffer[index].position;

    float3 delta;
    if (_Time < _MorphTime)
    {
        // Morph phase: step a fixed fraction of the way to the target.
        delta = (_ParticleBuffer[index].customPosition - current) * 0.1;
    }
    else if (_Time < _CurlTime)
    {
        // Curl phase: advect along the noise field.
        delta = curlNoise(current) * 0.05;
    }
    else
    {
        // Neither phase is active: no write at all.
        return;
    }

    _ParticleBuffer[index].position = current + delta;
}

渲染粒子实例

Shader
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
// Builds the object-to-world matrix of a single particle: a uniform scale by _Size combined
// with a translation to `pos`, followed by the emitter's _LocalToWorldMatrix.
float4x4 GetObjectToWorldMatrix(float3 pos)
{
    // Rows of the per-particle matrix; the translation sits in the last column.
    float4 row0 = float4(_Size, 0, 0, pos.x);
    float4 row1 = float4(0, _Size, 0, pos.y);
    float4 row2 = float4(0, 0, _Size, pos.z);
    float4 row3 = float4(0, 0, 0, 1);
    float4x4 particleMatrix = float4x4(row0, row1, row2, row3);

    return mul(_LocalToWorldMatrix, particleMatrix);
}

// Vertex stage for an instanced particle: places the mesh vertex using the particle's
// position from _ParticleBuffer and forwards the particle's uv for texture sampling.
v2f vert (appdata v, uint instanceID : SV_InstanceID)
{
    v2f o;

    // Fetch the particle once and reuse it for both position and uv.
    Particle particle = _ParticleBuffer[instanceID];

    float4x4 objectToWorldMatrix = GetObjectToWorldMatrix(particle.position);

    // TransformWorldToHClip takes a float3 world position; take .xyz explicitly rather than
    // relying on an implicit float4 -> float3 truncation.
    float3 positionWS = mul(objectToWorldMatrix, v.vertex).xyz;
    o.pos = TransformWorldToHClip(positionWS);

    // Every vertex of the instance shares the particle's uv.
    o.uv = particle.uv;
    return o;
}

// Fragment stage: samples the origin and target textures at the particle's uv and blends
// between them by _Lerp.
half4 frag (v2f i) : SV_Target
{
    half4 originSample = tex2D(_OriginTex, i.uv);
    half4 targetSample = tex2D(_TargetTex, i.uv);
    return lerp(originSample, targetSample, _Lerp);
}

附 ComputeShader简述

  • Compute Shader简称cs,随DirectX 11正式引入(DX10级别的硬件可通过cs_4_x获得有限支持)。
  • cs可以做通用计算,在GPU上执行主要的计算过程,最终再将结果传递给CPU,这类非图形计算称为GPGPU。
  • cs虽然不在渲染流水线中,但它支持读写GPU资源,可以将运行结果直接传递到渲染管线,减少了从显存到内存的时间开销。
  • 基本概念:
    • GroupID:线程组ID
    • GroupThreadID:组内线程ID
    • DispatchThreadID:线程全局ID
    • DispatchThreadID = GroupID * numthreads + GroupThreadID
    • warp:GPU调度的基本单元
    • StructuredBuffer<类型>:只读的结构化缓冲区
    • RWStructuredBuffer<类型>:可读写的结构化缓冲区

TODO

  • curlNoise函数还没来得及深究,后续再看看相关的算法。