分块延迟着色(Tiled Deferred Shading):OpenGL 中图块视锥体的计算

我正在尝试使用计算着色器在 OpenGL 中实现分块延迟着色(tiled deferred shading),但在为每个图块构建视锥体(平截头体)时遇到了一些障碍。我以 AMD 的 Forward+ 演示(用 D3D 编写)作为参考,但灯光似乎在不该被剔除的时候被剔除了。

UPDATE

阅读下面的更新。

这是我的(完整)计算着色器:

#version 430 core

#define MAX_LIGHTS 1024
#define MAX_LIGHTS_PER_TILE 40
#define WORK_GROUP_SIZE 16

// One point light as laid out in the light buffer below.
struct PointLight
{
    vec3 position;
    float radius;
    vec3 color;
    float intensity;
};

// Output image plus the G-buffer inputs. normalDepth is read in main() as
// xyz = normal, w = depth.
layout (binding = 0, rgba32f) uniform writeonly image2D outTexture;
layout (binding = 1, rgba32f) uniform readonly image2D normalDepth;
layout (binding = 2, rgba32f) uniform readonly image2D diffuse;
layout (binding = 3, rgba32f) uniform readonly image2D specular;
layout (binding = 4, rgba32f) uniform readonly image2D glowMatID;

layout (std430, binding = 5) buffer BufferObject
{
    PointLight pointLights[];
};

uniform mat4 view;
uniform mat4 proj;
uniform mat4 viewProj;
uniform mat4 invViewProj;
uniform mat4 invProj;
uniform vec2 framebufferDim;

// One work group is one WORK_GROUP_SIZE x WORK_GROUP_SIZE pixel tile.
layout (local_size_x = WORK_GROUP_SIZE, local_size_y = WORK_GROUP_SIZE) in;

// Per-tile scratch shared by the work group. GLSL does not allow initializers
// on shared variables (their contents are undefined when the shader starts),
// so these are reset explicitly at the top of main().
shared uint minDepth;
shared uint maxDepth;
shared uint pointLightIndex[MAX_LIGHTS];
shared uint pointLightCount;

// Rebuilds a position from a depth value and a [0,1] screen coordinate by
// un-projecting through invViewProj.
vec3 ReconstructWP(float z, vec2 uv_f)
{
    vec4 sPos = vec4(uv_f * 2.0 - 1.0, z, 1.0);
    sPos = invViewProj * sPos;
    return (sPos.xyz / sPos.w);
}

// Transforms a point by invProj and applies the perspective divide.
vec4 ConvertProjToView( vec4 p )
{
    p = invProj * p;
    p /= p.w;
    return p;
}

// calculate the number of tiles in the horizontal direction
// (hard-coded 1280-pixel width, rounded up to whole tiles)
uint GetNumTilesX()
{
    return uint(( ( 1280 + WORK_GROUP_SIZE - 1 ) / float(WORK_GROUP_SIZE) ));
}

// calculate the number of tiles in the vertical direction
// (hard-coded 720-pixel height, rounded up to whole tiles)
uint GetNumTilesY()
{
    return uint(( ( 720 + WORK_GROUP_SIZE - 1 ) / float(WORK_GROUP_SIZE) ));
}

// Builds the plane through the origin and the points b and c.
// Returns (normal.xyz, 0).
vec4 CreatePlaneEquation( vec4 b, vec4 c )
{
    vec4 n;

    // normalize(cross( b.xyz-a.xyz, c.xyz-a.xyz )), except we know "a" is the origin
    n.xyz = normalize(cross( b.xyz, c.xyz ));

    // -(n dot a), except we know "a" is the origin
    n.w = 0.0;

    return n;
}

// Signed distance from p to a plane produced by CreatePlaneEquation.
float GetSignedDistanceFromPlane( vec4 p, vec4 eqn )
{
    // dot( eqn.xyz, p.xyz ) + eqn.w, except we know eqn.w is zero
    // (see CreatePlaneEquation above)
    return dot( eqn.xyz, p.xyz );
}

// Diffuse contribution of one point light with linear distance falloff.
// wSpec and wGlow are accepted but not used.
vec4 CalculateLighting( PointLight p, vec3 wPos, vec3 wNormal, vec4 wSpec, vec4 wGlow)
{
    vec3 direction = p.position - wPos;

    // Outside the light's radius: no contribution.
    if(length(direction) > p.radius)
        return vec4(0.0f, 0.0f, 0.0f, 0.0f);

    float attenuation = 1.0f - length(direction) / (p.radius);
    direction = normalize(direction);
    float diffuseFactor = max(0.0f, dot(direction, wNormal)) * attenuation;
    return vec4(p.color.xyz, 0.0f) * diffuseFactor * p.intensity;
}

void main()
{
    // Reset the per-tile shared state once, then make every invocation wait
    // so nobody touches it before it has been written.
    if (gl_LocalInvocationIndex == 0)
    {
        minDepth = 0xFFFFFFFF;
        maxDepth = 0;
        pointLightCount = 0;
    }
    barrier();

    ivec2 pixelPos = ivec2(gl_GlobalInvocationID.xy);
    // Only referenced by the commented-out debug output at the bottom.
    vec2 tilePos = vec2(gl_WorkGroupID.xy * gl_WorkGroupSize.xy) / vec2(1280, 720);

    vec4 normalColor = imageLoad(normalDepth, pixelPos);
    float d = normalColor.w;

    // Scale depth to an integer so the tile's min/max can be found with atomics.
    uint depth = uint(d * 0xFFFFFFFF);
    atomicMin(minDepth, depth);
    atomicMax(maxDepth, depth);

    barrier();

    // Back to the same scale as d.
    float minDepthZ = float(minDepth / float(0xFFFFFFFF));
    float maxDepthZ = float(maxDepth / float(0xFFFFFFFF));

    vec4 frustumEqn[4];

    // Pixel bounds of this tile: m = min edge, p = max edge.
    uint pxm = WORK_GROUP_SIZE * gl_WorkGroupID.x;
    uint pym = WORK_GROUP_SIZE * gl_WorkGroupID.y;
    uint pxp = WORK_GROUP_SIZE * (gl_WorkGroupID.x + 1);
    uint pyp = WORK_GROUP_SIZE * (gl_WorkGroupID.y + 1);

    uint uWindowWidthEvenlyDivisibleByTileRes = WORK_GROUP_SIZE * GetNumTilesX();
    uint uWindowHeightEvenlyDivisibleByTileRes = WORK_GROUP_SIZE * GetNumTilesY();

    // Four tile corners at z = 1, w = 1, taken through invProj.
    // NOTE(review): y is flipped (height - py) as in the D3D reference this
    // was ported from - confirm that matches the row order of these images.
    vec4 frustum[4];
    frustum[0] = ConvertProjToView( vec4(
        pxm / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
        (uWindowHeightEvenlyDivisibleByTileRes - pym) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
        1.0f, 1.0f) );
    frustum[1] = ConvertProjToView( vec4(
        pxp / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
        (uWindowHeightEvenlyDivisibleByTileRes - pym) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
        1.0f, 1.0f) );
    frustum[2] = ConvertProjToView( vec4(
        pxp / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
        (uWindowHeightEvenlyDivisibleByTileRes - pyp) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
        1.0f, 1.0f) );
    frustum[3] = ConvertProjToView( vec4(
        pxm / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
        (uWindowHeightEvenlyDivisibleByTileRes - pyp) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
        1.0f, 1.0f) );

    // One side plane per pair of adjacent corners ((i+1) & 3 wraps 3 -> 0).
    for (int i = 0; i < 4; i++)
        frustumEqn[i] = CreatePlaneEquation(frustum[i], frustum[(i+1) & 3]);

    barrier();

    // Each invocation tests lights il, il + 256, il + 512, ... against the tile.
    int threadsPerTile = WORK_GROUP_SIZE * WORK_GROUP_SIZE;
    for (uint i = 0; i < MAX_LIGHTS; i+= threadsPerTile)
    {
        uint il = gl_LocalInvocationIndex + i;
        if (il < MAX_LIGHTS)
        {
            PointLight p = pointLights[il];

            vec4 viewPos = view * vec4(p.position, 1.0f);
            float r = p.radius;

            // NOTE(review): minDepthZ/maxDepthZ are on the scale of
            // normalDepth.w while viewPos.z is a view-space coordinate -
            // confirm the two are in comparable units.
            if (viewPos.z + minDepthZ < r && viewPos.z - maxDepthZ < r)
            {
                if( ( GetSignedDistanceFromPlane( viewPos, frustumEqn[0] ) < r ) &&
                    ( GetSignedDistanceFromPlane( viewPos, frustumEqn[1] ) < r ) &&
                    ( GetSignedDistanceFromPlane( viewPos, frustumEqn[2] ) < r ) &&
                    ( GetSignedDistanceFromPlane( viewPos, frustumEqn[3] ) < r) )
                {
                    // Reserve a slot in the tile's light list.
                    uint id = atomicAdd(pointLightCount, 1);
                    pointLightIndex[id] = il;
                }
            }
        }
    }

    barrier();

    vec4 diffuseColor = imageLoad(diffuse, pixelPos);
    vec4 specularColor = imageLoad(specular, pixelPos);
    vec4 glowColor = imageLoad(glowMatID, pixelPos);

    vec2 uv = vec2(pixelPos.x / 1280.0f, pixelPos.y / 720.0f);

    vec3 wp = ReconstructWP(d, uv);
    vec4 color = vec4(0.0f, 0.0f, 0.0f, 1.0f);

    // Accumulate every light that survived culling for this tile.
    for (int i = 0; i < pointLightCount; i++)
    {
        color += CalculateLighting( pointLights[pointLightIndex[i]], wp, normalColor.xyz, specularColor, glowColor);
    }

    barrier();

    // Debug overlay: grey lines on tile borders.
    // NOTE(review): with a 16x16 work group the local IDs run 0..15, so the
    // "== 16" comparisons can never match.
    if (gl_LocalInvocationID.x == 0 || gl_LocalInvocationID.y == 0 || gl_LocalInvocationID.x == 16 || gl_LocalInvocationID.y == 16)
        imageStore(outTexture, pixelPos, vec4(.2f, .2f, .2f, 1.0f));
    else
    {
        imageStore(outTexture, pixelPos, color);
        //imageStore(outTexture, pixelPos, vec4(maxDepthZ));
        //imageStore(outTexture, pixelPos, vec4(pointLightCount / 128.0f));
        //imageStore(outTexture, pixelPos, vec4(vec2(tilePos.xy), 0.0f, 1.0f));
    }
}

这是我认为有问题的部分,即剔除(culling)部分:

  // Culling excerpt from main(): builds four side planes for this tile and
  // appends every light that passes the depth and plane tests to the shared
  // per-tile list.
  barrier();

  // Tile depth bounds, converted back from the integer scale used by the atomics.
  float minDepthZ = float(minDepth / float(0xFFFFFFFF));
  float maxDepthZ = float(maxDepth / float(0xFFFFFFFF));

  vec4 frustumEqn[4];

  // Pixel bounds of this tile: m = min edge, p = max edge.
  uint pxm = WORK_GROUP_SIZE * gl_WorkGroupID.x;
  uint pym = WORK_GROUP_SIZE * gl_WorkGroupID.y;
  uint pxp = WORK_GROUP_SIZE * (gl_WorkGroupID.x + 1);
  uint pyp = WORK_GROUP_SIZE * (gl_WorkGroupID.y + 1);

  uint uWindowWidthEvenlyDivisibleByTileRes = WORK_GROUP_SIZE * GetNumTilesX();
  uint uWindowHeightEvenlyDivisibleByTileRes = WORK_GROUP_SIZE * GetNumTilesY();

  // Four tile corners at z = 1, w = 1, passed through ConvertProjToView.
  // NOTE(review): y is flipped (height - py) - confirm this matches the row
  // order of the images being read and written.
  vec4 frustum[4];
  frustum[0] = ConvertProjToView( vec4( pxm / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
      (uWindowHeightEvenlyDivisibleByTileRes - pym) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
      1.0f, 1.0f) );
  frustum[1] = ConvertProjToView( vec4( pxp / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
      (uWindowHeightEvenlyDivisibleByTileRes - pym) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
      1.0f, 1.0f) );
  frustum[2] = ConvertProjToView( vec4( pxp / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
      (uWindowHeightEvenlyDivisibleByTileRes - pyp) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
      1.0f ,1.0f) );
  frustum[3] = ConvertProjToView( vec4( pxm / float(uWindowWidthEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
      (uWindowHeightEvenlyDivisibleByTileRes - pyp) / float(uWindowHeightEvenlyDivisibleByTileRes) * 2.0f - 1.0f,
      1.0f, 1.0f) );

  // One side plane per pair of adjacent corners ((i+1) & 3 wraps 3 -> 0).
  for (int i = 0; i < 4; i++)
      frustumEqn[i] = CreatePlaneEquation(frustum[i], frustum[(i+1) & 3]);

  barrier();

  // Each invocation tests lights il, il + threadsPerTile, ... against the tile.
  int threadsPerTile = WORK_GROUP_SIZE * WORK_GROUP_SIZE;
  for (uint i = 0; i < MAX_LIGHTS; i+= threadsPerTile)
  {
      uint il = gl_LocalInvocationIndex + i;
      if (il < MAX_LIGHTS)
      {
          PointLight p = pointLights[il];

          vec4 viewPos = view * vec4(p.position, 1.0f);
          float r = p.radius;

          // NOTE(review): minDepthZ/maxDepthZ are on the scale of the stored
          // depth value while viewPos.z is a view-space coordinate - confirm
          // the two are in comparable units.
          if (viewPos.z + minDepthZ < r && viewPos.z - maxDepthZ < r)
          {
              if( ( GetSignedDistanceFromPlane( viewPos, frustumEqn[0] ) < r ) &&
                  ( GetSignedDistanceFromPlane( viewPos, frustumEqn[1] ) < r ) &&
                  ( GetSignedDistanceFromPlane( viewPos, frustumEqn[2] ) < r ) &&
                  ( GetSignedDistanceFromPlane( viewPos, frustumEqn[3] ) < r) )
              {
                  // Reserve a slot in the tile's light list.
                  uint id = atomicAdd(pointLightCount, 1);
                  pointLightIndex[id] = il;
              }
          }
      }
  }

  barrier();

奇怪的是,当我把每个图块的灯光数量可视化时,结果显示所有图块都或多或少带有一些灯光(第一张图)。

第二张图显示了最终的输出结果:屏幕中间只有一条细细的光带,其上方和下方什么都没有。去掉剔除(即 GetSignedDistanceFromPlane() 的判断)可以得到期望的结果,但帧率会急剧下降。

在这里输入图像描述

在这里输入图像描述

我猜是视锥体(平截头体)算错了,但我不太确定它背后的数学原理,希望能得到一些帮助。

编辑:添加另一个图像,显示预期的输出。

在这里输入图像描述

更新1

我们已经改变了如何完成剔除,代码现在看起来像这样:

 // "Update 1" culling excerpt: derives the tile's side planes directly from
 // the projection matrix and tests each light's bounding sphere against them.
 barrier();

 // Tile depth bounds, converted back from the integer scale used by the atomics.
 float minDepthZ = float(minDepth / float(0xFFFFFFFF));
 float maxDepthZ = float(maxDepth / float(0xFFFFFFFF));

 //total tiles = tileScale * 2
 vec2 tileScale = vec2(1280, 720) * (1.0f / float(2*WORK_GROUP_SIZE));
 vec2 tileBias = tileScale - vec2(gl_WorkGroupID.xy);

 // Rows built from the projection scale terms and this tile's scale/bias.
 vec4 c1 = vec4(-proj[0][0] * tileScale.x, 0.0f, tileBias.x, 0.0f);
 vec4 c2 = vec4(0.0f, -proj[1][1] * tileScale.y, tileBias.y, 0.0f);
 vec4 c4 = vec4(0.0f, 0.0f, 1.0f, 0.0f);

 // Derive frustum planes
 vec4 frustumPlanes[6];

 // Sides
 //right
 frustumPlanes[0] = c4 - c1;
 //left
 frustumPlanes[1] = c4 + c1;
 //bottom
 frustumPlanes[2] = c4 - c2;
 //top
 frustumPlanes[3] = c4 + c2;

 // Near/far
 // These two planes are filled in but never tested: the loops below only
 // visit indices 0..3.
 frustumPlanes[4] = vec4(0.0f, 0.0f, 1.0f, -minDepthZ);
 frustumPlanes[5] = vec4(0.0f, 0.0f, -1.0f, maxDepthZ);

 // Normalize the side planes so dot() yields a distance comparable to a radius.
 for(int i = 0; i < 4; i++)
 {
     frustumPlanes[i] *= 1.0f / length(frustumPlanes[i].xyz);
 }

 //DO CULLING HERE
 // NOTE(review): the stride is WORK_GROUP_SIZE, but if the work group is
 // still WORK_GROUP_SIZE x WORK_GROUP_SIZE invocations then
 // gl_LocalInvocationIndex spans that whole range, so the same light index is
 // visited (and can be appended) by several invocations - check whether the
 // stride should be the invocation count instead.
 for (uint lightIndex = gl_LocalInvocationIndex; lightIndex < numActiveLights; lightIndex += WORK_GROUP_SIZE)
 {
     PointLight p = pointLights[lightIndex];

     // Always true here: the loop condition already guarantees it.
     if (lightIndex < numActiveLights)
     {
         bool inFrustum = true;
         for (uint i = 0; i < 4; i++)
         {
             // Signed distance of the light's view-space centre to side plane i.
             float dd = dot(frustumPlanes[i], view * vec4(p.position, 1.0f));
             // NOTE(review): reads p.radius_length, so PointLight presumably
             // differs from the struct in the full listing (which has
             // "radius") - confirm.
             inFrustum = inFrustum && (dd >= -p.radius_length);
         }

         if (inFrustum)
         {
             // Reserve a slot in the tile's light list.
             uint id = atomicAdd(pointLightCount, 1);
             pointLightIndex[id] = lightIndex;
         }
     }
 }

 barrier();

这样效果好多了:灯光现在能按图块被正确剔除(最小/最大深度除外,因为这部分还没有正确实现)。到目前为止一切顺利,但是!灯光的边缘处有问题——图块没有覆盖整个光照半径,而且性能非常糟糕:1024 个灯光时最多只有 40 fps,并伴随严重的卡顿。

这个视频展示了边缘处发生的情况:灰色图块表示受该灯光(单个点光源)影响的图块,红色部分是被着色的几何体。

在剔除时把半径放大可以“奏效”,但会让性能下降得更厉害。

最终答案:性能问题解决了!我把剔除循环改成了下面这样(基于 DICE 在《战地 3》(BF3)中使用的循环):

 // Final culling loop: the work group walks the light list in passes of
 // threadCount lights, one light per invocation per pass.
 // p, pos, rad, inFrustum, dist and id are assigned here but not declared in
 // this excerpt.
 uint threadCount = WORK_GROUP_SIZE * WORK_GROUP_SIZE;
 // Number of passes needed to cover numActiveLights, rounded up.
 uint passCount = (numActiveLights + threadCount - 1) / threadCount;

 for (uint passIt = 0; passIt < passCount; ++passIt)
 {
     uint lightIndex = passIt * threadCount + gl_LocalInvocationIndex;

     // NOTE(review): this clamps to numActiveLights rather than
     // numActiveLights - 1, so invocations past the end read
     // pointLights[numActiveLights]. Presumably the buffer holds a trailing
     // "null" light there - confirm.
     lightIndex = min(lightIndex, numActiveLights);

     p = pointLights[lightIndex];
     pos = view * vec4(p.position, 1.0f);
     rad = p.radius_length;

     // Stop appending once the tile's list has reached its cap.
     if (pointLightCount < MAX_LIGHTS_PER_TILE)
     {
         inFrustum = true;

         // Signed counter: with an unsigned one "i >= 0" is always true, so
         // after plane 0 the index would wrap around and read frustumPlanes
         // out of bounds whenever the light passed all four planes.
         for (int i = 3; i >= 0 && inFrustum; i--)
         {
             dist = dot(frustumPlanes[i], pos);
             inFrustum = (-rad <= dist);
         }

         if (inFrustum)
         {
             // Reserve a slot in the tile's light list.
             id = atomicAdd(pointLightCount, 1);
             pointLightIndex[id] = lightIndex;
         }
     }
 }

我现在可以在80 fps下做4096灯,我非常高兴。

问题已部分解决。下面是新的剔除(culling)代码,除近平面和远平面之外,其余部分都能正常工作。性能仍然非常糟糕,如果有人能看出可能的原因,我将不胜感激。

  // "Partially solved" excerpt: finds the tile's depth range, derives the side
  // planes from the projection matrix and culls lights against them.
  ivec2 pixel = ivec2(gl_GlobalInvocationID.xy);

  vec4 normalColor = imageLoad(normalDepth, pixel);

  float d = normalColor.w;

  // Scale depth to an integer so the tile's min/max can be found with atomics.
  uint depth = uint(d * 0xFFFFFFFF);

  atomicMin(minDepth, depth);
  atomicMax(maxDepth, depth);

  barrier();

  // Back to the same scale as d.
  float minDepthZ = float(minDepth / float(0xFFFFFFFF));
  float maxDepthZ = float(maxDepth / float(0xFFFFFFFF));

  vec2 tileScale = vec2(1280, 720) * (1.0f / float( 2 * WORK_GROUP_SIZE));
  vec2 tileBias = tileScale - vec2(gl_WorkGroupID.xy);

  // Rows built from proj entries combined with this tile's scale/bias.
  vec4 col1 = vec4(-proj[0][0] * tileScale.x, proj[0][1], tileBias.x, proj[0][3]);
  vec4 col2 = vec4(proj[1][0], -proj[1][1] * tileScale.y, tileBias.y, proj[1][3]);
  vec4 col4 = vec4(proj[3][0], proj[3][1], -1.0f, proj[3][3]);

  vec4 frustumPlanes[6];

  //Left plane
  frustumPlanes[0] = col4 + col1;

  //right plane
  frustumPlanes[1] = col4 - col1;

  //top plane
  frustumPlanes[2] = col4 - col2;

  //bottom plane
  frustumPlanes[3] = col4 + col2;

  // The near and far planes are filled in but never tested: the culling loop
  // below only visits indices 3..0.
  //near
  frustumPlanes[4] =vec4(0.0f, 0.0f, -1.0f, -minDepthZ);

  //far
  frustumPlanes[5] = vec4(0.0f, 0.0f, -1.0f, maxDepthZ);

  // Normalize the side planes so dot() yields a distance comparable to a radius.
  for(int i = 0; i < 4; i++)
  {
      frustumPlanes[i] *= 1.0f / length(frustumPlanes[i].xyz);
  }

  //DO CULLING HERE
  // NOTE(review): the stride is WORK_GROUP_SIZE, but if the work group is
  // WORK_GROUP_SIZE x WORK_GROUP_SIZE invocations then gl_LocalInvocationIndex
  // spans that whole range, so the same light index is visited (and can be
  // appended) by several invocations - check whether the stride should be the
  // invocation count instead.
  for (uint lightIndex = gl_LocalInvocationIndex; lightIndex < numActiveLights; lightIndex += WORK_GROUP_SIZE)
  {
      PointLight p = pointLights[lightIndex];

      // Stop appending once the tile's list has reached its cap.
      if (pointLightCount < MAX_LIGHTS_PER_TILE)
      {
          bool inFrustum = true;
          // NOTE(review): i is unsigned, so "i >= 0" is always true; once
          // plane 0 passes, i-- wraps around and frustumPlanes[i] is indexed
          // far outside the array. Only inFrustum turning false ends the loop.
          for (uint i = 3; i >= 0 && inFrustum; i--)
          {
              // Signed distance of the light's view-space centre to side plane i.
              float dd = dot(frustumPlanes[i], view * vec4(p.position, 1.0f));
              inFrustum = (dd >= -p.radius_length);
          }

          if (inFrustum)
          {
              // Reserve a slot in the tile's light list.
              uint id = atomicAdd(pointLightCount, 1);
              pointLightIndex[id] = lightIndex;
          }
      }
  }

  barrier();

实际效果:

http://www.youtube.com/watch?v=8SnvYya1Jn8&feature=youtu.be