Skip to content

Commit a97d67a

Browse files
authored
Merge pull request #7 from hbb1/optim
Improve training speed by 30%~40%
2 parents 362a17a + 4074ab1 commit a97d67a

File tree

4 files changed

+342
-355
lines changed

4 files changed

+342
-355
lines changed

cuda_rasterizer/auxiliary.h

Lines changed: 38 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717

1818
#define BLOCK_SIZE (BLOCK_X * BLOCK_Y)
1919
#define NUM_WARPS (BLOCK_SIZE/32)
20-
#define FilterSize 0.7071067811865476
21-
#define FilterInvSquare 1/(FilterSize*FilterSize)
2220

2321
#define TIGHTBBOX 0
2422
#define RENDER_AXUTILITY 1
@@ -27,15 +25,19 @@
2725
#define NORMAL_OFFSET 2
2826
#define MIDDEPTH_OFFSET 5
2927
#define DISTORTION_OFFSET 6
30-
#define MEDIAN_WEIGHT_OFFSET 7
28+
// #define MEDIAN_WEIGHT_OFFSET 7
3129

3230
// distortion helper macros
3331
#define BACKFACE_CULL 1
3432
#define DUAL_VISIABLE 1
35-
#define NEAR_PLANE 0.2
36-
#define FAR_PLANE 100.0
33+
// #define NEAR_PLANE 0.2
34+
// #define FAR_PLANE 100.0
3735
#define DETACH_WEIGHT 0
3836

37+
// Device-side constant superseding the former NEAR_PLANE macro (same value, 0.2).
// Written with an 'f' suffix so the float is not initialised from a double
// literal, consistent with the SH_C* constants declared in this header.
__device__ const float near_n = 0.2f;
38+
// Device-side constant superseding the former FAR_PLANE macro (same value, 100.0).
// Written with an 'f' suffix so the float is not initialised from a double
// literal, consistent with the SH_C* constants declared in this header.
__device__ const float far_n = 100.0f;
39+
__device__ const float FilterInvSquare = 2.0f;
40+
3941
// Spherical harmonics coefficients
4042
__device__ const float SH_C0 = 0.28209479177387814f;
4143
__device__ const float SH_C1 = 0.4886025119029199f;
@@ -149,13 +151,35 @@ __forceinline__ __device__ float4 dnormvdv(float4 v, float4 dv)
149151
return dnormvdv;
150152
}
151153

152-
__forceinline__ __device__ float3 crossProduct(float3 a, float3 b) {
153-
float3 result;
154-
result.x = a.y * b.z - a.z * b.y;
155-
result.y = a.z * b.x - a.x * b.z;
156-
result.z = a.x * b.y - a.y * b.x;
157-
return result;
158-
}
154+
// Cross product a x b of two 3-component vectors.
__forceinline__ __device__ float3 cross(float3 a, float3 b)
{
    const float cx = a.y*b.z - a.z*b.y;
    const float cy = a.z*b.x - a.x*b.z;
    const float cz = a.x*b.y - a.y*b.x;
    return make_float3(cx, cy, cz);
}
155+
156+
// Component-wise (Hadamard) product of two float3 values.
__forceinline__ __device__ float3 operator*(float3 a, float3 b)
{
    float3 prod;
    prod.x = a.x * b.x;
    prod.y = a.y * b.y;
    prod.z = a.z * b.z;
    return prod;
}
157+
158+
// Component-wise (Hadamard) product of two float2 values.
__forceinline__ __device__ float2 operator*(float2 a, float2 b)
{
    float2 prod;
    prod.x = a.x * b.x;
    prod.y = a.y * b.y;
    return prod;
}
159+
160+
// Scalar-times-vector: multiplies every component of a by f.
__forceinline__ __device__ float3 operator*(float f, float3 a)
{
    float3 scaled;
    scaled.x = f * a.x;
    scaled.y = f * a.y;
    scaled.z = f * a.z;
    return scaled;
}
161+
162+
// Scalar-times-vector: multiplies both components of a by f.
__forceinline__ __device__ float2 operator*(float f, float2 a)
{
    float2 scaled;
    scaled.x = f * a.x;
    scaled.y = f * a.y;
    return scaled;
}
163+
164+
// Component-wise difference a - b of two float3 values.
__forceinline__ __device__ float3 operator-(float3 a, float3 b)
{
    float3 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;
    return diff;
}
165+
166+
// Component-wise difference a - b of two float2 values.
__forceinline__ __device__ float2 operator-(float2 a, float2 b)
{
    float2 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    return diff;
}
167+
168+
// Horizontal sum of the three components of a (evaluated left to right: x + y, then + z).
__forceinline__ __device__ float sumf3(float3 a)
{
    const float total = a.x + a.y + a.z;
    return total;
}
169+
170+
// Horizontal sum of the two components of a.
__forceinline__ __device__ float sumf2(float2 a)
{
    const float total = a.x + a.y;
    return total;
}
171+
172+
// Applies sqrtf to each component of a.
__forceinline__ __device__ float3 sqrtf3(float3 a)
{
    float3 roots;
    roots.x = sqrtf(a.x);
    roots.y = sqrtf(a.y);
    roots.z = sqrtf(a.z);
    return roots;
}
173+
174+
// Applies sqrtf to each component of a.
__forceinline__ __device__ float2 sqrtf2(float2 a)
{
    float2 roots;
    roots.x = sqrtf(a.x);
    roots.y = sqrtf(a.y);
    return roots;
}
175+
176+
// Per-component min(f, a.*): each component of the result is the smaller of f
// and the corresponding component of a.
__forceinline__ __device__ float3 minf3(float f, float3 a)
{
    float3 clamped;
    clamped.x = min(f, a.x);
    clamped.y = min(f, a.y);
    clamped.z = min(f, a.z);
    return clamped;
}
177+
178+
// Per-component min(f, a.*): each component of the result is the smaller of f
// and the corresponding component of a.
__forceinline__ __device__ float2 minf2(float f, float2 a)
{
    float2 clamped;
    clamped.x = min(f, a.x);
    clamped.y = min(f, a.y);
    return clamped;
}
179+
180+
// Per-component max(f, a.*): each component of the result is the larger of f
// and the corresponding component of a.
__forceinline__ __device__ float3 maxf3(float f, float3 a)
{
    float3 clamped;
    clamped.x = max(f, a.x);
    clamped.y = max(f, a.y);
    clamped.z = max(f, a.z);
    return clamped;
}
181+
182+
// Per-component max(f, a.*): each component of the result is the larger of f
// and the corresponding component of a.
__forceinline__ __device__ float2 maxf2(float f, float2 a)
{
    float2 clamped;
    clamped.x = max(f, a.x);
    clamped.y = max(f, a.y);
    return clamped;
}
159183

160184
__forceinline__ __device__ bool in_frustum(int idx,
161185
const float* orig_points,
@@ -258,11 +282,11 @@ quat_to_rotmat_vjp(const glm::vec4 quat, const glm::mat3 v_R) {
258282

259283

260284
// Builds a 3x3 diagonal scaling matrix: starts from the identity and sets
// S[0][0] and S[1][1] to glob_scale * scale.x and glob_scale * scale.y.
// This hunk shows both signatures: the removed float3 version also wrote
// S[2][2]; in the added glm::vec2 version that assignment is commented out,
// so S[2][2] keeps the identity value 1.
inline __device__ glm::mat3
261-
scale_to_mat(const float3 scale, const float glob_scale) {
285+
scale_to_mat(const glm::vec2 scale, const float glob_scale) {
262286
glm::mat3 S = glm::mat3(1.f);
263287
S[0][0] = glob_scale * scale.x;
264288
S[1][1] = glob_scale * scale.y;
265-
S[2][2] = glob_scale * scale.z;
289+
// S[2][2] = glob_scale * scale.z;
266290
return S;
267291
}
268292

0 commit comments

Comments
 (0)