Commit 42e1217

Merge pull request #854 from hedaoyuan/cmrnorm
Cmrnorm
2 parents 9049369 + f1a94e3 commit 42e1217

23 files changed: +891, -469 lines

cmake/util.cmake

Lines changed: 2 additions & 0 deletions

@@ -96,6 +96,7 @@ function(link_paddle_exe TARGET_NAME)
     target_circle_link_libraries(${TARGET_NAME}
         ARCHIVE_START
         paddle_gserver
+        paddle_function
         ${METRIC_LIBS}
         ARCHIVE_END
         paddle_pserver
@@ -106,6 +107,7 @@ function(link_paddle_exe TARGET_NAME)
         paddle_parameter
         paddle_proto
         paddle_cuda
+        paddle_test_main
         ${METRIC_LIBS}
         ${PROTOBUF_LIBRARY}
         ${LIBGLOG_LIBRARY}
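
Worth noting: paddle_function is added inside the ARCHIVE_START/ARCHIVE_END bracket, which in this codebase (I believe) switches the linker into whole-archive mode. That matters for a library like paddle_function, whose ClassRegistrar-based registration (see Function.cpp below) relies on static initializers that the linker would otherwise discard from an unreferenced static-library object. A minimal, self-contained sketch of that pattern — hypothetical names, not Paddle's actual ClassRegistrar:

#include <functional>
#include <map>
#include <string>

// Registry mapping a name to a factory callback; a stand-in for ClassRegistrar.
struct Registry {
  static std::map<std::string, std::function<void()>>& table() {
    static std::map<std::string, std::function<void()>> t;
    return t;
  }
};

// Helper whose constructor performs the registration.
struct RegisterAt {
  RegisterAt(const std::string& name, std::function<void()> fn) {
    Registry::table()[name] = std::move(fn);
  }
};

// Runs at program start -- but only if this object file is linked in,
// which is exactly what whole-archive linking guarantees.
static RegisterAt kCrossMapNormal("CrossMapNormal", [] { /* create op */ });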

paddle/CMakeLists.txt

Lines changed: 1 addition & 0 deletions

@@ -1,4 +1,5 @@
 add_subdirectory(cuda)
+add_subdirectory(function)
 add_subdirectory(utils)
 add_subdirectory(math)
 add_subdirectory(parameter)

paddle/api/CMakeLists.txt

Lines changed: 1 addition & 0 deletions

@@ -46,6 +46,7 @@ add_custom_command(OUTPUT ${PROJ_ROOT}/paddle/dist/.timestamp
     WORKING_DIRECTORY ${PROJ_ROOT}/paddle
     DEPENDS python_swig_sources
             paddle_parameter
+            paddle_function
             paddle_math
             paddle_utils
             paddle_gserver

paddle/api/paddle_ld_flags.py

Lines changed: 3 additions & 2 deletions

@@ -30,8 +30,8 @@
     whole_end = ""

 LIB_DIRS = [
-    "math", 'utils', 'parameter', "gserver", "api", "cuda", "pserver",
-    "trainer"
+    "math", 'function', 'utils', 'parameter', "gserver", "api", "cuda",
+    "pserver", "trainer"
 ]
 PARENT_LIB_DIRS = ['proto']

@@ -75,6 +75,7 @@ def libs_str(self):
         libs = [
             whole_start,
             "-lpaddle_gserver",
+            "-lpaddle_function",
             whole_end,
             "-lpaddle_pserver",
             "-lpaddle_trainer_lib",

paddle/cuda/include/hl_cnn.h

Lines changed: 0 additions & 56 deletions

@@ -240,62 +240,6 @@ extern void hl_avgpool_backward(const int frameCnt,
                                 real* backGrad,
                                 const int outStride);

-/**
- * @brief Cross-map-response normalize forward.
- *
- * @param[in]   frameCnt   batch size of input image.
- * @param[in]   in         input data.
- * @param[in]   scale      buffer.
- * @param[out]  out        output data.
- * @param[in]   channels   number of channels.
- * @param[in]   height     image height.
- * @param[in]   width      image width.
- * @param[in]   sizeX      size.
- * @param[in]   alpha      scale.
- * @param[in]   beta       scale.
- *
- */
-extern void hl_CMRNorm_forward(size_t frameCnt,
-                               const real* in,
-                               real* scale,
-                               real* out,
-                               size_t channels,
-                               size_t height,
-                               size_t width,
-                               size_t sizeX,
-                               real alpha,
-                               real beta);
-
-/**
- * @brief Cross-map-response normalize backward.
- *
- * @param[in]   frameCnt   batch size of input image.
- * @param[in]   inV        input data.
- * @param[in]   scale      buffer.
- * @param[out]  outV       output value.
- * @param[out]  outDiff    output grad.
- * @param[out]  inDiff     input grad.
- * @param[in]   channels   number of channels.
- * @param[in]   height     image height.
- * @param[in]   width      image width.
- * @param[in]   sizeX      size.
- * @param[in]   alpha      scale.
- * @param[in]   beta       scale.
- *
- */
-extern void hl_CMRNorm_backward(size_t frameCnt,
-                                const real* inV,
-                                const real* scale,
-                                const real* outV,
-                                const real* outDiff,
-                                real* inDiff,
-                                size_t channels,
-                                size_t height,
-                                size_t width,
-                                size_t sizeX,
-                                real alpha,
-                                real beta);
-
 /**
  * @brief Bilinear interpolation forward.
  *
paddle/cuda/include/stub/hl_cnn_stub.h

Lines changed: 0 additions & 24 deletions

@@ -117,30 +117,6 @@ inline void hl_avgpool_backward(const int frameCnt,
                                 real* backGrad,
                                 const int outStride) {}

-inline void hl_CMRNorm_forward(size_t frameCnt,
-                               const real* in,
-                               real* scale,
-                               real* out,
-                               size_t channels,
-                               size_t height,
-                               size_t width,
-                               size_t sizeX,
-                               real alpha,
-                               real beta) {}
-
-inline void hl_CMRNorm_backward(size_t frameCnt,
-                                const real* inV,
-                                const real* scale,
-                                const real* outV,
-                                const real* outDiff,
-                                real* inDiff,
-                                size_t channels,
-                                size_t height,
-                                size_t width,
-                                size_t sizeX,
-                                real alpha,
-                                real beta) {}
-
 inline void hl_bilinear_forward(const real* inData,
                                 const size_t inImgH,
                                 const size_t inImgW,

paddle/cuda/src/hl_cuda_cnn.cu

Lines changed: 0 additions & 158 deletions

@@ -381,164 +381,6 @@ void hl_avgpool_backward(const int frameCnt, const real* outGrad,
   CHECK_SYNC("hl_avgpool_backward failed");
 }

-__global__ void KeCMRNormFillScale(size_t nthreads, const real* in,
-                                   real* scale, size_t channels,
-                                   size_t height, size_t width, size_t size,
-                                   real alpha) {
-  size_t index = threadIdx.x + blockIdx.x * blockDim.x;
-  if (index < nthreads) {
-    // find out the local offset
-    size_t w = index % width;
-    size_t h = (index / width) % height;
-    size_t n = index / width / height;
-    size_t offset = (n * channels * height + h) * width + w;
-    size_t step = height * width;
-    in += offset;
-    scale += offset;
-    size_t head = 0;
-    size_t pre_pad = (size - 1) / 2;
-    size_t post_pad = size - pre_pad - 1;
-    real accum_scale = 0;
-    // fill the scale at [n, :, h, w]
-    // accumulate values
-    while (head < post_pad) {
-      accum_scale += in[head * step] * in[head * step];
-      ++head;
-    }
-    // until we reach size, nothing needs to be subtracted
-    while (head < size) {
-      accum_scale += in[head * step] * in[head * step];
-      scale[(head - post_pad) * step] = 1. + accum_scale * alpha;
-      ++head;
-    }
-    // both add and subtract
-    while (head < channels) {
-      accum_scale += in[head * step] * in[head * step];
-      accum_scale -= in[(head - size) * step] * in[(head - size) * step];
-      scale[(head - post_pad) * step] = 1. + accum_scale * alpha;
-      ++head;
-    }
-    // subtract only
-    while (head < channels + post_pad) {
-      accum_scale -= in[(head - size) * step] * in[(head - size) * step];
-      scale[(head - post_pad) * step] = 1. + accum_scale * alpha;
-      ++head;
-    }
-  }
-}
-
-__global__ void KeCMRNormOutput(size_t nthreads, const real* in,
-                                const real* scale, real negative_beta,
-                                real* out) {
-  size_t index = threadIdx.x + blockIdx.x * blockDim.x;
-  if (index < nthreads) {
-    out[index] = in[index] * pow(scale[index], negative_beta);
-  }
-}
-
-void hl_CMRNorm_forward(size_t frameCnt, const real* in, real* scale,
-                        real* out, size_t channels,
-                        size_t height, size_t width, size_t sizeX,
-                        real alpha, real beta) {
-  size_t threadsNum = frameCnt * height * width;
-  size_t blocksX = (threadsNum + 1024 - 1) / 1024;
-  size_t blocksY = 1;
-  dim3 threads(1024, 1);
-  dim3 grid(blocksX, blocksY);
-
-  KeCMRNormFillScale<<<grid, threads, 0, STREAM_DEFAULT>>>
-      (threadsNum, in, scale, channels, height, width, sizeX, alpha);
-
-  threadsNum = frameCnt * height * width * channels;
-  blocksX = (threadsNum + 1024 - 1) / 1024;
-  dim3 threads2(1024, 1);
-  dim3 grid2(blocksX, blocksY);
-  KeCMRNormOutput<<<grid2, threads2, 0, STREAM_DEFAULT>>>
-      (threadsNum, in, scale, beta, out);
-  CHECK_SYNC("hl_CMRNorm_forward");
-}
-
-__global__ void KeCMRNormDiff(size_t nthreads, const real* bottom_data,
-                              const real* top_data, const real* scale,
-                              const real* top_diff, size_t channels,
-                              size_t height, size_t width, size_t size,
-                              real negative_beta, real cache_ratio,
-                              real* bottom_diff) {
-  int index = threadIdx.x + blockIdx.x * blockDim.x;
-  if (index < nthreads) {
-    // find out the local offset
-    size_t w = index % width;
-    size_t h = (index / width) % height;
-    size_t n = index / width / height;
-    size_t offset = (n * channels * height + h) * width + w;
-    size_t step = height * width;
-    bottom_data += offset;
-    top_data += offset;
-    scale += offset;
-    top_diff += offset;
-    bottom_diff += offset;
-    int head = 0;
-    int pre_pad = size - (size + 1) / 2;
-    int post_pad = size - pre_pad - 1;
-    real accum_ratio = 0;
-    // accumulate values
-    while (head < post_pad) {
-      accum_ratio += top_diff[head * step] *
-          top_data[head * step] / scale[head * step];
-      ++head;
-    }
-    // until we reach size, nothing needs to be subtracted
-    while (head < size) {
-      accum_ratio += top_diff[head * step] *
-          top_data[head * step] / scale[head * step];
-      bottom_diff[(head - post_pad) * step] +=
-          top_diff[(head - post_pad) * step] *
-          pow(scale[(head - post_pad) * step], negative_beta) - cache_ratio *
-          bottom_data[(head - post_pad) * step] * accum_ratio;
-      ++head;
-    }
-    // both add and subtract
-    while (head < channels) {
-      accum_ratio += top_diff[head * step] * top_data[head * step] /
-          scale[head * step];
-      accum_ratio -= top_diff[(head - size) * step] *
-          top_data[(head - size) * step] / scale[(head - size) * step];
-      bottom_diff[(head - post_pad) * step] +=
-          top_diff[(head - post_pad) * step] *
-          pow(scale[(head - post_pad) * step], negative_beta) - cache_ratio *
-          bottom_data[(head - post_pad) * step] * accum_ratio;
-      ++head;
-    }
-    // subtract only
-    while (head < channels + post_pad) {
-      accum_ratio -= top_diff[(head - size) * step] *
-          top_data[(head - size) * step] / scale[(head - size) * step];
-      bottom_diff[(head - post_pad) * step] +=
-          top_diff[(head - post_pad) * step] *
-          pow(scale[(head - post_pad) * step], negative_beta) - cache_ratio *
-          bottom_data[(head - post_pad) * step] * accum_ratio;
-      ++head;
-    }
-  }
-}
-
-void hl_CMRNorm_backward(size_t frameCnt, const real* inV,
-                         const real* scale,
-                         const real* outV, const real* outDiff,
-                         real* inDiff, size_t channels,
-                         size_t height, size_t width, size_t sizeX,
-                         real alpha, real beta) {
-  size_t threadsNum = frameCnt * height * width;
-  size_t blocksX = (threadsNum + 1024 - 1) / 1024;
-  size_t blocksY = 1;
-  dim3 threads(1024, 1);
-  dim3 grid(blocksX, blocksY);
-  KeCMRNormDiff<<<grid, threads, 0, STREAM_DEFAULT>>>
-      (threadsNum, inV, outV, scale, outDiff, channels,
-       height, width, sizeX, alpha, beta, inDiff);
-  CHECK_SYNC("hl_CMRNorm_backward");
-}
-
 __global__ void KeBilinearInterpFw(const real* in,
                                    const size_t inImgH,
                                    const size_t inImgW,
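
To make the sliding-window trick in the deleted kernels easier to follow, here is a minimal single-threaded CPU sketch of the same forward computation. It is not part of the commit; the function name and NCHW layout assumption are mine, and beta is negated explicitly rather than relying on the caller as the removed hl_CMRNorm_forward did.

#include <algorithm>
#include <cmath>
#include <cstddef>

using real = float;  // Paddle's "real" is float or double depending on build.

// Hypothetical CPU reference for the removed forward pass (not in the commit).
// Input layout is NCHW, matching the (n * channels * height + h) * width + w
// offsets in KeCMRNormFillScale.
void crossMapNormalFwdCpu(size_t frameCnt, const real* in, real* scale,
                          real* out, size_t channels, size_t height,
                          size_t width, size_t sizeX, real alpha, real beta) {
  const size_t spatial = height * width;
  const size_t prePad = (sizeX - 1) / 2;
  for (size_t n = 0; n < frameCnt; ++n) {
    for (size_t hw = 0; hw < spatial; ++hw) {
      const real* inCol = in + n * channels * spatial + hw;
      real* scaleCol = scale + n * channels * spatial + hw;
      for (size_t c = 0; c < channels; ++c) {
        // Window of sizeX channels around c, clamped to the valid range;
        // sizeX > prePad, so the unsigned arithmetic below cannot underflow.
        size_t begin = (c >= prePad) ? c - prePad : 0;
        size_t end = std::min(channels, c + sizeX - prePad);
        real sumSq = 0;
        for (size_t i = begin; i < end; ++i) {
          sumSq += inCol[i * spatial] * inCol[i * spatial];
        }
        scaleCol[c * spatial] = 1 + alpha * sumSq;
      }
    }
  }
  // Elementwise output: out = in * scale^(-beta).
  const size_t total = frameCnt * channels * spatial;
  for (size_t i = 0; i < total; ++i) {
    out[i] = in[i] * std::pow(scale[i], -beta);
  }
}

The GPU kernels avoid this O(channels * sizeX) inner loop by keeping a running sum of squares per (n, h, w) column, adding one channel at the leading edge and subtracting one at the trailing edge; the sketch recomputes each window for clarity.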

paddle/function/CMakeLists.txt

Lines changed: 27 additions & 0 deletions

@@ -0,0 +1,27 @@
+file(GLOB h_files . *_op.h)
+file(GLOB cpp_files . *_op.cpp)
+
+list(APPEND h_files Function.h)
+list(APPEND cpp_files Function.cpp)
+
+if(WITH_GPU)
+    file(GLOB cu_files . *_op_gpu.cu)
+    cuda_compile(cu_objs ${cu_files})
+endif()
+
+add_library(paddle_function STATIC ${cpp_files} ${cu_objs})
+
+add_library(paddle_test_main STATIC TestMain.cpp)
+
+if(WITH_GPU)
+    # TODO:
+    # file(GLOB test_files . *_op_test.cpp)
+    # add_executable(${test_bin} EXCLUDE_FROM_ALL ${test_files})
+    add_simple_unittest(cross_map_normal_op_test)
+endif()
+
+add_style_check_target(paddle_function ${h_files})
+add_style_check_target(paddle_function ${cpp_files})
+if(WITH_GPU)
+    add_style_check_target(paddle_function ${cu_files})
+endif()

paddle/function/Function.cpp

Lines changed: 49 additions & 0 deletions

@@ -0,0 +1,49 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "Function.h"
+
+namespace paddle {
+
+template <>
+size_t FuncConfig::get<size_t>(const std::string& key) const {
+  auto it = valueMap_.find(key);
+  CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
+  return it->second.s;
+}
+
+template <>
+real FuncConfig::get<real>(const std::string& key) const {
+  auto it = valueMap_.find(key);
+  CHECK(it != valueMap_.end()) << "Cannot find value: '" << key << "'";
+  return it->second.r;
+}
+
+template <>
+FuncConfig& FuncConfig::set<size_t>(const std::string& key, size_t v) {
+  CHECK(valueMap_.count(key) == 0) << "Duplicated value: " << key;
+  valueMap_[key].s = v;
+  return *this;
+}
+
+template <>
+FuncConfig& FuncConfig::set<real>(const std::string& key, real v) {
+  CHECK(valueMap_.count(key) == 0) << "Duplicated value: " << key;
+  valueMap_[key].r = v;
+  return *this;
+}
+
+ClassRegistrar<FunctionBase> FunctionBase::funcRegistrar_;
+
+}  // namespace paddle
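
From the specializations above, FuncConfig is evidently a small string-keyed map whose values share a union-like slot (it->second.s for size_t, it->second.r for real). A hedged usage sketch — FuncConfig is declared in Function.h, which this diff does not show, so treat the details as inferred:

#include "Function.h"

void configExample() {
  // set<T>() returns *this, so calls chain; setting the same key twice
  // would trip the CHECK against duplicates.
  paddle::FuncConfig config;
  config.set<size_t>("size", 5).set<paddle::real>("scale", 1.5);

  size_t size = config.get<size_t>("size");                // 5
  paddle::real scale = config.get<paddle::real>("scale");  // 1.5
  (void)size;
  (void)scale;
  // get<T>() on a missing key fails the CHECK: "Cannot find value: '...'".
}

The funcRegistrar_ definition at the bottom suggests concrete functions register themselves with FunctionBase's ClassRegistrar and are looked up by name; the registration macro itself presumably lives in Function.h.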
