From a7ac4e19a52093a83fba07dca42360c2cf21c985 Mon Sep 17 00:00:00 2001 From: "James E.H. Turner" Date: Fri, 25 May 2018 13:09:54 -0400 Subject: [PATCH 01/11] Modify 5x5 median to handle edges like the original LA Cosmic median in IRAF, extending the window using nearest edge pixel values, rather than just leaving them unfiltered (which led to CR residuals at edges compared with IRAF). --- astroscrappy/utils/medutils.c | 95 +++++++++++++++++++++++++++-------- 1 file changed, 75 insertions(+), 20 deletions(-) diff --git a/astroscrappy/utils/medutils.c b/astroscrappy/utils/medutils.c index 72e0e33..ac3fb97 100644 --- a/astroscrappy/utils/medutils.c +++ b/astroscrappy/utils/medutils.c @@ -478,18 +478,18 @@ PyMedFilt5(float* data, float* output, int nx, int ny) "PyMedFilt5(data, output, nx, ny) -> void\n\n" "Calculate the 5x5 median filter on an array data with dimensions " "nx x ny. The results are saved in the output array. The output " - "array should already be allocated as we work on it in place. The " - "median filter is not calculated for a 2 pixel border around the " - "image. These pixel values are copied from the input data. Note " - "that the data array needs to be striped in the x direction such " - "that pixel i,j has memory location data[i + nx * j]"); + "array should already be allocated as we work on it in place. " + "At each border of the image, the median window is completed by " + "replicating the edge row or column. Note that the data array " + "needs to be striped in the x direction such that pixel i,j has " + "memory location data[i + nx * j]"); /*Total size of the array */ int nxny = nx * ny; /* Loop indices */ int i, j, nxj; - int k, l, nxk; + int k, l, nxk, ipl, rci; /* 25 element array to calculate the median and a counter index. Note that * these both need to be unique for each thread so they both need to be @@ -497,6 +497,10 @@ PyMedFilt5(float* data, float* output, int nx, int ny) float* medarr; int medcounter; + /* End rows & columns for special treatment */ + int cols[] = {0, 1, nx-2, nx-1}; + int rows[] = {0, 1, ny-2, ny-1}; + /* Each thread needs to access the data and the output so we make them * firstprivate. We make sure that our algorithm doesn't have multiple * threads read or write the same piece of memory. */ @@ -529,22 +533,73 @@ PyMedFilt5(float* data, float* output, int nx, int ny) free(medarr); } -#pragma omp parallel firstprivate(output, data, nx, nxny) private(i) - /* Copy the border pixels from the original data into the output array */ - for (i = 0; i < nx; i++) { - output[i] = data[i]; - output[i + nx] = data[i + nx]; - output[nxny - nx + i] = data[nxny - nx + i]; - output[nxny - nx - nx + i] = data[nxny - nx - nx + i]; +#pragma omp parallel firstprivate(output, data, nx, ny, rows) \ + private(i, j, k, l, ipl, rci, medarr, nxj, nxk, medcounter) + { + /*Each thread allocates its own array. */ + medarr = (float *) malloc(25 * sizeof(float)); + + /* Pad the input array with the nearest value for border pixels (as + in the original IRAF algorithm). I'm not sure how thread-optimized + this is after my changes but it only happens for a small number of + rows/columns - JT. + */ +#pragma omp for nowait + for (rci = 0; rci < 4; rci++) { + j = rows[rci]; + nxj = nx * j; + for (i = 0; i < nx; i++) { + medcounter = 0; + for (k = -2; k < 3; k++) { + if (j+k < 0) nxk = -nxj; + else if (j+k >= ny) nxk = nx * (ny-j-1); + else nxk = nx * k; + for (l = -2; l < 3; l++) { + ipl = i+l; + if (ipl < 1) ipl = 0; + else if (ipl >= nx) ipl = nx-1; + medarr[medcounter] = data[nxj + nxk + ipl]; + medcounter++; + } + } + output[nxj + i] = PyOptMed25(medarr); + } + } + /* Each thread needs to free its own copy of medarr */ + free(medarr); } -#pragma omp parallel firstprivate(output, data, nx, ny) private(j, nxj) - for (j = 0; j < ny; j++) { - nxj = nx * j; - output[nxj] = data[nxj]; - output[nxj + 1] = data[nxj + 1]; - output[nxj + nx - 1] = data[nxj + nx - 1]; - output[nxj + nx - 2] = data[nxj + nx - 2]; +#pragma omp parallel firstprivate(output, data, nx, ny, cols) \ + private(i, j, k, l, ipl, rci, medarr, nxj, nxk, medcounter) + { + /*Each thread allocates its own array. */ + medarr = (float *) malloc(25 * sizeof(float)); + +#pragma omp for nowait + for (j = 0; j < ny; j++) { + nxj = nx * j; + + for (rci = 0; rci < 4; rci++) { + i = cols[rci]; + medcounter = 0; + + for (k = -2; k < 3; k++) { + if (j+k < 0) nxk = -nxj; + else if (j+k >= ny) nxk = nx * (ny-j-1); + else nxk = nx * k; + for (l = -2; l < 3; l++) { + ipl = i+l; + if (ipl < 1) ipl = 0; + else if (ipl >= nx) ipl = nx-1; + medarr[medcounter] = data[nxj + nxk + ipl]; + medcounter++; + } + } + output[nxj + i] = PyOptMed25(medarr); + } + } + /* Each thread needs to free its own copy of medarr */ + free(medarr); } return; From e065e1f27e90c628e7eda3868198351305a7b54e Mon Sep 17 00:00:00 2001 From: "James E.H. Turner" Date: Fri, 25 May 2018 17:31:23 -0400 Subject: [PATCH 02/11] Fix test_medfilt5 to go with the last change to PyMedFilt5 (filtering at edges). --- astroscrappy/tests/test_utils.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/astroscrappy/tests/test_utils.py b/astroscrappy/tests/test_utils.py index 08feb33..9fd001a 100644 --- a/astroscrappy/tests/test_utils.py +++ b/astroscrappy/tests/test_utils.py @@ -58,10 +58,6 @@ def test_medfilt3(): def test_medfilt5(): a = np.ascontiguousarray(np.random.random((1001, 1001))).astype('f4') npmed5 = ndimage.filters.median_filter(a, size=(5, 5), mode='nearest') - npmed5[:2, :] = a[:2, :] - npmed5[-2:, :] = a[-2:, :] - npmed5[:, :2] = a[:, :2] - npmed5[:, -2:] = a[:, -2:] med5 = medfilt5(a) assert np.all(med5 == npmed5) From 3fba3de9bfb44bf87f405eace29f57078cb6276a Mon Sep 17 00:00:00 2001 From: "James E.H. Turner" Date: Fri, 25 May 2018 17:44:21 -0400 Subject: [PATCH 03/11] Remove TABs added by EMACS. --- astroscrappy/utils/medutils.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/astroscrappy/utils/medutils.c b/astroscrappy/utils/medutils.c index ac3fb97..aca713f 100644 --- a/astroscrappy/utils/medutils.c +++ b/astroscrappy/utils/medutils.c @@ -555,21 +555,21 @@ PyMedFilt5(float* data, float* output, int nx, int ny) else if (j+k >= ny) nxk = nx * (ny-j-1); else nxk = nx * k; for (l = -2; l < 3; l++) { - ipl = i+l; + ipl = i+l; if (ipl < 1) ipl = 0; - else if (ipl >= nx) ipl = nx-1; + else if (ipl >= nx) ipl = nx-1; medarr[medcounter] = data[nxj + nxk + ipl]; medcounter++; } } output[nxj + i] = PyOptMed25(medarr); - } + } } /* Each thread needs to free its own copy of medarr */ free(medarr); } -#pragma omp parallel firstprivate(output, data, nx, ny, cols) \ +#pragma omp parallel firstprivate(output, data, nx, ny, cols) \ private(i, j, k, l, ipl, rci, medarr, nxj, nxk, medcounter) { /*Each thread allocates its own array. */ @@ -588,15 +588,15 @@ PyMedFilt5(float* data, float* output, int nx, int ny) else if (j+k >= ny) nxk = nx * (ny-j-1); else nxk = nx * k; for (l = -2; l < 3; l++) { - ipl = i+l; + ipl = i+l; if (ipl < 1) ipl = 0; - else if (ipl >= nx) ipl = nx-1; + else if (ipl >= nx) ipl = nx-1; medarr[medcounter] = data[nxj + nxk + ipl]; medcounter++; } } output[nxj + i] = PyOptMed25(medarr); - } + } } /* Each thread needs to free its own copy of medarr */ free(medarr); From 7833e6a80e25394154041e07ea84c3cd2af3d180 Mon Sep 17 00:00:00 2001 From: "James E.H. Turner" Date: Mon, 28 May 2018 11:42:25 -0400 Subject: [PATCH 04/11] Move the loops over edge rows & columns into the main parallelized block, avoiding repeated setup. --- astroscrappy/utils/medutils.c | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/astroscrappy/utils/medutils.c b/astroscrappy/utils/medutils.c index aca713f..7410040 100644 --- a/astroscrappy/utils/medutils.c +++ b/astroscrappy/utils/medutils.c @@ -504,8 +504,8 @@ PyMedFilt5(float* data, float* output, int nx, int ny) /* Each thread needs to access the data and the output so we make them * firstprivate. We make sure that our algorithm doesn't have multiple * threads read or write the same piece of memory. */ -#pragma omp parallel firstprivate(output, data, nx, ny) \ - private(i, j, k, l, medarr, nxj, nxk, medcounter) +#pragma omp parallel firstprivate(output, data, nx, ny, rows) \ + private(i, j, k, l, ipl, rci, medarr, nxj, nxk, medcounter) { /*Each thread allocates its own array. */ medarr = (float *) malloc(25 * sizeof(float)); @@ -529,21 +529,14 @@ PyMedFilt5(float* data, float* output, int nx, int ny) output[nxj + i] = PyOptMed25(medarr); } } - /* Each thread needs to free its own copy of medarr */ - free(medarr); - } - -#pragma omp parallel firstprivate(output, data, nx, ny, rows) \ - private(i, j, k, l, ipl, rci, medarr, nxj, nxk, medcounter) - { - /*Each thread allocates its own array. */ - medarr = (float *) malloc(25 * sizeof(float)); - /* Pad the input array with the nearest value for border pixels (as - in the original IRAF algorithm). I'm not sure how thread-optimized - this is after my changes but it only happens for a small number of - rows/columns - JT. + /* Now pad the input array with the nearest value for border pixels + (as in the original IRAF algorithm). I'm not sure how + thread-optimized this is after my changes but it only happens for + a small number of rows/columns - JT. */ + + /* Border rows: */ #pragma omp for nowait for (rci = 0; rci < 4; rci++) { j = rows[rci]; @@ -565,16 +558,8 @@ PyMedFilt5(float* data, float* output, int nx, int ny) output[nxj + i] = PyOptMed25(medarr); } } - /* Each thread needs to free its own copy of medarr */ - free(medarr); - } - -#pragma omp parallel firstprivate(output, data, nx, ny, cols) \ - private(i, j, k, l, ipl, rci, medarr, nxj, nxk, medcounter) - { - /*Each thread allocates its own array. */ - medarr = (float *) malloc(25 * sizeof(float)); + /* Border columns: */ #pragma omp for nowait for (j = 0; j < ny; j++) { nxj = nx * j; @@ -598,6 +583,7 @@ PyMedFilt5(float* data, float* output, int nx, int ny) output[nxj + i] = PyOptMed25(medarr); } } + /* Each thread needs to free its own copy of medarr */ free(medarr); } From 3097181a9f1f59eb7a3843d54ed1cc1639d1b710 Mon Sep 17 00:00:00 2001 From: "James E.H. Turner" Date: Mon, 28 May 2018 21:15:29 -0400 Subject: [PATCH 05/11] Re-implement 5x5 median filt using NxN filt (with appropriate back end median). This seems about 8% slower than the 5x5-specific version, probably because the compiler can't optimize the loops (as per the existing comment), but the more general NxN version may be useful anyway. --- astroscrappy/utils/medutils.c | 277 +++++++++++++++++++++------------- 1 file changed, 173 insertions(+), 104 deletions(-) diff --git a/astroscrappy/utils/medutils.c b/astroscrappy/utils/medutils.c index 7410040..36039ff 100644 --- a/astroscrappy/utils/medutils.c +++ b/astroscrappy/utils/medutils.c @@ -484,111 +484,8 @@ PyMedFilt5(float* data, float* output, int nx, int ny) "needs to be striped in the x direction such that pixel i,j has " "memory location data[i + nx * j]"); - /*Total size of the array */ - int nxny = nx * ny; - - /* Loop indices */ - int i, j, nxj; - int k, l, nxk, ipl, rci; - - /* 25 element array to calculate the median and a counter index. Note that - * these both need to be unique for each thread so they both need to be - * private and we wait to allocate memory until the pragma below. */ - float* medarr; - int medcounter; - - /* End rows & columns for special treatment */ - int cols[] = {0, 1, nx-2, nx-1}; - int rows[] = {0, 1, ny-2, ny-1}; - - /* Each thread needs to access the data and the output so we make them - * firstprivate. We make sure that our algorithm doesn't have multiple - * threads read or write the same piece of memory. */ -#pragma omp parallel firstprivate(output, data, nx, ny, rows) \ - private(i, j, k, l, ipl, rci, medarr, nxj, nxk, medcounter) - { - /*Each thread allocates its own array. */ - medarr = (float *) malloc(25 * sizeof(float)); - - /* Go through each pixel excluding the border.*/ -#pragma omp for nowait - for (j = 2; j < ny - 2; j++) { - /* Precalculate the multiplication nx * j, minor optimization */ - nxj = nx * j; - for (i = 2; i < nx - 2; i++) { - medcounter = 0; - /* The compiler should optimize away these loops */ - for (k = -2; k < 3; k++) { - nxk = nx * k; - for (l = -2; l < 3; l++) { - medarr[medcounter] = data[nxj + i + nxk + l]; - medcounter++; - } - } - /* Calculate the median in the fastest way possible */ - output[nxj + i] = PyOptMed25(medarr); - } - } - - /* Now pad the input array with the nearest value for border pixels - (as in the original IRAF algorithm). I'm not sure how - thread-optimized this is after my changes but it only happens for - a small number of rows/columns - JT. - */ + return PyMedFiltN(data, output, nx, ny, 5); - /* Border rows: */ -#pragma omp for nowait - for (rci = 0; rci < 4; rci++) { - j = rows[rci]; - nxj = nx * j; - for (i = 0; i < nx; i++) { - medcounter = 0; - for (k = -2; k < 3; k++) { - if (j+k < 0) nxk = -nxj; - else if (j+k >= ny) nxk = nx * (ny-j-1); - else nxk = nx * k; - for (l = -2; l < 3; l++) { - ipl = i+l; - if (ipl < 1) ipl = 0; - else if (ipl >= nx) ipl = nx-1; - medarr[medcounter] = data[nxj + nxk + ipl]; - medcounter++; - } - } - output[nxj + i] = PyOptMed25(medarr); - } - } - - /* Border columns: */ -#pragma omp for nowait - for (j = 0; j < ny; j++) { - nxj = nx * j; - - for (rci = 0; rci < 4; rci++) { - i = cols[rci]; - medcounter = 0; - - for (k = -2; k < 3; k++) { - if (j+k < 0) nxk = -nxj; - else if (j+k >= ny) nxk = nx * (ny-j-1); - else nxk = nx * k; - for (l = -2; l < 3; l++) { - ipl = i+l; - if (ipl < 1) ipl = 0; - else if (ipl >= nx) ipl = nx-1; - medarr[medcounter] = data[nxj + nxk + ipl]; - medcounter++; - } - } - output[nxj + i] = PyOptMed25(medarr); - } - } - - /* Each thread needs to free its own copy of medarr */ - free(medarr); - } - - return; } /* Calculate the 7x7 median filter of an array data that has dimensions @@ -1201,3 +1098,175 @@ PySepMedFilt9(float* data, float* output, int nx, int ny) return; } + +/* NxN median filter, with replicated edges. */ +void +PyMedFiltN(float* data, float* output, int nx, int ny, int wsize) +{ + PyDoc_STRVAR(PyMedFiltN__doc__, + "PyMedFiltN(data, output, nx, ny, wsize) -> void\n\n" + "Calculate the NxN median filter on an array with dimensions " + "nx x ny, using a window size of N=wsize (which must be an odd " + "number). The results are saved in the output array. The output " + "array should already be allocated, as we work on it in place. At " + "each border of the image, the median window is completed by " + "replicating the edge row or column. Note that the data array " + "needs to be striped in the x direction such that pixel i,j has " + "memory location data[i + nx * j]."); + + /*Total size of the array */ + int nxny = nx * ny; + + /* Loop indices */ + int i, j, nxj; + int k, l, nxk, ipl, rci; + int wstart, wend; + + /* N^2 element array to calculate the median and a counter index. Note that + * these both need to be unique for each thread so they both need to be + * private and we wait to allocate memory until the pragma below. */ + float* medarr; + int medcounter; + + /* Pointer to function that calculates median for a length N^2 array */ + float (*medfunc)(); + int medarg2; + + /* Rows/columns at each edge where window extends past image */ + int nedgerc = wsize-1; + int n1edge = nedgerc / 2; + int *rows, *cols; + + rows = (int *) malloc(nedgerc * sizeof(int)); + cols = (int *) malloc(nedgerc * sizeof(int)); + + /* Calculate indices of edge rows/columns: */ + for (rci = 0; rci < n1edge; rci++) { + rows[rci] = rci; + cols[rci] = rci; + rows[nedgerc-rci-1] = ny-rci-1; + cols[nedgerc-rci-1] = nx-rci-1; + } + + /* Select appropriate function to use for determining median */ + switch (wsize) { + case 3: + medfunc = PyOptMed9; + medarg2 = 0; + break; + case 5: + medfunc = PyOptMed25; + medarg2 = 0; + break; + default: + medfunc = PyMedian; + medarg2 = wsize*wsize; + } + + /* Start/end offsets for sliding window (is this significantly faster than + using n1edge directly below?) + */ + wstart = -n1edge; + wend = n1edge + 1; + + /* Each thread needs to access the data and the output so we make them + * firstprivate. We make sure that our algorithm doesn't have multiple + * threads read or write the same piece of memory. */ +#pragma omp parallel firstprivate(output, data, nx, ny, rows) \ + private(i, j, k, l, ipl, rci, medarr, nxj, nxk, medcounter) + { + /*Each thread allocates its own array. */ + medarr = (float *) malloc(wsize * wsize * sizeof(float)); + + /* Go through each pixel excluding the border.*/ +#pragma omp for nowait + for (j = n1edge; j < ny - n1edge; j++) { + /* Precalculate the multiplication nx * j, minor optimization */ + nxj = nx * j; + for (i = n1edge; i < nx - n1edge; i++) { + medcounter = 0; + /* The compiler should optimize away these loops */ + for (k = wstart; k < wend; k++) { + nxk = nx * k; + for (l = wstart; l < wend; l++) { + medarr[medcounter] = data[nxj + i + nxk + l]; + medcounter++; + } + } + /* Calculate the median in the fastest way possible */ + if (medarg2) + output[nxj + i] = medfunc(medarr, medarg2); + else + output[nxj + i] = medfunc(medarr); + } + } + + /* Now pad the input array with the nearest value for border pixels + (as in the original IRAF algorithm). I'm not sure how + thread-optimized this is after my changes but it only happens for + a small number of rows/columns - JT. + */ + + /* Border rows: */ +#pragma omp for nowait + for (rci = 0; rci < nedgerc; rci++) { + j = rows[rci]; + nxj = nx * j; + for (i = 0; i < nx; i++) { + medcounter = 0; + for (k = wstart; k < wend; k++) { + if (j+k < 0) nxk = -nxj; + else if (j+k >= ny) nxk = nx * (ny-j-1); + else nxk = nx * k; + for (l = wstart; l < wend; l++) { + ipl = i+l; + if (ipl < 1) ipl = 0; + else if (ipl >= nx) ipl = nx-1; + medarr[medcounter] = data[nxj + nxk + ipl]; + medcounter++; + } + } + if (medarg2) + output[nxj + i] = medfunc(medarr, medarg2); + else + output[nxj + i] = medfunc(medarr); + } + } + + /* Border columns: */ +#pragma omp for nowait + for (j = 0; j < ny; j++) { + nxj = nx * j; + + for (rci = 0; rci < nedgerc; rci++) { + i = cols[rci]; + medcounter = 0; + + for (k = wstart; k < wend; k++) { + if (j+k < 0) nxk = -nxj; + else if (j+k >= ny) nxk = nx * (ny-j-1); + else nxk = nx * k; + for (l = wstart; l < wend; l++) { + ipl = i+l; + if (ipl < 1) ipl = 0; + else if (ipl >= nx) ipl = nx-1; + medarr[medcounter] = data[nxj + nxk + ipl]; + medcounter++; + } + } + if (medarg2) + output[nxj + i] = medfunc(medarr, medarg2); + else + output[nxj + i] = medfunc(medarr); + } + } + + /* Each thread needs to free its own copy of medarr */ + free(medarr); + } + + free(rows); + free(cols); + + return; +} From 99f38e28cea43d3e87a4bce237b0352642a47cdf Mon Sep 17 00:00:00 2001 From: "James E.H. Turner" Date: Tue, 29 May 2018 12:40:40 -0400 Subject: [PATCH 06/11] Address compiler return-type warning (I hope). --- astroscrappy/utils/medutils.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/astroscrappy/utils/medutils.c b/astroscrappy/utils/medutils.c index 36039ff..03bd5a4 100644 --- a/astroscrappy/utils/medutils.c +++ b/astroscrappy/utils/medutils.c @@ -484,7 +484,9 @@ PyMedFilt5(float* data, float* output, int nx, int ny) "needs to be striped in the x direction such that pixel i,j has " "memory location data[i + nx * j]"); - return PyMedFiltN(data, output, nx, ny, 5); + PyMedFiltN(data, output, nx, ny, 5); + + return; } From b293c6f45ccfbc338e9bfc9ea66f57a8c2d4a449 Mon Sep 17 00:00:00 2001 From: "James E.H. Turner" Date: Tue, 29 May 2018 16:06:28 -0400 Subject: [PATCH 07/11] Add function prototype for new PyMedFiltN in the corresponding header file. Hmmm, all this Python & IRAF has made my C rather rusty... --- astroscrappy/utils/medutils.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/astroscrappy/utils/medutils.h b/astroscrappy/utils/medutils.h index b7aa72b..e25f267 100644 --- a/astroscrappy/utils/medutils.h +++ b/astroscrappy/utils/medutils.h @@ -77,6 +77,17 @@ PyMedFilt5(float* data, float* output, int nx, int ny); void PyMedFilt7(float* data, float* output, int nx, int ny); +/* Calculate the NxN median filter on an array with dimensions nx x ny, + * using a window size of N=wsize (which must be an odd number). The + * results are saved in the output array. The output array should already + * be allocated, as we work on it in place. At each border of the image, + * the median window is completed by replicating the edge row or + * column. Note that the data array needs to be striped in the x direction + * such that pixel i,j has memory location data[i + nx * j]. + */ +void +PyMedFiltN(float* data, float* output, int nx, int ny, int wsize); + /* Calculate the 3x3 separable median filter of an array data that has * dimensions nx x ny. The results are saved in the output array. The output * array should already be allocated as we work on it in place. The median From 32c57b70ed302b46f0683918d6e67d26a288d3f5 Mon Sep 17 00:00:00 2001 From: "James E.H. Turner" Date: Fri, 8 Jun 2018 16:40:25 -0400 Subject: [PATCH 08/11] Use new PyMedFiltN in the 3x3 and 7x7 medians, as well as 5x5, to replicate edge pixels instead of not filtering them. --- astroscrappy/utils/medutils.c | 125 +--------------------------------- 1 file changed, 2 insertions(+), 123 deletions(-) diff --git a/astroscrappy/utils/medutils.c b/astroscrappy/utils/medutils.c index 03bd5a4..df2501d 100644 --- a/astroscrappy/utils/medutils.c +++ b/astroscrappy/utils/medutils.c @@ -402,63 +402,7 @@ PyMedFilt3(float* data, float* output, int nx, int ny) "that the data array needs to be striped in the x direction such " "that pixel i,j has memory location data[i + nx * j]"); - /*Total size of the array */ - int nxny = nx * ny; - - /* Loop indices */ - int i, j, nxj; - int k, l, nxk; - - /* 9 element array to calculate the median and a counter index. Note that - * these both need to be unique for each thread so they both need to be - * private and we wait to allocate memory until the pragma below.*/ - float* medarr; - int medcounter; - - /* Each thread needs to access the data and the output so we make them - * firstprivate. We make sure that our algorithm doesn't have multiple - * threads read or write the same piece of memory. */ -#pragma omp parallel firstprivate(output, data, nx, ny) \ - private(i, j, k, l, medarr, nxj, nxk, medcounter) - { - /*Each thread allocates its own array. */ - medarr = (float *) malloc(9 * sizeof(float)); - - /* Go through each pixel excluding the border.*/ -#pragma omp for nowait - for (j = 1; j < ny - 1; j++) { - /* Precalculate the multiplication nx * j, minor optimization */ - nxj = nx * j; - for (i = 1; i < nx - 1; i++) { - medcounter = 0; - /* The compiler should optimize away these loops */ - for (k = -1; k < 2; k++) { - nxk = nx * k; - for (l = -1; l < 2; l++) { - medarr[medcounter] = data[nxj + i + nxk + l]; - medcounter++; - } - } - /* Calculate the median in the fastest way possible */ - output[nxj + i] = PyOptMed9(medarr); - } - } - /* Each thread needs to free its own copy of medarr */ - free(medarr); - } - -#pragma omp parallel firstprivate(output, data, nx, nxny) private(i) - /* Copy the border pixels from the original data into the output array */ - for (i = 0; i < nx; i++) { - output[i] = data[i]; - output[nxny - nx + i] = data[nxny - nx + i]; - } -#pragma omp parallel firstprivate(output, data, nx, ny) private(j, nxj) - for (j = 0; j < ny; j++) { - nxj = nx * j; - output[nxj] = data[nxj]; - output[nxj + nx - 1] = data[nxj + nx - 1]; - } + PyMedFiltN(data, output, nx, ny, 3); return; } @@ -511,72 +455,7 @@ PyMedFilt7(float* data, float* output, int nx, int ny) "that the data array needs to be striped in the x direction such " "that pixel i,j has memory location data[i + nx * j]"); - /*Total size of the array */ - int nxny = nx * ny; - - /* Loop indices */ - int i, j, nxj; - int k, l, nxk; - - /* 49 element array to calculate the median and a counter index. Note that - * these both need to be unique for each thread so they both need to be - * private and we wait to allocate memory until the pragma below. */ - float* medarr; - int medcounter; - - /* Each thread needs to access the data and the output so we make them - * firstprivate. We make sure that our algorithm doesn't have multiple - * threads read or write the same piece of memory. */ -#pragma omp parallel firstprivate(output, data, nx, ny) \ - private(i, j, k, l, medarr, nxj, nxk, medcounter) - { - /*Each thread allocates its own array. */ - medarr = (float *) malloc(49 * sizeof(float)); - - /* Go through each pixel excluding the border.*/ -#pragma omp for nowait - for (j = 3; j < ny - 3; j++) { - /* Precalculate the multiplication nx * j, minor optimization */ - nxj = nx * j; - for (i = 3; i < nx - 3; i++) { - medcounter = 0; - /* The compiler should optimize away these loops */ - for (k = -3; k < 4; k++) { - nxk = nx * k; - for (l = -3; l < 4; l++) { - medarr[medcounter] = data[nxj + i + nxk + l]; - medcounter++; - } - } - /* Calculate the median in the fastest way possible */ - output[nxj + i] = PyMedian(medarr, 49); - } - } - /* Each thread needs to free its own copy of medarr */ - free(medarr); - } - -#pragma omp parallel firstprivate(output, data, nx, nxny) private(i) - /* Copy the border pixels from the original data into the output array */ - for (i = 0; i < nx; i++) { - output[i] = data[i]; - output[i + nx] = data[i + nx]; - output[i + nx + nx] = data[i + nx + nx]; - output[nxny - nx + i] = data[nxny - nx + i]; - output[nxny - nx - nx + i] = data[nxny - nx - nx + i]; - output[nxny - nx - nx - nx + i] = data[nxny - nx - nx - nx + i]; - } - -#pragma omp parallel firstprivate(output, data, nx, ny) private(j, nxj) - for (j = 0; j < ny; j++) { - nxj = nx * j; - output[nxj] = data[nxj]; - output[nxj + 1] = data[nxj + 1]; - output[nxj + 2] = data[nxj + 2]; - output[nxj + nx - 1] = data[nxj + nx - 1]; - output[nxj + nx - 2] = data[nxj + nx - 2]; - output[nxj + nx - 3] = data[nxj + nx - 3]; - } + PyMedFiltN(data, output, nx, ny, 7); return; } From fc8a58b5038ff69ad92b6d5db3f79da7d10ee0b1 Mon Sep 17 00:00:00 2001 From: "James E.H. Turner" Date: Fri, 8 Jun 2018 17:07:20 -0400 Subject: [PATCH 09/11] Update 3x3 and 7x7 median tests now that they're filtering edge pixels. --- astroscrappy/tests/test_utils.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/astroscrappy/tests/test_utils.py b/astroscrappy/tests/test_utils.py index 9fd001a..d65e34f 100644 --- a/astroscrappy/tests/test_utils.py +++ b/astroscrappy/tests/test_utils.py @@ -46,10 +46,6 @@ def test_optmed25(): def test_medfilt3(): a = np.ascontiguousarray(np.random.random((1001, 1001))).astype('f4') npmed3 = ndimage.filters.median_filter(a, size=(3, 3), mode='nearest') - npmed3[:1, :] = a[:1, :] - npmed3[-1:, :] = a[-1:, :] - npmed3[:, :1] = a[:, :1] - npmed3[:, -1:] = a[:, -1:] med3 = medfilt3(a) assert np.all(med3 == npmed3) @@ -66,10 +62,6 @@ def test_medfilt5(): def test_medfilt7(): a = np.ascontiguousarray(np.random.random((1001, 1001))).astype('f4') npmed7 = ndimage.filters.median_filter(a, size=(7, 7), mode='nearest') - npmed7[:3, :] = a[:3, :] - npmed7[-3:, :] = a[-3:, :] - npmed7[:, :3] = a[:, :3] - npmed7[:, -3:] = a[:, -3:] med7 = medfilt7(a) assert np.all(med7 == npmed7) From bfb4ca912ed8d51919ce52598ac6f08c344d7d5e Mon Sep 17 00:00:00 2001 From: "James E.H. Turner" Date: Fri, 8 Jun 2018 16:37:29 -0400 Subject: [PATCH 10/11] Don't ignore edge rows/columns in clean_medmask, replicate them to make up the median window. --- astroscrappy/astroscrappy.pyx | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/astroscrappy/astroscrappy.pyx b/astroscrappy/astroscrappy.pyx index b0a6f06..b4f4214 100644 --- a/astroscrappy/astroscrappy.pyx +++ b/astroscrappy/astroscrappy.pyx @@ -483,7 +483,8 @@ cdef void clean_medmask(float[:, ::1] cleanarr, bool[:, ::1] crmask, bool[:, ::1] mask, int nx, int ny, float background_level): """clean_medmask(cleanarr, crmask, mask, nx, ny, background_level)\n - Clean the bad pixels in cleanarr using a 5x5 masked median filter. + Clean the bad pixels in cleanarr using a 5x5 masked median filter, with + edge rows/columns replicated to complete the window. Parameters ---------- @@ -509,27 +510,33 @@ cdef void clean_medmask(float[:, ::1] cleanarr, bool[:, ::1] crmask, Average value of the background. This value will be used if there are no good pixels in a 5x5 region. """ - # Go through all of the pixels, ignore the borders - cdef int k, l, i, j, numpix + # Go through all of the pixels + cdef int k, l, i, j, i1, j1, numpix cdef float * medarr cdef bool badpixel # For each pixel with nogil, parallel(): medarr = < float * > malloc(25 * sizeof(float)) - for j in prange(2, ny - 2): - for i in range(2, nx - 2): + for j in prange(0, ny): + for i in range(0, nx): # if the pixel is in the crmask if crmask[j, i]: numpix = 0 # median the 25 pixels around the pixel ignoring # any pixels that are masked for l in range(-2, 3): + j1 = j + l + if (j1 < 0): j1 = 0 + elif (j1 > ny): j1 = ny for k in range(-2, 3): - badpixel = crmask[j + l, i + k] - badpixel = badpixel or mask[j + l, i + k] + i1 = i + k + if (i1 < 0): i1 = 0 + elif (i1 > nx): i1 = nx + badpixel = crmask[j1, i1] + badpixel = badpixel or mask[j1, i1] if not badpixel: - medarr[numpix] = cleanarr[j + l, i + k] + medarr[numpix] = cleanarr[j1, i1] numpix = numpix + 1 # if the pixels count is 0 then put in the background From 4876fc2728574d3db7776770924396faf0d84d70 Mon Sep 17 00:00:00 2001 From: "James E.H. Turner" Date: Fri, 8 Jun 2018 18:29:57 -0400 Subject: [PATCH 11/11] Oops, index the end limit of the pixel range correctly. --- astroscrappy/astroscrappy.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/astroscrappy/astroscrappy.pyx b/astroscrappy/astroscrappy.pyx index b4f4214..02e1c32 100644 --- a/astroscrappy/astroscrappy.pyx +++ b/astroscrappy/astroscrappy.pyx @@ -528,11 +528,11 @@ cdef void clean_medmask(float[:, ::1] cleanarr, bool[:, ::1] crmask, for l in range(-2, 3): j1 = j + l if (j1 < 0): j1 = 0 - elif (j1 > ny): j1 = ny + elif (j1 >= ny): j1 = ny-1 for k in range(-2, 3): i1 = i + k if (i1 < 0): i1 = 0 - elif (i1 > nx): i1 = nx + elif (i1 >= nx): i1 = nx-1 badpixel = crmask[j1, i1] badpixel = badpixel or mask[j1, i1] if not badpixel: