import numpy as np
import warnings
import logging
from time import time
from itertools import product
from numba import jit
# noinspection PyShadowingNames
[docs]
class SL(object):
def __init__(self, mx, extend=1.1):
""" returns slices for given center Y and radius D but makes sure borders are within [0,mx]
:param mx: maximum allowed index
:param extend: multiply D by extend to over- oder under-sample the slice
:return: function like object, to be called like f((Y,D))
"""
self.mx = mx
self.extend = extend
def __call__(self, yd):
""" to be called like (yy,dd)
:param yd: tuple,list with two elements (yy,dd)
:return: slice
"""
yy, dd = yd
i0 = int(yy - self.extend * dd)
i1 = int(yy + self.extend * dd)
if i0 < 0:
i0 = 0
if i1 > self.mx:
i1 = self.mx
return slice(i0, i1)
# noinspection PyShadowingNames
@jit(nopython=True)
def fill_nan(arr):
    """
    Fill NaN's in a 2D array in place by "brooming".

    Four directional sweeps (row-wise left->right and right->left, then
    column-wise top->down and bottom->up) each carry the last finite value
    seen along the sweep and write it into any NaN cell encountered.  A cell
    filled by an earlier sweep is finite afterwards, so later sweeps leave it
    untouched; the column sweeps only fill what the row sweeps could not
    reach (e.g. rows containing no finite value at all).

    :param arr: 2D numpy array, NaN's are filled by brooming (modified in place)
    """
    # row sweeps
    for ix in range(arr.shape[0]):
        bf = np.nan  # last finite value seen along the current sweep
        for iy in range(arr.shape[1]):  # left -> right
            if np.isfinite(arr[ix, iy]):
                bf = arr[ix, iy]
            if np.isfinite(bf) and np.isnan(arr[ix, iy]):
                arr[ix, iy] = bf
        bf = np.nan
        for iy in range(arr.shape[1] - 1, -1, -1):  # right -> left
            if np.isfinite(arr[ix, iy]):
                bf = arr[ix, iy]
            if np.isfinite(bf) and np.isnan(arr[ix, iy]):
                arr[ix, iy] = bf
    # column sweeps (fill whatever the row sweeps left over)
    for iy in range(arr.shape[1]):
        bf = np.nan
        for ix in range(arr.shape[0]):  # top -> down
            if np.isfinite(arr[ix, iy]):
                bf = arr[ix, iy]
            if np.isfinite(bf) and np.isnan(arr[ix, iy]):
                arr[ix, iy] = bf
        bf = np.nan
        for ix in range(arr.shape[0] - 1, -1, -1):  # bottom -> up
            if np.isfinite(arr[ix, iy]):
                bf = arr[ix, iy]
            if np.isfinite(bf) and np.isnan(arr[ix, iy]):
                arr[ix, iy] = bf
# noinspection PyShadowingNames
@jit(nopython=True)
def nanmean_filter(inp, dd):
    """ Filter input array with a moving mean that ignores NaN's.

    For each finite input pixel, the mean over all finite values in the
    surrounding window [ii-dd, ii+dd) x [jj-dd, jj+dd) (clipped to the array
    bounds; note the exclusive upper bound) is computed.  Pixels that are NaN
    in the input stay NaN in the output.

    :param inp: 2D array
    :param dd: radius of mean (upper window bound is exclusive, so dd should be >= 1)
    :return: filtered array (new array, input is not modified)
    """
    bb = np.empty(inp.shape)
    bb[:, :] = np.nan  # np.NaN alias was removed in numpy 2.0
    ni, nj = inp.shape
    for ii in range(ni):
        for jj in range(nj):
            if np.isfinite(inp[ii, jj]):
                # clip window borders to the array bounds
                i1 = ii - dd
                if i1 < 0:
                    i1 = 0
                i2 = ii + dd
                if i2 >= ni:
                    i2 = ni
                j1 = jj - dd
                if j1 < 0:
                    j1 = 0
                j2 = jj + dd
                if j2 >= nj:
                    j2 = nj
                bf = 0.0
                nn = 0.0
                for kk in range(i1, i2):
                    for ll in range(j1, j2):
                        if np.isfinite(inp[kk, ll]):
                            bf += inp[kk, ll]
                            nn += 1
                if nn > 0:
                    bb[ii, jj] = bf / nn
                # else: bb[ii, jj] stays NaN (pre-initialized above); the original
                # redundant elif/else branches that re-assigned NaN were removed
    return bb
def nangaussian_filter(uu, sigma):
    """ Gaussian-filter a 2D array while ignoring NaN's.

    Both the data (with NaN's replaced by zero) and a validity-weight field are
    filtered with the same gaussian kernel; their ratio renormalizes the result
    so that missing values do not drag the mean towards zero.  Positions that
    are NaN in the input stay NaN in the output.

    :param uu: 2D float array, may contain NaN's
    :param sigma: standard deviation of the gaussian kernel
    :return: filtered array of the same shape
    """
    from scipy.ndimage import gaussian_filter  # import here to avoid static TLS ImportError
    nan_mask = np.isnan(uu)  # clearer than the original `uu != uu` NaN test
    # numerator: data with NaN's zeroed out
    vv = uu.copy()
    vv[nan_mask] = 0
    vv_g = gaussian_filter(vv, sigma=sigma)
    # denominator: weight 1 where data is valid, 0 where NaN
    ww = np.ones_like(uu)
    ww[nan_mask] = 0
    ww_g = gaussian_filter(ww, sigma=sigma)
    zz = vv_g / ww_g  # renormalize; 0/0 -> NaN far away from any valid data
    zz[nan_mask] = np.nan  # np.NaN alias was removed in numpy 2.0
    return zz
# noinspection PyShadowingNames
def itt2d(shape, dd, extend=1.00):
    """Separate a 2D array into 2D blocks by an iterator returning slice tuples.

    Returns an iterator of (slice, slice) pairs which separates a 2D array of
    [shape] into dd^2 smaller blocks.  Blocks can be over- or under-sampled
    using the extend factor.

    :param shape: shape of 2D array
    :param dd: number of desired blocks in each axis
    :param extend: over-sampling factor
    :return: iterator of (slice, slice) tuples
    """
    def _centers_and_radii(n):
        # split [0, n] into dd intervals and return (center, half-width) pairs
        edges = np.linspace(0, n, dd + 1)
        centers = np.array(0.5 * (edges[:-1] + edges[1:]), dtype=int)
        radii = np.array(0.5 * (edges[1:] - edges[:-1]), dtype=int)
        return zip(centers, radii)

    slicer_x = SL(shape[0], extend=extend)
    slicer_y = SL(shape[1], extend=extend)
    return product(map(slicer_x, _centers_and_radii(shape[0])),
                   map(slicer_y, _centers_and_radii(shape[1])))
# noinspection PyShadowingNames
def inpaint(array, sigma=None, logger=None, update_in_place=False, fill_corners=True, fill_remaining=True,
            extend=1.15, max_allowed_good_points=50000, mask=None, max_interpolation_points=1500000,
            interpolation_method="griddata", post_processing=None):
    """
    Fill nan's in array with meaningful data.

    :param array: 2D array to be inpainted
    :param sigma: if not None, the final result will be smoothed by a gaussian kernel with [sigma]
    :param logger: logger for logging
    :param update_in_place: if True, perform inpainting directly on array and return None,
                            if False, return the result as a new array
    :param fill_corners: True / False, assign values to corner blocks to increase the number of pixels
                         which can be interpolated well
    :param fill_remaining: strategy for nan's remaining after interpolation:
                           "nearest_neighbour" (median over a growing search window, slow),
                           "broom" (directional sweeps, see fill_nan),
                           "median" (single global fill value),
                           None (leave remaining nan's untouched);
                           if not None and nan's still remain afterwards, a ValueError is raised
    :param extend: oversampling parameter for block splitting of array, should be chosen between 0.8 and 1.2
    :param max_allowed_good_points: max input points for griddata, determines performance and number of block splits
    :param max_interpolation_points: either None or integer, max number of points which are interpolated
                                     by griddata per call; if too many, might eat up all memory
    :param mask: either None or boolean array of shape array; only those nan's in array where mask is True
                 will be filled with meaningful data
    :param interpolation_method: None, "griddata" or "splines"
    :param post_processing: None, "spline" or "gaussian_filter"
    :return: array filled with meaningful data if update_in_place=False, otherwise None
    """
    # import here to avoid static TLS ImportError
    from scipy.interpolate import bisplrep, bisplev
    from scipy.interpolate import griddata
    from scipy.ndimage import gaussian_filter
    t0 = time()
    logger = logger or logging.getLogger(__name__)
    # arr is the working array, either get copy or use the given one
    if update_in_place is True:
        arr = array
    else:
        logger.info("Create new array of shape: %s" % str(array.shape))
        arr = np.copy(array)
    shape = arr.shape
    if np.sum(np.isnan(arr)) == np.prod(shape):
        # FIX: bail out early in BOTH modes (original kept processing an all-nan
        # array when update_in_place was True)
        logger.info("Array contains no valid points -> do nothing.")
        return arr if update_in_place is False else None
    # meshgrid to get array indices right
    # NOTE(review): int16 saves memory but overflows for axes longer than 32767 -- confirm input sizes
    yv, xv = np.meshgrid(np.arange(arr.shape[1], dtype=np.int16),
                         np.arange(arr.shape[0], dtype=np.int16), copy=False)
    good_results = np.isfinite(arr)  # usable points for interpolation
    bad_results = np.isnan(arr) if mask is None else np.logical_and(np.isnan(arr), mask)
    logger.info("bad/total ratio: %.5f" % (float(np.sum(bad_results) / np.prod(shape))))
    # flat index tables for bad data
    xvb = xv[bad_results]
    yvb = yv[bad_results]
    """
    griddata should not get too many input points, otherwise the memory use and performance
    will be horrible, the main idea behind the following mess is to intelligently reduce the number
    of input points while maintaining good performance, the algorithm starts to 2d split the working array
    until the max number of good values over all splits is lower than max_allowed_good_points
    """
    dd_split = 1  # initialize number of splits
    if interpolation_method is not None:
        for ii in range(1, np.min(shape)):
            max_good_points = np.max([np.sum(good_results[sx, sy]) for sx, sy in itt2d(shape, ii)])
            logger.info((ii, max_good_points))
            if max_good_points < max_allowed_good_points:
                dd_split = ii
                break
        logger.info("Splitting array into %i^2 parts." % dd_split)
    """
    now loop over all 2d segments and interpolate using griddata
    """
    for sx, sy in itt2d(good_results.shape, dd_split, extend=extend):
        xvg = xv[sx, sy][good_results[sx, sy]]  # good x indices for block
        yvg = yv[sx, sy][good_results[sx, sy]]  # good y indices for block
        values = arr[xvg, yvg]  # construct input fields for griddata
        if interpolation_method is None:
            pass
        elif interpolation_method == "splines":
            # fit a smoothing bivariate spline to the good points and evaluate it
            # over the bounding box of the block
            tck = bisplrep(x=xvg, y=yvg, z=values, kx=5, ky=5)
            xmm = np.min(xvg), np.max(xvg)
            ymm = np.min(yvg), np.max(yvg)
            rr = bisplev(x=np.arange(xmm[0], xmm[1]),
                         y=np.arange(ymm[0], ymm[1]),
                         tck=tck)
            arr[xmm[0]:xmm[1], ymm[0]:ymm[1]] = rr
        elif interpolation_method == "griddata":
            if len(values) < 1:
                continue  # no good points in this block -> nothing to interpolate from
            points = np.squeeze(np.dstack((xvg, yvg)))  # construct input fields for griddata
            # attempt interpolation only for bad values which lie in the current block
            bb = ((np.min(xvg) < xvb) * (xvb < np.max(xvg)) * (np.min(yvg) < yvb) * (yvb < np.max(yvg)))
            xi = np.squeeze(np.dstack((xvb[bb], yvb[bb])))
            # attempt interpolation for the block only if some bad values lie in it
            if len(xi) > 0:
                if fill_corners is True:
                    # griddata only fills the convex hull of the input points; if a corner of the
                    # block's bounding box is nan, search a growing 2d window around it for valid
                    # points and add the nanmean over that window as a synthetic corner input point
                    points_corners = []
                    values_corners = []
                    # suppress warnings from nanmean if only nan's are there, which is ok here
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore")
                        box_x = [np.min(points[:, 0]), np.max(points[:, 0])]
                        box_y = [np.min(points[:, 1]), np.max(points[:, 1])]
                        for ix, iy in product(box_x, box_y):
                            if np.isnan(arr[ix, iy]):  # only if corner is empty
                                for jj in range(2, np.min(arr[sx, sy].shape), 2):  # grow window fast
                                    # FIX: ix, iy are GLOBAL indices, so the search window must be
                                    # cut from the full array (the original indexed the sub-block
                                    # arr[sx, sy] with global indices, wrong for dd_split > 1)
                                    rbf = arr[np.max([ix - jj, 0]):np.min([ix + jj, shape[0]]),
                                              np.max([iy - jj, 0]):np.min([iy + jj, shape[1]])]
                                    rpl = np.nanmean(rbf)
                                    if np.isfinite(rpl):
                                        points_corners.append([ix, iy])
                                        values_corners.append(rpl)
                                        break  # we found something -> stop increasing search block
                    if len(values_corners) > 0:  # if we found new values -> modify griddata inputs
                        logger.info("Adding corners for %s,%s, pt: %s, values: %s" %
                                    (sx, sy, points_corners, values_corners))
                        points = np.vstack((points, points_corners))
                        values = np.hstack((values, values_corners))
                        # not only inputs, also new bad values which lie in the new hull should be added
                        xvg = xv[sx, sy]
                        yvg = yv[sx, sy]
                        bb = ((np.min(xvg) < xvb) * (xvb < np.max(xvg)) *
                              (np.min(yvg) < yvb) * (yvb < np.max(yvg)))
                        xi = np.squeeze(np.dstack((xvb[bb], yvb[bb])))
                # finally, interpolate stuff
                if max_interpolation_points is None:
                    grd = griddata(points=points, values=values, xi=xi, fill_value=np.nan, method="linear")
                    good_grd = np.isfinite(grd)  # points for which interpolation succeeded
                    arr[xvb[bb][good_grd], yvb[bb][good_grd]] = grd[good_grd]  # copy interpolated results
                else:
                    # interpolate in chunks of at most max_interpolation_points to bound memory use
                    nn = xi.shape[0]
                    # FIX: removed hard-coded `max_interpolation_points = 1500000` which silently
                    # overrode the caller-supplied parameter
                    nn_blocks = int(np.ceil(nn / max_interpolation_points))
                    # FIX: nn_blocks + 1 edges are needed for nn_blocks chunks; the original
                    # produced zero chunks for nn_blocks == 1 and silently skipped interpolation
                    ss = np.linspace(0, nn, nn_blocks + 1, dtype=int, endpoint=True)
                    for ii, (j1, j2) in enumerate(zip(ss[:-1], ss[1:])):
                        logger.info("%i / %i ,%i,%i" % (ii, nn_blocks, j1, j2))
                        grd = griddata(points=points, values=values, xi=xi[j1:j2], fill_value=np.nan,
                                       method="linear")
                        good_grd = np.isfinite(grd)  # points for which interpolation succeeded
                        arr[xvb[bb][j1:j2][good_grd], yvb[bb][j1:j2][good_grd]] = grd[good_grd]
        else:
            raise ValueError("interpolation_method:%s not implemented" % interpolation_method)
    # count still remaining bad points
    bad_results = np.isnan(arr) if mask is None else np.logical_and(np.isnan(arr), mask)
    logger.info("bad/total ratio: %.5f" % (float(np.sum(bad_results) / np.prod(shape))))
    if fill_remaining == "nearest_neighbour":
        # remaining nan's are filled by searching a growing neighbourhood for valid points and
        # replacing the actual nan with the median over the search window, this is slow
        nx, ny = shape
        # suppress warnings from nanmedian if only nan's are there, which is ok here
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            for ix, iy in zip(xv[bad_results], yv[bad_results]):
                for jj in range(2, np.min([nx, ny]), 2):
                    rbf = arr[np.max([ix - jj, 0]):np.min([ix + jj, nx]),
                              np.max([iy - jj, 0]):np.min([iy + jj, ny])]
                    rpl = np.nanmedian(rbf)
                    if np.isfinite(rpl):
                        arr[ix, iy] = rpl
                        break  # we found something -> stop increasing search block
    elif fill_remaining == "broom":
        fill_nan(arr)
    elif fill_remaining == "median":
        # NOTE(review): despite the name, this uses the global nanmean as fill value -- kept
        # for backwards compatibility, confirm whether nanmedian was intended
        bf = np.nanmean(arr)
        bad_results = np.isnan(arr) if mask is None else np.logical_and(np.isnan(arr), mask)
        arr[bad_results] = bf
        logger.info("Remaining nan's filled with: %s" % str(bf))  # FIX: was a bare print()
    elif fill_remaining is None:
        pass
    else:
        raise ValueError("fill_remaining=%s not implemented" % str(fill_remaining))
    if fill_remaining is not None:
        # fill_remaining was requested -> verify that no bad points are left
        bad_results = np.isnan(arr) if mask is None else np.logical_and(np.isnan(arr), mask)
        n_bad = np.sum(bad_results)
        logger.info("bad/total ratio: %.5f" % (float(n_bad / np.prod(shape))))
        if n_bad > 0:
            # FIX: removed debug print() of the full good-value array
            if np.sum(np.isnan(arr)) == np.prod(arr.shape):
                logger.info("Array contains no valid points.")
            else:
                raise ValueError("Field still contains bad %i results." % n_bad)
    if post_processing is None:
        pass
    elif post_processing == "gaussian_filter":
        logger.info("Post processing using gaussian filter.")
        if sigma is not None:
            # apply gaussian smoothing to make the result smoother
            arr[:] = gaussian_filter(input=np.array(arr, dtype=np.float32), sigma=sigma)
        else:
            raise ValueError("When using gaussian_filter, sigma should be supplied")
    elif post_processing == "spline":
        logger.info("Post processing using splines.")
        idx_val = np.logical_and(mask == np.True_, np.isfinite(arr))  # need '==' here
        idx_sel = np.random.choice(np.arange(0, np.sum(idx_val)),
                                   np.min([max_allowed_good_points, np.sum(idx_val)]), replace=False)
        tck = bisplrep(x=xv[idx_val][idx_sel],
                       y=yv[idx_val][idx_sel],
                       z=arr[idx_val][idx_sel], kx=5, ky=5)
        # FIX: evaluate y over arr.shape[1] (original used shape[0] twice -> wrong for non-square arrays)
        arr[:] = bisplev(x=np.arange(0, arr.shape[0]), y=np.arange(0, arr.shape[1]), tck=tck)
    else:
        raise ValueError("post_processing=%s not implemented" % post_processing)
    if mask is not None:
        logger.info("Apply mask.")
        arr[mask == np.False_] = np.nan  # need '==' here !
    logger.info("Inpainting runtime: %.2fs" % (time() - t0))
    if update_in_place is False:
        return arr
if __name__ == "__main__":
    # demo: build a sparsely sampled synthetic field and inpaint it
    shape = (10980, 10980)
    # shape = (1800, 1800)
    s2aot = np.zeros(shape, dtype=np.float32)
    s2aot[:, :] = np.nan  # FIX: np.NAN alias was removed in numpy 2.0
    # scatter 600k smooth sample values over the otherwise empty field
    for ii in range(600000):
        i1, i2 = np.random.randint(shape[0]), np.random.randint(shape[1])
        s2aot[i1, i2] = 0.5 + 0.1 * np.sin(i1 / 1500) + 0.1 * np.cos(i2 / 150)
    s2aot[s2aot < 0.0] = np.nan
    mask = None
    # mask = np.empty(shape,dtype=bool)
    # mask[:] = False
    # mask[500:9000,1000:6000] = True
    # imshow(s2aot[::10,::10],vmin=np.nanmin(s2aot),vmax=np.nanmax(s2aot))
    # colorbar()
    arr = inpaint(s2aot, update_in_place=False,
                  interpolation_method=["splines", "griddata", None][1],
                  post_processing=["spline", None, "gaussian_filter"][1],
                  fill_remaining=["broom", None][0],
                  sigma=10,
                  extend=1.01,
                  max_allowed_good_points=80000,
                  max_interpolation_points=2500000,
                  mask=mask)