TheMapSmith / landcover-proc

MIT License
1 stars 0 forks source link

blur raster #12

Open TheMapSmith opened 1 year ago

TheMapSmith commented 1 year ago

it's working! Now i'm looking ahead to the next step. I'd like to apply a gaussian blur to each raster that was previously generated.

TheMapSmith commented 1 year ago

To apply a Gaussian blur to each of the previously generated rasters, you can use the scipy.ndimage package to apply the Gaussian filter. Here's a Python script that processes each raster in a folder and applies a Gaussian blur to it:

import os
import glob
from osgeo import gdal
import numpy as np
from scipy.ndimage import gaussian_filter

def apply_gaussian_blur(input_file, output_file, sigma):
    print(f"Applying Gaussian blur to {input_file} with sigma = {sigma}...")

    # Open input raster
    src_ds = gdal.Open(input_file)
    src_band = src_ds.GetRasterBand(1)
    src_data = src_band.ReadAsArray()

    # Apply Gaussian blur
    blurred_data = gaussian_filter(src_data, sigma=sigma)

    # Create output raster
    driver = gdal.GetDriverByName("GTiff")
    out_ds = driver.Create(output_file, src_ds.RasterXSize, src_ds.RasterYSize, 1, src_band.DataType)
    out_ds.SetGeoTransform(src_ds.GetGeoTransform())
    out_ds.SetProjection(src_ds.GetProjection())

    # Write blurred data to output raster
    out_band = out_ds.GetRasterBand(1)
    out_band.WriteArray(blurred_data)
    out_band.FlushCache()

    # Close datasets
    out_ds = None
    src_ds = None

    print(f"Gaussian blur applied and saved to {output_file}.")

input_folder = "path/to/your/input/folder"
output_folder = "path/to/your/output/folder"
sigma = 1  # Adjust the sigma value as needed

# Iterate through all TIFF files in the input folder
input_files = glob.glob(os.path.join(input_folder, "*.tif"))

for input_file in input_files:
    file_name = os.path.basename(input_file)
    output_file = os.path.join(output_folder, f"blurred_{file_name}")
    apply_gaussian_blur(input_file, output_file, sigma)

This script reads each raster file in the input folder, applies a Gaussian blur with the specified sigma value, and saves the output to the specified output folder with the prefix "blurred_". Make sure to replace the input and output folder paths with the appropriate paths on your system and adjust the sigma value as needed.

Please let me know if this works for you or if you have any other questions.

TheMapSmith commented 1 year ago

this returns an error Unable to allocate 47.7 GiB for an array with shape (141120, 362880) and data type uint8

TheMapSmith commented 1 year ago

Given the size of the input raster, it seems that the memory allocation issue is occurring when reading the entire raster into a NumPy array. In this case, we can process the raster in smaller blocks to avoid memory issues.

Here's the updated apply_gaussian_blur function that processes the raster in blocks:

import os
import glob
from osgeo import gdal
import numpy as np
from scipy.ndimage import gaussian_filter

def apply_gaussian_blur(input_file, output_file, sigma, block_size=1024):
    print(f"Applying Gaussian blur to {input_file} with sigma = {sigma}...")

    # Open input raster
    src_ds = gdal.Open(input_file)
    src_band = src_ds.GetRasterBand(1)

    # Create output raster
    driver = gdal.GetDriverByName("GTiff")
    out_ds = driver.Create(output_file, src_ds.RasterXSize, src_ds.RasterYSize, 1, src_band.DataType)
    out_ds.SetGeoTransform(src_ds.GetGeoTransform())
    out_ds.SetProjection(src_ds.GetProjection())
    out_band = out_ds.GetRasterBand(1)

    # Process the raster in blocks
    for i in range(0, src_ds.RasterYSize, block_size):
        for j in range(0, src_ds.RasterXSize, block_size):
            # Read input block
            src_data = src_band.ReadAsArray(j, i, block_size, block_size)

            # Apply Gaussian blur
            blurred_data = gaussian_filter(src_data, sigma=sigma)

            # Write blurred data to output raster
            out_band.WriteArray(blurred_data, j, i)

    out_band.FlushCache()

    # Close datasets
    out_ds = None
    src_ds = None

    print(f"Gaussian blur applied and saved to {output_file}.")

input_folder = "path/to/your/input/folder"
output_folder = "path/to/your/output/folder"
sigma = 1  # Adjust the sigma value as needed
block_size = 1024  # Adjust the block size as needed

# Iterate through all TIFF files in the input folder
input_files = glob.glob(os.path.join(input_folder, "*.tif"))

for input_file in input_files:
    file_name = os.path.basename(input_file)
    output_file = os.path.join(output_folder, f"blurred_{file_name}")
    apply_gaussian_blur(input_file, output_file, sigma, block_size)

This updated version reads and processes the raster in smaller blocks (default size is 1024x1024) to reduce memory consumption. You can adjust the block_size parameter depending on your system's memory capacity.

Please let me know if this works for you or if you have any other questions.

TheMapSmith commented 1 year ago

I apologize for the confusion. The error occurs because gaussian_filter expects a 2D array as input, but when reading the edge blocks, the array dimensions might be smaller. We can pad the edge blocks with zeros to ensure a consistent 2D input for the Gaussian filter. Here's the updated apply_gaussian_blur function:

import os
import glob
from osgeo import gdal
import numpy as np
from scipy.ndimage import gaussian_filter

def apply_gaussian_blur(input_file, output_file, sigma, block_size=1024):
    print(f"Applying Gaussian blur to {input_file} with sigma = {sigma}...")

    # Open input raster
    src_ds = gdal.Open(input_file)
    src_band = src_ds.GetRasterBand(1)

    # Create output raster
    driver = gdal.GetDriverByName("GTiff")
    out_ds = driver.Create(output_file, src_ds.RasterXSize, src_ds.RasterYSize, 1, src_band.DataType)
    out_ds.SetGeoTransform(src_ds.GetGeoTransform())
    out_ds.SetProjection(src_ds.GetProjection())
    out_band = out_ds.GetRasterBand(1)

    # Process the raster in blocks
    for i in range(0, src_ds.RasterYSize, block_size):
        for j in range(0, src_ds.RasterXSize, block_size):
            # Read input block
            src_data = src_band.ReadAsArray(j, i, block_size, block_size)

            # Pad the edge blocks with zeros
            padded_data = np.pad(src_data, ((0, block_size - src_data.shape[0]), (0, block_size - src_data.shape[1])), mode='constant')

            # Apply Gaussian blur
            blurred_data = gaussian_filter(padded_data, sigma=sigma)

            # Write blurred data to output raster
            out_band.WriteArray(blurred_data[:src_data.shape[0], :src_data.shape[1]], j, i)

    out_band.FlushCache()

    # Close datasets
    out_ds = None
    src_ds = None

    print(f"Gaussian blur applied and saved to {output_file}.")

input_folder = "path/to/your/input/folder"
output_folder = "path/to/your/output/folder"
sigma = 1  # Adjust the sigma value as needed
block_size = 1024  # Adjust the block size as needed

# Iterate through all TIFF files in the input folder
input_files = glob.glob(os.path.join(input_folder, "*.tif"))

for input_file in input_files:
    file_name = os.path.basename(input_file)
    output_file = os.path.join(output_folder, f"blurred_{file_name}")
    apply_gaussian_blur(input_file, output_file, sigma, block_size)

This version pads the edge blocks with zeros before applying the Gaussian filter and then writes only the relevant portion of the blurred data back to the output raster. This should resolve the error.

Please let me know if this works for you or if you have any other questions.

TheMapSmith commented 1 year ago
padded_data = np.pad(src_data, ((0, block_size - src_data.shape[0]), (0, block_size - src_data.shape[1])), mode='constant')

AttributeError: 'NoneType' object has no attribute 'shape'

TheMapSmith commented 1 year ago

I apologize for the oversight. It seems that the ReadAsArray function returns None when reading an empty block. We can add a check for this case and skip processing the block if the data is None. Here's the updated apply_gaussian_blur function:

import os
import glob
from osgeo import gdal
import numpy as np
from scipy.ndimage import gaussian_filter

def apply_gaussian_blur(input_file, output_file, sigma, block_size=1024):
    print(f"Applying Gaussian blur to {input_file} with sigma = {sigma}...")

    # Open input raster
    src_ds = gdal.Open(input_file)
    src_band = src_ds.GetRasterBand(1)

    # Create output raster
    driver = gdal.GetDriverByName("GTiff")
    out_ds = driver.Create(output_file, src_ds.RasterXSize, src_ds.RasterYSize, 1, src_band.DataType)
    out_ds.SetGeoTransform(src_ds.GetGeoTransform())
    out_ds.SetProjection(src_ds.GetProjection())
    out_band = out_ds.GetRasterBand(1)

    # Process the raster in blocks
    for i in range(0, src_ds.RasterYSize, block_size):
        for j in range(0, src_ds.RasterXSize, block_size):
            # Read input block
            src_data = src_band.ReadAsArray(j, i, block_size, block_size)

            if src_data is not None:
                # Pad the edge blocks with zeros
                padded_data = np.pad(src_data, ((0, block_size - src_data.shape[0]), (0, block_size - src_data.shape[1])), mode='constant')

                # Apply Gaussian blur
                blurred_data = gaussian_filter(padded_data, sigma=sigma)

                # Write blurred data to output raster
                out_band.WriteArray(blurred_data[:src_data.shape[0], :src_data.shape[1]], j, i)

    out_band.FlushCache()

    # Close datasets
    out_ds = None
    src_ds = None

    print(f"Gaussian blur applied and saved to {output_file}.")

input_folder = "path/to/your/input/folder"
output_folder = "path/to/your/output/folder"
sigma = 1  # Adjust the sigma value as needed
block_size = 1024  # Adjust the block size as needed

# Iterate through all TIFF files in the input folder
input_files = glob.glob(os.path.join(input_folder, "*.tif"))

for input_file in input_files:
    file_name = os.path.basename(input_file)
    output_file = os.path.join(output_folder, f"blurred_{file_name}")
    apply_gaussian_blur(input_file, output_file, sigma, block_size)

This version checks if src_data is None before processing the block. If it's None, the block will be skipped.

Please let me know if this works for you or if you have any other questions.

TheMapSmith commented 1 year ago

via #13, let's try the blur raster function again, but with a mean filter instead

TheMapSmith commented 1 year ago

Sure, we can replace the Gaussian blur with a mean filter. Here's the updated apply_mean_filter function using a mean filter from the scipy.ndimage module:

import os
import glob
from osgeo import gdal
import numpy as np
from scipy.ndimage import uniform_filter

def apply_mean_filter(input_file, output_file, filter_size, block_size=1024):
    print(f"Applying mean filter to {input_file} with filter size = {filter_size}...")

    # Open input raster
    src_ds = gdal.Open(input_file)
    src_band = src_ds.GetRasterBand(1)

    # Create output raster
    driver = gdal.GetDriverByName("GTiff")
    out_ds = driver.Create(output_file, src_ds.RasterXSize, src_ds.RasterYSize, 1, src_band.DataType)
    out_ds.SetGeoTransform(src_ds.GetGeoTransform())
    out_ds.SetProjection(src_ds.GetProjection())
    out_band = out_ds.GetRasterBand(1)

    # Process the raster in blocks
    for i in range(0, src_ds.RasterYSize, block_size):
        for j in range(0, src_ds.RasterXSize, block_size):
            # Read input block
            src_data = src_band.ReadAsArray(j, i, block_size, block_size)

            if src_data is not None:
                # Pad the edge blocks with zeros
                padded_data = np.pad(src_data, ((0, block_size - src_data.shape[0]), (0, block_size - src_data.shape[1])), mode='constant')

                # Apply mean filter
                mean_filtered_data = uniform_filter(padded_data, size=filter_size)

                # Write mean filtered data to output raster
                out_band.WriteArray(mean_filtered_data[:src_data.shape[0], :src_data.shape[1]], j, i)

    out_band.FlushCache()

    # Close datasets
    out_ds = None
    src_ds = None

    print(f"Mean filter applied and saved to {output_file}.")

input_folder = "path/to/your/input/folder"
output_folder = "path/to/your/output/folder"
filter_size = 5  # Adjust the filter size as needed
block_size = 1024  # Adjust the block size as needed

# Iterate through all TIFF files in the input folder
input_files = glob.glob(os.path.join(input_folder, "*.tif"))

for input_file in input_files:
    file_name = os.path.basename(input_file)
    output_file = os.path.join(output_folder, f"mean_filtered_{file_name}")
    apply_mean_filter(input_file, output_file, filter_size, block_size)

In this version, I replaced the Gaussian filter with a uniform (mean) filter using uniform_filter from scipy.ndimage. You can adjust the filter_size parameter to control the size of the filter kernel.

Let me know if this works for you or if you have any other questions.

TheMapSmith commented 1 year ago

ERROR 5: output_folder/blur-pre\113_global.tif, band 1: Access window out of range in RasterIO(). Requested

TheMapSmith commented 1 year ago

It appears that the error is related to accessing a window outside the valid range of the raster. This can happen if the block_size is larger than the raster dimensions or if there is a mistake in the loop indices. To fix this issue, let's modify the apply_mean_filter function to calculate the block size correctly for the edge blocks.

Here's the updated function:

def apply_mean_filter(input_file, output_file, filter_size, block_size=1024):
    print(f"Applying mean filter to {input_file} with filter size = {filter_size}...")

    # Open input raster
    src_ds = gdal.Open(input_file)
    src_band = src_ds.GetRasterBand(1)

    # Create output raster
    driver = gdal.GetDriverByName("GTiff")
    out_ds = driver.Create(output_file, src_ds.RasterXSize, src_ds.RasterYSize, 1, src_band.DataType)
    out_ds.SetGeoTransform(src_ds.GetGeoTransform())
    out_ds.SetProjection(src_ds.GetProjection())
    out_band = out_ds.GetRasterBand(1)

    # Process the raster in blocks
    for i in range(0, src_ds.RasterYSize, block_size):
        for j in range(0, src_ds.RasterXSize, block_size):
            # Calculate actual block size for edge blocks
            actual_block_size_x = min(block_size, src_ds.RasterXSize - j)
            actual_block_size_y = min(block_size, src_ds.RasterYSize - i)

            # Read input block
            src_data = src_band.ReadAsArray(j, i, actual_block_size_x, actual_block_size_y)

            if src_data is not None:
                # Pad the edge blocks with zeros
                padded_data = np.pad(src_data, ((0, block_size - src_data.shape[0]), (0, block_size - src_data.shape[1])), mode='constant')

                # Apply mean filter
                mean_filtered_data = uniform_filter(padded_data, size=filter_size)

                # Write mean filtered data to output raster
                out_band.WriteArray(mean_filtered_data[:src_data.shape[0], :src_data.shape[1]], j, i)

    out_band.FlushCache()

    # Close datasets
    out_ds = None
    src_ds = None

    print(f"Mean filter applied and saved to {output_file}.")

I've added two lines to calculate the actual block size for edge blocks, making sure we don't read beyond the raster dimensions. This should fix the "Access window out of range" error. Please try running the script again with these modifications and let me know if it works for you.