Open TheMapSmith opened 1 year ago
To apply a Gaussian blur to each of the previously generated rasters, you can use the scipy.ndimage package to apply the Gaussian filter. Here's a Python script that processes each raster in a folder and applies a Gaussian blur to it:
import os
import glob
from osgeo import gdal
import numpy as np
from scipy.ndimage import gaussian_filter
def apply_gaussian_blur(input_file, output_file, sigma):
print(f"Applying Gaussian blur to {input_file} with sigma = {sigma}...")
# Open input raster
src_ds = gdal.Open(input_file)
src_band = src_ds.GetRasterBand(1)
src_data = src_band.ReadAsArray()
# Apply Gaussian blur
blurred_data = gaussian_filter(src_data, sigma=sigma)
# Create output raster
driver = gdal.GetDriverByName("GTiff")
out_ds = driver.Create(output_file, src_ds.RasterXSize, src_ds.RasterYSize, 1, src_band.DataType)
out_ds.SetGeoTransform(src_ds.GetGeoTransform())
out_ds.SetProjection(src_ds.GetProjection())
# Write blurred data to output raster
out_band = out_ds.GetRasterBand(1)
out_band.WriteArray(blurred_data)
out_band.FlushCache()
# Close datasets
out_ds = None
src_ds = None
print(f"Gaussian blur applied and saved to {output_file}.")
input_folder = "path/to/your/input/folder"
output_folder = "path/to/your/output/folder"
sigma = 1 # Adjust the sigma value as needed
# Iterate through all TIFF files in the input folder
input_files = glob.glob(os.path.join(input_folder, "*.tif"))
for input_file in input_files:
file_name = os.path.basename(input_file)
output_file = os.path.join(output_folder, f"blurred_{file_name}")
apply_gaussian_blur(input_file, output_file, sigma)
This script reads each raster file in the input folder, applies a Gaussian blur with the specified sigma value, and saves the output to the specified output folder with the prefix "blurred_". Make sure to replace the input and output folder paths with the appropriate paths on your system and adjust the sigma value as needed.
Please let me know if this works for you or if you have any other questions.
this returns an error Unable to allocate 47.7 GiB for an array with shape (141120, 362880) and data type uint8
Given the size of the input raster, it seems that the memory allocation issue is occurring when reading the entire raster into a NumPy array. In this case, we can process the raster in smaller blocks to avoid memory issues.
Here's the updated apply_gaussian_blur function that processes the raster in blocks:
import os
import glob
from osgeo import gdal
import numpy as np
from scipy.ndimage import gaussian_filter
def apply_gaussian_blur(input_file, output_file, sigma, block_size=1024):
print(f"Applying Gaussian blur to {input_file} with sigma = {sigma}...")
# Open input raster
src_ds = gdal.Open(input_file)
src_band = src_ds.GetRasterBand(1)
# Create output raster
driver = gdal.GetDriverByName("GTiff")
out_ds = driver.Create(output_file, src_ds.RasterXSize, src_ds.RasterYSize, 1, src_band.DataType)
out_ds.SetGeoTransform(src_ds.GetGeoTransform())
out_ds.SetProjection(src_ds.GetProjection())
out_band = out_ds.GetRasterBand(1)
# Process the raster in blocks
for i in range(0, src_ds.RasterYSize, block_size):
for j in range(0, src_ds.RasterXSize, block_size):
# Read input block
src_data = src_band.ReadAsArray(j, i, block_size, block_size)
# Apply Gaussian blur
blurred_data = gaussian_filter(src_data, sigma=sigma)
# Write blurred data to output raster
out_band.WriteArray(blurred_data, j, i)
out_band.FlushCache()
# Close datasets
out_ds = None
src_ds = None
print(f"Gaussian blur applied and saved to {output_file}.")
input_folder = "path/to/your/input/folder"
output_folder = "path/to/your/output/folder"
sigma = 1 # Adjust the sigma value as needed
block_size = 1024 # Adjust the block size as needed
# Iterate through all TIFF files in the input folder
input_files = glob.glob(os.path.join(input_folder, "*.tif"))
for input_file in input_files:
file_name = os.path.basename(input_file)
output_file = os.path.join(output_folder, f"blurred_{file_name}")
apply_gaussian_blur(input_file, output_file, sigma, block_size)
This updated version reads and processes the raster in smaller blocks (default size is 1024x1024) to reduce memory consumption. You can adjust the block_size parameter depending on your system's memory capacity.
Please let me know if this works for you or if you have any other questions.
I apologize for the confusion. The error occurs because gaussian_filter expects a 2D array as input, but when reading the edge blocks, the array dimensions might be smaller. We can pad the edge blocks with zeros to ensure a consistent 2D input for the Gaussian filter. Here's the updated apply_gaussian_blur function:
import os
import glob
from osgeo import gdal
import numpy as np
from scipy.ndimage import gaussian_filter
def apply_gaussian_blur(input_file, output_file, sigma, block_size=1024):
print(f"Applying Gaussian blur to {input_file} with sigma = {sigma}...")
# Open input raster
src_ds = gdal.Open(input_file)
src_band = src_ds.GetRasterBand(1)
# Create output raster
driver = gdal.GetDriverByName("GTiff")
out_ds = driver.Create(output_file, src_ds.RasterXSize, src_ds.RasterYSize, 1, src_band.DataType)
out_ds.SetGeoTransform(src_ds.GetGeoTransform())
out_ds.SetProjection(src_ds.GetProjection())
out_band = out_ds.GetRasterBand(1)
# Process the raster in blocks
for i in range(0, src_ds.RasterYSize, block_size):
for j in range(0, src_ds.RasterXSize, block_size):
# Read input block
src_data = src_band.ReadAsArray(j, i, block_size, block_size)
# Pad the edge blocks with zeros
padded_data = np.pad(src_data, ((0, block_size - src_data.shape[0]), (0, block_size - src_data.shape[1])), mode='constant')
# Apply Gaussian blur
blurred_data = gaussian_filter(padded_data, sigma=sigma)
# Write blurred data to output raster
out_band.WriteArray(blurred_data[:src_data.shape[0], :src_data.shape[1]], j, i)
out_band.FlushCache()
# Close datasets
out_ds = None
src_ds = None
print(f"Gaussian blur applied and saved to {output_file}.")
input_folder = "path/to/your/input/folder"
output_folder = "path/to/your/output/folder"
sigma = 1 # Adjust the sigma value as needed
block_size = 1024 # Adjust the block size as needed
# Iterate through all TIFF files in the input folder
input_files = glob.glob(os.path.join(input_folder, "*.tif"))
for input_file in input_files:
file_name = os.path.basename(input_file)
output_file = os.path.join(output_folder, f"blurred_{file_name}")
apply_gaussian_blur(input_file, output_file, sigma, block_size)
This version pads the edge blocks with zeros before applying the Gaussian filter and then writes only the relevant portion of the blurred data back to the output raster. This should resolve the error.
Please let me know if this works for you or if you have any other questions.
padded_data = np.pad(src_data, ((0, block_size - src_data.shape[0]), (0, block_size - src_data.shape[1])), mode='constant')
AttributeError: 'NoneType' object has no attribute 'shape'
I apologize for the oversight. It seems that the ReadAsArray function returns None when reading an empty block. We can add a check for this case and skip processing the block if the data is None. Here's the updated apply_gaussian_blur function:
import os
import glob
from osgeo import gdal
import numpy as np
from scipy.ndimage import gaussian_filter
def apply_gaussian_blur(input_file, output_file, sigma, block_size=1024):
print(f"Applying Gaussian blur to {input_file} with sigma = {sigma}...")
# Open input raster
src_ds = gdal.Open(input_file)
src_band = src_ds.GetRasterBand(1)
# Create output raster
driver = gdal.GetDriverByName("GTiff")
out_ds = driver.Create(output_file, src_ds.RasterXSize, src_ds.RasterYSize, 1, src_band.DataType)
out_ds.SetGeoTransform(src_ds.GetGeoTransform())
out_ds.SetProjection(src_ds.GetProjection())
out_band = out_ds.GetRasterBand(1)
# Process the raster in blocks
for i in range(0, src_ds.RasterYSize, block_size):
for j in range(0, src_ds.RasterXSize, block_size):
# Read input block
src_data = src_band.ReadAsArray(j, i, block_size, block_size)
if src_data is not None:
# Pad the edge blocks with zeros
padded_data = np.pad(src_data, ((0, block_size - src_data.shape[0]), (0, block_size - src_data.shape[1])), mode='constant')
# Apply Gaussian blur
blurred_data = gaussian_filter(padded_data, sigma=sigma)
# Write blurred data to output raster
out_band.WriteArray(blurred_data[:src_data.shape[0], :src_data.shape[1]], j, i)
out_band.FlushCache()
# Close datasets
out_ds = None
src_ds = None
print(f"Gaussian blur applied and saved to {output_file}.")
input_folder = "path/to/your/input/folder"
output_folder = "path/to/your/output/folder"
sigma = 1 # Adjust the sigma value as needed
block_size = 1024 # Adjust the block size as needed
# Iterate through all TIFF files in the input folder
input_files = glob.glob(os.path.join(input_folder, "*.tif"))
for input_file in input_files:
file_name = os.path.basename(input_file)
output_file = os.path.join(output_folder, f"blurred_{file_name}")
apply_gaussian_blur(input_file, output_file, sigma, block_size)
This version checks if src_data is None before processing the block. If it's None, the block will be skipped.
Please let me know if this works for you or if you have any other questions.
via #13, let's try the blur raster function again, but with a mean filter instead
Sure, we can replace the Gaussian blur with a mean filter. Here's the updated apply_mean_filter function using a mean filter from the scipy.ndimage module:
import os
import glob
from osgeo import gdal
import numpy as np
from scipy.ndimage import uniform_filter
def apply_mean_filter(input_file, output_file, filter_size, block_size=1024):
print(f"Applying mean filter to {input_file} with filter size = {filter_size}...")
# Open input raster
src_ds = gdal.Open(input_file)
src_band = src_ds.GetRasterBand(1)
# Create output raster
driver = gdal.GetDriverByName("GTiff")
out_ds = driver.Create(output_file, src_ds.RasterXSize, src_ds.RasterYSize, 1, src_band.DataType)
out_ds.SetGeoTransform(src_ds.GetGeoTransform())
out_ds.SetProjection(src_ds.GetProjection())
out_band = out_ds.GetRasterBand(1)
# Process the raster in blocks
for i in range(0, src_ds.RasterYSize, block_size):
for j in range(0, src_ds.RasterXSize, block_size):
# Read input block
src_data = src_band.ReadAsArray(j, i, block_size, block_size)
if src_data is not None:
# Pad the edge blocks with zeros
padded_data = np.pad(src_data, ((0, block_size - src_data.shape[0]), (0, block_size - src_data.shape[1])), mode='constant')
# Apply mean filter
mean_filtered_data = uniform_filter(padded_data, size=filter_size)
# Write mean filtered data to output raster
out_band.WriteArray(mean_filtered_data[:src_data.shape[0], :src_data.shape[1]], j, i)
out_band.FlushCache()
# Close datasets
out_ds = None
src_ds = None
print(f"Mean filter applied and saved to {output_file}.")
input_folder = "path/to/your/input/folder"
output_folder = "path/to/your/output/folder"
filter_size = 5 # Adjust the filter size as needed
block_size = 1024 # Adjust the block size as needed
# Iterate through all TIFF files in the input folder
input_files = glob.glob(os.path.join(input_folder, "*.tif"))
for input_file in input_files:
file_name = os.path.basename(input_file)
output_file = os.path.join(output_folder, f"mean_filtered_{file_name}")
apply_mean_filter(input_file, output_file, filter_size, block_size)
In this version, I replaced the Gaussian filter with a uniform (mean) filter using uniform_filter from scipy.ndimage. You can adjust the filter_size parameter to control the size of the filter kernel.
Let me know if this works for you or if you have any other questions.
ERROR 5: output_folder/blur-pre\113_global.tif, band 1: Access window out of range in RasterIO(). Requested
It appears that the error is related to accessing a window outside the valid range of the raster. This can happen if the block_size is larger than the raster dimensions or if there is a mistake in the loop indices. To fix this issue, let's modify the apply_mean_filter function to calculate the block size correctly for the edge blocks.
Here's the updated function:
def apply_mean_filter(input_file, output_file, filter_size, block_size=1024):
print(f"Applying mean filter to {input_file} with filter size = {filter_size}...")
# Open input raster
src_ds = gdal.Open(input_file)
src_band = src_ds.GetRasterBand(1)
# Create output raster
driver = gdal.GetDriverByName("GTiff")
out_ds = driver.Create(output_file, src_ds.RasterXSize, src_ds.RasterYSize, 1, src_band.DataType)
out_ds.SetGeoTransform(src_ds.GetGeoTransform())
out_ds.SetProjection(src_ds.GetProjection())
out_band = out_ds.GetRasterBand(1)
# Process the raster in blocks
for i in range(0, src_ds.RasterYSize, block_size):
for j in range(0, src_ds.RasterXSize, block_size):
# Calculate actual block size for edge blocks
actual_block_size_x = min(block_size, src_ds.RasterXSize - j)
actual_block_size_y = min(block_size, src_ds.RasterYSize - i)
# Read input block
src_data = src_band.ReadAsArray(j, i, actual_block_size_x, actual_block_size_y)
if src_data is not None:
# Pad the edge blocks with zeros
padded_data = np.pad(src_data, ((0, block_size - src_data.shape[0]), (0, block_size - src_data.shape[1])), mode='constant')
# Apply mean filter
mean_filtered_data = uniform_filter(padded_data, size=filter_size)
# Write mean filtered data to output raster
out_band.WriteArray(mean_filtered_data[:src_data.shape[0], :src_data.shape[1]], j, i)
out_band.FlushCache()
# Close datasets
out_ds = None
src_ds = None
print(f"Mean filter applied and saved to {output_file}.")
I've added two lines to calculate the actual block size for edge blocks, making sure we don't read beyond the raster dimensions. This should fix the "Access window out of range" error. Please try running the script again with these modifications and let me know if it works for you.
it's working! Now i'm looking ahead to the next step. I'd like to apply a gaussian blur to each raster that was previously generated.