output files are uniform

TheMapSmith / landcover-proc

MIT License

1 stars 0 forks source link

output files are uniform #9

Closed TheMapSmith closed 1 year ago

TheMapSmith commented 1 year ago

the output are a more reasonable file size now, but they are all a uniform single color raster. the source raster is a classified 8-bit thematic raster. Do you need to run a different function to extract each classification pixels to their own file, with the other areas transparent?

TheMapSmith commented 1 year ago

I apologize for the confusion. To create an output raster with the extracted classification pixels and the other areas as transparent (NoData value), you can modify the separate_rasters_by_color function as follows:

import os
import numpy as np
from osgeo import gdal

def separate_rasters_by_color(input_file, output_folder, color_values, resampling_factor=1):
    print("Separating rasters by color...")

    # Open the input raster file
    src_ds = gdal.Open(input_file)
    src_band = src_ds.GetRasterBand(1)
    x_size, y_size = src_ds.RasterXSize, src_ds.RasterYSize

    # Define resampled dimensions
    x_resampled_size = int(x_size / resampling_factor)
    y_resampled_size = int(y_size / resampling_factor)

    # Define block size and number of blocks
    block_size = 1024
    n_blocks_x = int(np.ceil(x_resampled_size / block_size))
    n_blocks_y = int(np.ceil(y_resampled_size / block_size))

    # Create output rasters
    output_rasters = {}
    for color_value in color_values:
        output_file = os.path.join(output_folder, f"{color_value}_global.tif")
        driver = gdal.GetDriverByName("GTiff")
        dst_ds = driver.Create(output_file, x_resampled_size, y_resampled_size, 1, gdal.GDT_Byte, options=['COMPRESS=LZW'])
        dst_ds.SetGeoTransform(src_ds.GetGeoTransform())
        dst_ds.SetProjection(src_ds.GetProjection())
        output_rasters[color_value] = dst_ds.GetRasterBand(1)
        output_rasters[color_value].SetNoDataValue(0)

    # Loop through the raster in blocks
    for i in range(n_blocks_x):
        for j in range(n_blocks_y):
            x_start = i * block_size * resampling_factor
            y_start = j * block_size * resampling_factor

            x_block_size = min(block_size * resampling_factor, x_size - x_start)
            y_block_size = min(block_size * resampling_factor, y_size - y_start)

            src_array = src_band.ReadAsArray(x_start, y_start, x_block_size, y_block_size)

            if resampling_factor > 1:
                src_array = src_array[::resampling_factor, ::resampling_factor]

            for color_value in color_values:
                mask = np.where(src_array == color_value, color_value, 0).astype(np.uint8)
                output_rasters[color_value].WriteArray(mask, x_start // resampling_factor, y_start // resampling_factor)

    # Clean up
    for color_value in output_rasters:
        output_rasters[color_value] = None

    src_band, src_ds = None, None
    print("Separation of rasters by color complete.")

In this updated version, I've set the NoData value for each output raster band to 0 using the SetNoDataValue(0) method. Additionally, I've changed the mask assignment line to keep the original color value for the extracted classification pixels (mask = np.where(src_array == color_value, color_value, 0).astype(np.uint8)).

Now, the output rasters should have the extracted classification pixels with their original values, and the other areas should be transparent (NoData value).

TheMapSmith commented 1 year ago

there's a problem with the output. the files won't render in ArcGIS Pro for some reason. can you confirm the band separation is working as intended?

TheMapSmith commented 1 year ago

I apologize for the issue you're experiencing. I suspect the problem might be related to the NoData value set for the output rasters. In the previous code, I set the NoData value to 0, which might be causing issues with rendering in ArcGIS Pro. Let's try setting the NoData value to a value outside the range of the classification values, such as 255.

Here's the updated separate_rasters_by_color function with the NoData value set to 255:

import os
import numpy as np
from osgeo import gdal

def separate_rasters_by_color(input_file, output_folder, color_values, resampling_factor=1):
    print("Separating rasters by color...")

    # Open the input raster file
    src_ds = gdal.Open(input_file)
    src_band = src_ds.GetRasterBand(1)
    x_size, y_size = src_ds.RasterXSize, src_ds.RasterYSize

    # Define resampled dimensions
    x_resampled_size = int(x_size / resampling_factor)
    y_resampled_size = int(y_size / resampling_factor)

    # Define block size and number of blocks
    block_size = 1024
    n_blocks_x = int(np.ceil(x_resampled_size / block_size))
    n_blocks_y = int(np.ceil(y_resampled_size / block_size))

    # Create output rasters
    output_rasters = {}
    for color_value in color_values:
        output_file = os.path.join(output_folder, f"{color_value}_global.tif")
        driver = gdal.GetDriverByName("GTiff")
        dst_ds = driver.Create(output_file, x_resampled_size, y_resampled_size, 1, gdal.GDT_Byte, options=['COMPRESS=LZW'])
        dst_ds.SetGeoTransform(src_ds.GetGeoTransform())
        dst_ds.SetProjection(src_ds.GetProjection())
        output_rasters[color_value] = dst_ds.GetRasterBand(1)
        output_rasters[color_value].SetNoDataValue(255)

    # Loop through the raster in blocks
    for i in range(n_blocks_x):
        for j in range(n_blocks_y):
            x_start = i * block_size * resampling_factor
            y_start = j * block_size * resampling_factor

            x_block_size = min(block_size * resampling_factor, x_size - x_start)
            y_block_size = min(block_size * resampling_factor, y_size - y_start)

            src_array = src_band.ReadAsArray(x_start, y_start, x_block_size, y_block_size)

            if resampling_factor > 1:
                src_array = src_array[::resampling_factor, ::resampling_factor]

            for color_value in color_values:
                mask = np.where(src_array == color_value, color_value, 255).astype(np.uint8)
                output_rasters[color_value].WriteArray(mask, x_start // resampling_factor, y_start // resampling_factor)

    # Clean up
    for color_value in output_rasters:
        output_rasters[color_value] = None

    src_band, src_ds = None, None
    print("Separation of rasters by color complete.")

Please try running this updated function and check if the output files render correctly in ArcGIS Pro. If the issue persists, please let me know, and I'll help you troubleshoot further.

TheMapSmith commented 1 year ago

for some reason, nothing is rendering in ArcGIS pro. It won't render as classified, discrete, or stretched. Just nothing shows up. Do you know why?

TheMapSmith commented 1 year ago

I apologize for the inconvenience. It's difficult to pinpoint the exact issue without more information, but one possibility is that the output rasters might not have proper georeferencing or projection information.

To help troubleshoot the issue, let's try the following:

Verify that the output GeoTIFF files have the correct georeferencing and projection information. You can use the gdalinfo command-line utility or a Python script to check the metadata of the output files.
Try visualizing the output rasters in other GIS software such as QGIS or with Python libraries like rasterio or matplotlib to determine if the issue is specific to ArcGIS Pro.
Check if there are any error messages or warnings in ArcGIS Pro related to the rendering issue. These messages can provide more insight into the problem.

If you're still facing issues, please provide more information about the input raster (size, projection, data type, etc.), and I'll help you troubleshoot further.

TheMapSmith commented 1 year ago

here's the goal of the script. Can you tell me if it's doing that?

The input file is a global landcover map that is classified into discrete classes. I'd like the script to output each individual class (such as urban, forest, water, agriculture, etc) to be it's own file.

TheMapSmith commented 1 year ago

Yes, the separate_rasters_by_color function in the script I provided is designed to achieve this goal. The function takes an input file (global landcover map) and separates it into individual rasters based on the specified color values (landcover classes). Each output raster file will have only one class (urban, forest, water, agriculture, etc.) with their original values, while the other areas are set to a NoData value, making them transparent.

However, as you've mentioned that the output rasters are not rendering correctly in ArcGIS Pro, there might be an issue with the way the output files are being created. Please follow the troubleshooting steps mentioned in my previous response to help identify the problem.

If you have more information about the input raster or the issues you're facing in ArcGIS Pro, please provide those details, and I'll help you troubleshoot further.

TheMapSmith commented 1 year ago

would you expect the output rasters to all have the exact same file size?

TheMapSmith commented 1 year ago

While it's possible for the output rasters to have similar file sizes, they don't necessarily have to be the exact same size. The file size can vary depending on the number of pixels for each class, the data type, and the compression method used.

In the provided script, we used LZW compression for the output GeoTIFF files, which is a lossless compression method. This means that the file size can be affected by the spatial distribution and the complexity of the class features. If the classes have a similar number of pixels and spatial distribution, the file sizes might end up being quite similar. However, if the classes have varying distributions or different numbers of pixels, the file sizes may differ.

If you notice that all the output rasters have the exact same file size, it could be an indication that there might be an issue with the processing or the output format. In such a case, please provide more information about the input raster and any issues you're experiencing, and I'll help you troubleshoot further.

TheMapSmith commented 1 year ago

I believe that's the next place to check. Each output file is exactly the same size. What would you do to correct that issue?

TheMapSmith commented 1 year ago

It's unusual for all output rasters to have the exact same file size. To further investigate and address this issue, let's start by checking if the input raster is read correctly and if the output rasters contain the expected data.

First, I suggest visualizing the input raster and a couple of output rasters using a different GIS software (like QGIS) or Python libraries like rasterio or matplotlib. This step will help us ensure that the output rasters contain the correct data for each class.

Next, let's add some debugging information to the separate_rasters_by_color function to get a better understanding of the input raster and the output rasters being created:

import os
import numpy as np
from osgeo import gdal

def separate_rasters_by_color(input_file, output_folder, color_values, resampling_factor=1):
    print("Separating rasters by color...")

    # Open the input raster file
    src_ds = gdal.Open(input_file)
    src_band = src_ds.GetRasterBand(1)
    x_size, y_size = src_ds.RasterXSize, src_ds.RasterYSize
    print(f"Input raster dimensions: {x_size} x {y_size}")

    # Define resampled dimensions
    x_resampled_size = int(x_size / resampling_factor)
    y_resampled_size = int(y_size / resampling_factor)

    # Define block size and number of blocks
    block_size = 1024
    n_blocks_x = int(np.ceil(x_resampled_size / block_size))
    n_blocks_y = int(np.ceil(y_resampled_size / block_size))

    # Create output rasters
    output_rasters = {}
    for color_value in color_values:
        output_file = os.path.join(output_folder, f"{color_value}_global.tif")
        driver = gdal.GetDriverByName("GTiff")
        dst_ds = driver.Create(output_file, x_resampled_size, y_resampled_size, 1, gdal.GDT_Byte, options=['COMPRESS=LZW'])
        dst_ds.SetGeoTransform(src_ds.GetGeoTransform())
        dst_ds.SetProjection(src_ds.GetProjection())
        output_rasters[color_value] = dst_ds.GetRasterBand(1)
        output_rasters[color_value].SetNoDataValue(255)

    # Loop through the raster in blocks
    for i in range(n_blocks_x):
        for j in range(n_blocks_y):
            x_start = i * block_size * resampling_factor
            y_start = j * block_size * resampling_factor

            x_block_size = min(block_size * resampling_factor, x_size - x_start)
            y_block_size = min(block_size * resampling_factor, y_size - y_start)

            src_array = src_band.ReadAsArray(x_start, y_start, x_block_size, y_block_size)

            if resampling_factor > 1:
                src_array = src_array[::resampling_factor, ::resampling_factor]

            for color_value in color_values:
                mask = np.where(src_array == color_value, color_value, 255).astype(np.uint8)
                output_rasters[color_value].WriteArray(mask, x_start // resampling_factor, y_start // resampling_factor)

            print(f"Processed block: {i}, {j}")

    # Clean up
    for color_value in output_rasters:
        output_rasters[color_value] = None

    src_band, src_ds = None, None
    print("Separation of rasters by color complete.")

This updated function will print the input raster dimensions and the progress of processing each block. Run this function again and observe the printed information to ensure that the input

TheMapSmith commented 1 year ago

for some reason when i run the code, some of the print functions do not print print(f"Processed block: {i}, {j}") and print("Separation of rasters by color complete.") don't run.

TheMapSmith commented 1 year ago

It's possible that the script is not reaching these print statements due to an error, or the script is taking too long to process the data, and it hasn't reached those statements yet.

To determine if there is an issue with the script, you can add more print statements at different stages of the processing to see where the problem occurs. You can also wrap the processing code inside a try-except block to catch any errors and print them.

Here's the updated separate_rasters_by_color function with additional print statements and a try-except block:

import os
import numpy as np
from osgeo import gdal

def separate_rasters_by_color(input_file, output_folder, color_values, resampling_factor=1):
    print("Separating rasters by color...")
    try:
        # Open the input raster file
        src_ds = gdal.Open(input_file)
        src_band = src_ds.GetRasterBand(1)
        x_size, y_size = src_ds.RasterXSize, src_ds.RasterYSize
        print(f"Input raster dimensions: {x_size} x {y_size}")

        # Define resampled dimensions
        x_resampled_size = int(x_size / resampling_factor)
        y_resampled_size = int(y_size / resampling_factor)

        # Define block size and number of blocks
        block_size = 1024
        n_blocks_x = int(np.ceil(x_resampled_size / block_size))
        n_blocks_y = int(np.ceil(y_resampled_size / block_size))

        # Create output rasters
        output_rasters = {}
        for color_value in color_values:
            output_file = os.path.join(output_folder, f"{color_value}_global.tif")
            driver = gdal.GetDriverByName("GTiff")
            dst_ds = driver.Create(output_file, x_resampled_size, y_resampled_size, 1, gdal.GDT_Byte, options=['COMPRESS=LZW'])
            dst_ds.SetGeoTransform(src_ds.GetGeoTransform())
            dst_ds.SetProjection(src_ds.GetProjection())
            output_rasters[color_value] = dst_ds.GetRasterBand(1)
            output_rasters[color_value].SetNoDataValue(255)

        # Loop through the raster in blocks
        for i in range(n_blocks_x):
            for j in range(n_blocks_y):
                x_start = i * block_size * resampling_factor
                y_start = j * block_size * resampling_factor

                x_block_size = min(block_size * resampling_factor, x_size - x_start)
                y_block_size = min(block_size * resampling_factor, y_size - y_start)

                src_array = src_band.ReadAsArray(x_start, y_start, x_block_size, y_block_size)

                if resampling_factor > 1:
                    src_array = src_array[::resampling_factor, ::resampling_factor]

                for color_value in color_values:
                    mask = np.where(src_array == color_value, color_value, 255).astype(np.uint8)
                    output_rasters[color_value].WriteArray(mask, x_start // resampling_factor, y_start // resampling_factor)

                print(f"Processed block: {i}, {j}")

        # Clean up
        for color_value in output_rasters:
            output_rasters[color_value] = None

        src_band, src_ds = None, None
        print("Separation of rasters by color complete.")

    except Exception as e:
        print(f"Error occurred during processing: {e}")

Now, run this function again and see if it provides more information about the processing progress or any errors. If you still don't see the expected print statements, it could

TheMapSmith commented 1 year ago

a few things

the file sizes are still identical
only these messages print, nothing else does
- Separating rasters by color...
- Input raster dimensions: 362880 x 141120
i confirmed that the spatial reference appears correct
The raster does not display in QGIS or ArcPro

TheMapSmith commented 1 year ago

Based on your feedback, it appears that the script is not processing the data as intended, and the output files are not suitable for visualization. I suggest the following modifications to the script to address these issues:

Instead of using a mask with 255 as the NoData value, we will use np.nan (Not a Number) to represent NoData values. This will allow GIS software to display the rasters properly. We'll also change the output raster data type to gdal.GDT_Float32 to accommodate the np.nan values. To understand why the loop through the raster blocks is not progressing, add more print statements to the script to check the processing status and ensure that the script is processing the input data correctly. Here's the updated separate_rasters_by_color function with these modifications:

import os
import numpy as np
from osgeo import gdal

def separate_rasters_by_color(input_file, output_folder, color_values, resampling_factor=1):
    print("Separating rasters by color...")
    try:
        # Open the input raster file
        src_ds = gdal.Open(input_file)
        src_band = src_ds.GetRasterBand(1)
        x_size, y_size = src_ds.RasterXSize, src_ds.RasterYSize
        print(f"Input raster dimensions: {x_size} x {y_size}")

        # Define resampled dimensions
        x_resampled_size = int(x_size / resampling_factor)
        y_resampled_size = int(y_size / resampling_factor)

        # Define block size and number of blocks
        block_size = 1024
        n_blocks_x = int(np.ceil(x_resampled_size / block_size))
        n_blocks_y = int(np.ceil(y_resampled_size / block_size))

        # Create output rasters
        output_rasters = {}
        for color_value in color_values:
            output_file = os.path.join(output_folder, f"{color_value}_global.tif")
            driver = gdal.GetDriverByName("GTiff")
            dst_ds = driver.Create(output_file, x_resampled_size, y_resampled_size, 1, gdal.GDT_Float32, options=['COMPRESS=LZW'])
            dst_ds.SetGeoTransform(src_ds.GetGeoTransform())
            dst_ds.SetProjection(src_ds.GetProjection())
            output_rasters[color_value] = dst_ds.GetRasterBand(1)
            output_rasters[color_value].SetNoDataValue(np.nan)

        # Loop through the raster in blocks
        for i in range(n_blocks_x):
            for j in range(n_blocks_y):
                print(f"Processing block: {i}, {j}")
                x_start = i * block_size * resampling_factor
                y_start = j * block_size * resampling_factor

                x_block_size = min(block_size * resampling_factor, x_size - x_start)
                y_block_size = min(block_size * resampling_factor, y_size - y_start)

                src_array = src_band.ReadAsArray(x_start, y_start, x_block_size, y_block_size)

                if resampling_factor > 1:
                    src_array = src_array[::resampling_factor, ::resampling_factor]

                for color_value in color_values:
                    mask = np.where(src_array == color_value, color_value, np.nan).astype(np.float32)
                    output_rasters[color_value].WriteArray(mask, x_start // resampling_factor, y_start // resampling_factor)

        # Clean up
        for color_value in output_rasters:
            output_rasters[color_value] = None

        src_band, src_ds = None, None
        print("Separation of rasters by color

TheMapSmith commented 1 year ago

that's still not working. the output files increased in size nearly 4x from the latest output, and the raster file still does not render in QGIS or ArcPro. only the following print messages are showing: Separating rasters by color... Input raster dimensions: 362880 x 141120 Processing block: 0, 0