CodeSchmiedeHGW / BLITZ

Bulk Loading and Interactive Time series Zonal analysis
GNU General Public License v3.0
1 stars 0 forks source link

PCA Implementation - AFTER V1.2 - #36

Open PiMaV opened 2 weeks ago

PiMaV commented 2 weeks ago
PiMaV commented 2 weeks ago

Funnily enough, ChatGPT has already made some suggestions:

Code Implementation Example:

import json
import os

import cupy as cp
import numpy as np
from scipy.sparse.linalg import svds

class PCAHandler:
    """Compute, persist and reload an SVD decomposition used for PCA.

    ``pca_result`` holds the ``(U, s, Vh)`` factors of the last decomposition
    (or ``None``), ``metadata`` the dictionary written next to the saved
    factors, and ``pca_path`` the ``str.format`` template
    (number of images, x, y) for the output file.

    The factors are written as a NumPy archive with the entries ``U``, ``s``
    and ``Vh`` under the configured file name, and the metadata as JSON in a
    sibling file with the ``.meta`` extension.

    NOTE(review): the data is decomposed exactly as passed in; no
    mean-centering or flattening is applied here. Confirm that callers
    prepare the data accordingly if classical PCA is intended.
    """

    # Entry names of the three SVD factors inside the saved archive.
    _FACTOR_KEYS = ("U", "s", "Vh")

    def __init__(self):
        self.pca_result = None
        self.metadata = {}
        self.pca_path = "images/pca_{}_{}_{}.npy"
        # Shape of the array handed to the last successful calculate_pca().
        self._data_shape = None

    def calculate_pca(self, data, crop_region=None):
        """Decompose ``data`` with a reduced SVD on the GPU and autosave it.

        Args:
            data: Array accepted by ``cp.linalg.svd``.
            crop_region: Optional ``(x, y)`` pair used for the output file
                name and metadata; when falsy the size is derived from the
                shape of ``data``.

        A ``MemoryError`` raised while decomposing or saving is reported on
        stdout instead of being propagated; ``pca_result`` then keeps its
        previous value unless the decomposition itself had succeeded.
        """
        try:
            U, s, Vh = cp.linalg.svd(data, full_matrices=False)
            self.pca_result = (U, s, Vh)
            self._data_shape = tuple(data.shape)
            # Saving copies the factors to host memory, which can exhaust
            # memory as well, so it stays inside the guarded section.
            self._autosave_pca(crop_region)
        except MemoryError as e:
            itemsize = getattr(getattr(data, "dtype", None), "itemsize", 8)
            required_memory = self._svd_memory_gib(
                tuple(getattr(data, "shape", ())), itemsize
            )
            print(f"Memory Error: Unable to allocate {required_memory:.2f} GiB. {str(e)}")

    def _autosave_pca(self, crop_region):
        """Write ``pca_result`` and its metadata to the path from ``pca_path``.

        The three factors have different shapes, so they are stored as named
        entries of one archive rather than as a single array.
        """
        factors = [self._to_host(part) for part in self.pca_result]
        # Prefer the recorded input shape; fall back to U when the result
        # was assigned without going through calculate_pca().
        shape = self._data_shape or tuple(factors[0].shape)
        num_images = shape[0]
        x, y = crop_region if crop_region else self._image_size(shape)
        filename = self.pca_path.format(num_images, x, y)
        self._ensure_parent_dir(filename)
        # Passing an open file keeps the configured name; given a plain
        # string, np.savez would append ".npz" to it.
        with open(filename, "wb") as archive:
            np.savez(archive, **dict(zip(self._FACTOR_KEYS, factors)))
        self._save_metadata(num_images, x, y)

    def _save_metadata(self, num_images, x, y):
        """Store the metadata dictionary as JSON next to the PCA file."""
        metadata_filename = self._metadata_path(
            self.pca_path.format(num_images, x, y)
        )
        self.metadata = {
            "original_image_size": (x, y),
            "number_of_images": num_images,
            "cropped_region": (x, y)
        }
        self._ensure_parent_dir(metadata_filename)
        with open(metadata_filename, 'w', encoding="utf-8") as f:
            json.dump(self.metadata, f)

    def load_pca(self, filename):
        """Load a saved decomposition and its metadata, if ``filename`` exists.

        Archives written by this class are unpacked into a ``(U, s, Vh)``
        tuple of NumPy arrays; any other file readable by ``np.load`` is
        stored as returned. A missing metadata file yields empty metadata.
        Nothing happens when ``filename`` does not exist.
        """
        if not os.path.exists(filename):
            return
        loaded = np.load(filename)
        if hasattr(loaded, "files"):
            # Archive object: read the entries, then release the file handle.
            with loaded:
                self.pca_result = tuple(loaded[key] for key in self._FACTOR_KEYS)
        else:
            self.pca_result = loaded
        metadata_filename = self._metadata_path(filename)
        if os.path.exists(metadata_filename):
            with open(metadata_filename, 'r', encoding="utf-8") as f:
                self.metadata = json.load(f)
        else:
            # Do not keep metadata that belonged to a previous result.
            self.metadata = {}

    def clear_pca(self):
        """Forget the current decomposition and its metadata."""
        self.pca_result = None
        self.metadata = {}
        self._data_shape = None

    def estimate_memory_usage(self, x, y):
        """Return the size in GiB of an ``(x*y) x (x*y)`` matrix of 8-byte values."""
        return (x * y) ** 2 * 8 / 1024 ** 3

    def display_memory_estimate(self, x, y):
        """Print the value of ``estimate_memory_usage(x, y)``."""
        estimated_memory = self.estimate_memory_usage(x, y)
        print(f"Estimated memory usage for PCA: {estimated_memory:.2f} GiB")

    @staticmethod
    def _to_host(array):
        """Return ``array`` as a NumPy array, copying from the GPU if needed."""
        # CuPy arrays expose get() for the device-to-host copy; NumPy arrays
        # have no such method and are passed through np.asarray.
        fetch = getattr(array, "get", None)
        return fetch() if callable(fetch) else np.asarray(array)

    @staticmethod
    def _metadata_path(filename):
        """Return ``filename`` with its extension replaced by ``.meta``."""
        return os.path.splitext(filename)[0] + ".meta"

    @staticmethod
    def _ensure_parent_dir(filename):
        """Create the directory that will contain ``filename`` if missing."""
        folder = os.path.dirname(filename)
        if folder:
            os.makedirs(folder, exist_ok=True)

    @staticmethod
    def _image_size(shape):
        """Derive an ``(x, y)`` pair from an array shape.

        Arrays with three or more axes contribute their two trailing axes;
        a 2-D array is reported as ``(columns, 1)``.
        """
        if len(shape) >= 3:
            return shape[-2], shape[-1]
        if len(shape) == 2:
            return shape[1], 1
        return (shape[0] if shape else 0), 1

    @staticmethod
    def _svd_memory_gib(shape, itemsize=8):
        """Roughly estimate the GiB held by the input plus its reduced SVD factors."""
        if len(shape) < 2:
            return 0.0
        *batch, rows, cols = shape
        rank = min(rows, cols)
        # Input matrix + U (rows x rank) + s (rank) + Vh (rank x cols).
        elements = rows * cols + rows * rank + rank + rank * cols
        for extent in batch:
            elements *= extent
        return elements * itemsize / 1024 ** 3