Updated - Debugged CityLearn Gromov Wasserstein Reward Function Env

kennethZhangML commented 1 year ago

import torch 
import torch.nn as nn
import torch.nn.functional as F 
import torch.optim as optim

import matplotlib.pyplot as plt 
import numpy as np

import gym 
from citylearn import GridLearn

from entropyUtilities import information_mutual_conditional, information_mutual
from geomloss import SamplesLoss

class customEnv(gym.Env):
    """Gym environment that drives a ``GridLearn`` simulation.

    Each step applies a time-of-use price and an optional demand-response
    reduction to the grid, then scores the joint action with a reward built
    from two terms:

    * the negated difference between the conditional and unconditional
      mutual information of the past/future action histories, and
    * the Shannon entropy of the current action distribution (added to the
      reward, so more varied joint actions score higher).

    NOTE(review): the ``GridLearn`` constructor, ``step`` keyword arguments,
    ``buildings[...]`` layout and ``get_state()`` schema are used exactly as
    written in the original code and are not verifiable from this file.
    """

    def __init__(self, max_timesteps, n_agents, weather_file, building_attributes_file):
        """Create the environment and the underlying grid.

        Args:
            max_timesteps: Number of steps after which ``step`` reports
                ``done=True``.
            n_agents: Number of agents; stored but not otherwise used here.
            weather_file: Forwarded as first argument to ``GridLearn``.
            building_attributes_file: Forwarded as second argument to
                ``GridLearn``.
        """
        super().__init__()

        self.timestep = 0
        self.max_timesteps = max_timesteps
        self.n_agents = n_agents

        self._reset_history()

        # Three 8-hour time-of-use windows and their prices; step() picks the
        # price by index ``(timestep % 24) // 8``.
        self.tou_periods = [(0, 8), (8, 16), (16, 24)]
        self.tou_prices = [0.1, 0.2, 0.3]
        # Demand-response event is active for dr_event_start <= timestep < dr_event_end.
        self.dr_event_start = 500
        self.dr_event_end = 550
        self.dr_event_percent_reduction = 0.1

        self.grid = GridLearn(weather_file, building_attributes_file)

    def _reset_history(self):
        """Seed the past/present/future histories with one random binary value each."""
        self.past = np.random.randint(0, 2, size=1)
        self.present = np.random.randint(0, 2, size=1)
        self.future = np.random.randint(0, 2, size=1)

    @staticmethod
    def _action_entropy(actions):
        """Return the Shannon entropy (in nats) of the empirical action distribution.

        Zero-probability actions are skipped so that ``0 * log(0)`` does not
        turn the result into NaN; an empty action array yields ``0.0``.
        """
        actions = np.asarray(actions)
        if actions.size == 0:
            return 0.0
        probabilities = np.bincount(actions, minlength=2) / actions.size
        observed = probabilities[probabilities > 0]
        return -np.sum(observed * np.log(observed))

    def step(self, actions):
        """Advance the simulation by one timestep.

        Args:
            actions: 1-D array of non-negative integer actions (required by
                ``np.bincount``), one entry per agent.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``info`` is an empty dict
            and ``done`` becomes True once ``max_timesteps`` steps have run.
        """
        # Shift the rolling histories: each one absorbs the next-newer one,
        # and the newest actions are appended to ``future``.
        self.past = np.append(self.past, self.present, axis=0)
        self.present = np.append(self.present, self.future, axis=0)
        self.future = np.append(self.future, actions, axis=0)

        tou_period = (self.timestep % 24) // 8
        tou_price = self.tou_prices[tou_period]

        if self.dr_event_start <= self.timestep < self.dr_event_end:
            demand_reduction = self.dr_event_percent_reduction
        else:
            demand_reduction = 0

        actions_scaled = actions * self.grid.buildings['Electricity'].peak_power / 2
        self.grid.step(actions_scaled, tou_energy_prices=[tou_price] * 3, demand_response=demand_reduction)

        # Advance the clock. Without this the episode never terminates and the
        # time-of-use price / demand-response window never change.
        self.timestep += 1

        tau = self.present
        s = self.past
        t = self.future
        # NOTE(review): the three histories have different lengths after the
        # first step; confirm the entropyUtilities helpers accept that.
        I_tau_sx = information_mutual_conditional(s, t, tau)
        I_tau_sx_shared = information_mutual(s, t)
        I_tau_sx_excel = I_tau_sx - I_tau_sx_shared

        diversity_penalty = self._action_entropy(actions)
        reward = -I_tau_sx_excel + diversity_penalty

        done = (self.timestep >= self.max_timesteps)
        obs = self.grid.get_state()[0]['Electricity']['consumption'].flatten()
        return obs, reward, done, {}

    def reset(self):
        """Reset the clock, the action histories and the grid; return the first observation."""
        self.timestep = 0
        self._reset_history()
        self.grid.reset()

        obs = self.grid.get_state()[0]['Electricity']['consumption'].flatten()
        return obs

def main(max_timesteps, n_agents, weather_file, building_attributes_file):
    """Run one episode of ``customEnv`` with uniformly random binary actions.

    Builds the environment from the given arguments, resets it, then keeps
    stepping with a fresh random action vector of length ``n_agents`` until
    the environment reports ``done``. The summed reward is printed and
    returned.
    """
    env = customEnv(
        max_timesteps=max_timesteps,
        n_agents=n_agents,
        weather_file=weather_file,
        building_attributes_file=building_attributes_file,
    )
    env.reset()

    cumulative_reward = 0
    # At least one step is always taken; the loop ends on the first step
    # whose ``done`` flag is truthy.
    while True:
        random_actions = np.random.randint(0, 2, size=n_agents)
        _, step_reward, finished, _ = env.step(random_actions)
        cumulative_reward = cumulative_reward + step_reward
        if finished:
            break

    print(f"Total reward earned: {cumulative_reward}")

    return cumulative_reward

I updated the import statement used by the `customEnv` class so that it imports only the required functions from the `entropyUtilities` module. The line changed from `from entropyUtilities import *` to `from entropyUtilities import information_mutual_conditional, information_mutual`. This change assumes that the `information_mutual_conditional` and `information_mutual` functions are defined in the `entropyUtilities` module.

Next, I corrected a typo in the argument name used when instantiating the `customEnv` class. The line changed from `building_attributes=building_attributes_file` to `building_attributes_file=building_attributes_file`. This ensures that the correct argument name, `building_attributes_file`, is used.

Lastly, I renamed the `stepv2` method in the `customEnv` class to the correct method name, `step`. This change avoids having two differently named step methods (`step` and `stepv2`), which can cause issues. The line changed from `def stepv2(self, actions):` to `def step(self, actions):`.

After making these changes, the code should be ready for debugging and running.

bmorphism commented 1 year ago

Amazing!! I am grateful for the runnable implementation here and will report back as far as my run!

kennethZhangML commented 1 year ago

Feel free to add anything or leave comments on the code as you go. We can modify the code as we progress through the implementation.

plurigrid / ontology

Updated - Debugged CityLearn Gromov Wasserstein Reward Function Env #65