sustainable-processes / summit

Optimising chemical reactions using machine learning
https://gosummit.readthedocs.io/en/latest/
MIT License
118 stars 24 forks source link

Adding more than three discrete variables fails #249

Closed zjyz17 closed 1 year ago

zjyz17 commented 1 year ago

Description

When more than three discrete (categorical) variables are added to the domain, TSEMO fails to run. The error is:

Traceback (most recent call last):

  File ~/anaconda3/lib/python3.10/site-packages/spyder_kernels/py3compat.py:356 in compat_exec
    exec(code, globals, locals)

  File ~/Desktop/Reaction opt/TSEMO/temp.py:113
    Optimization = strategy.suggest_experiments(1, prev_res=Data)

  File ~/anaconda3/lib/python3.10/site-packages/summit/strategies/tsemo.py:243 in suggest_experiments
    if X.shape[0] == 0 and yhat.shape[0] == 0:

UnboundLocalError: local variable 'X' referenced before assignment

What I Did

import numpy as np
[test2.xlsx](https://github.com/sustainable-processes/summit/files/11724763/test2.xlsx)

import pandas as pd
import openpyxl
import matplotlib.pyplot as plt
import pkg_resources
import pathlib

from summit.domain import *
from summit.utils.dataset import DataSet
from summit.strategies import SOBO
from summit.strategies import TSEMO
from summit.strategies import LHS
from summit.strategies import Random
from datetime import datetime
from openpyxl import Workbook, load_workbook

# Reproduction script: build a domain with four categorical inputs (each
# carrying numeric descriptors) and two continuous objectives, then ask
# TSEMO for one new experiment based on previously recorded results.
domain = Domain()

# --- Input 1: mixer, described by the columns inner_diameter and angle ---
mixer_df = DataSet(
    [[0.5, 90], [1, 90], [0.5, 120], [1, 120]],
    index=["T_small", "T_big", "Y_small", "Y_big"],
    columns=["inner_diameter", "angle"],
)
domain += CategoricalVariable("mixer", "mixer_descriptors", descriptors=mixer_df)

# --- Input 2: equiv, three levels with a single descriptor column ---
equiv_df = DataSet(
    [[1], [1.25], [1.5]],
    index=["equiv_1", "equiv_1.25", "equiv_1.5"],
    columns=["Equivalent"],
)
domain += CategoricalVariable("equiv", "equiv_level", descriptors=equiv_df)

# --- Input 3: flowrate, three levels with a single descriptor column ---
flowrate_df = DataSet(
    [[0.1], [0.5], [1]],
    index=["flowrate_0.1", "flowrate_0.5", "flowrate_1"],
    columns=["Flowrate"],
)
domain += CategoricalVariable("flowrate", "flowrate_level", descriptors=flowrate_df)

# Disabled alternative: model flowrate as a continuous variable instead.
# domain += ContinuousVariable(name = 'flowrate', description="flowrate", bounds = [0, 10])

# --- Input 4: solvent, four levels with a single descriptor column ---
solvent_df = DataSet(
    [[1e8], [8.1], [1.3], [0.05]],
    index=["MeCN", "EtOAc", "CH2Cl2", "Toluene"],
    columns=["Solubility"],
)
domain += CategoricalVariable("solvent", "solubility", descriptors=solvent_df)

# --- Objective 1: STY, bounded to [0, 10] ---
# NOTE(review): this objective is minimised (maximize=False) — confirm that
# is intended.
obj_1 = "Space_time_yield"
domain += ContinuousVariable(
    name="STY",
    description=obj_1,
    bounds=[0, 10],
    is_objective=True,
    maximize=False,
)

# --- Objective 2: RME, bounded to [0, 100], maximised ---
obj_2 = "Reaction_mass_efficiency"
domain += ContinuousVariable(
    name="RME",
    description=obj_2,
    bounds=[0, 100],
    is_objective=True,
    maximize=True,
)

# Read the previously recorded experiments from the spreadsheet and wrap
# the resulting DataFrame in a summit DataSet.
Data_DF = pd.read_excel("test2.xlsx")
Data = DataSet.from_df(Data_DF)
print(Data)

# Request a single new experiment from TSEMO, seeded with the prior results.
# This is the call that raises the UnboundLocalError in the reported traceback.
strategy = TSEMO(domain)
Optimization = strategy.suggest_experiments(1, prev_res=Data)
print(Optimization)

The dataset in the Excel file (test2.xlsx) looks like this:

NAME    mixer       equiv      flowrate  solvent   STY  RME
TYPE     DATA        DATA          DATA     DATA  DATA DATA
0     T_small   equiv_1.5  flowrate_0.1  Toluene   9.1   12
1     T_small     equiv_1    flowrate_1  Toluene   7.3   41
2     Y_small   equiv_1.5    flowrate_1    EtOAc   3.7   35