Open QUEST2179 opened 1 year ago
Hi, this is because your targets are strings and have not been converted to floats. One way to deal with this is to use the `utils.literal_eval` function for the conversion, as is done in `data.MoleculeDataset.load_csv`.
Thanks, I will try your suggestion.
I get another error when saving the model.
with open("beta_cnn.json", "w") as fout:
json.dump(solver.config_dict(), fout)
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type TruncateProtein is not JSON serializable
solver.save("beta_cnn.pth") works though.
Hi, could you share more context, e.g., how you define your solver and dataset? Everything works well for me.
I just followed the BetaLactamase example on your webpage.
def testPropertyPrediction(): from torchdrug import models
model = models.ProteinCNN(input_dim=21,
hidden_dims=[1024, 1024],
kernel_size=5, padding=2, readout="max")
from torchdrug import transforms
truncate_transform = transforms.TruncateProtein(max_length=200, random=False)
protein_view_transform = transforms.ProteinView(view="residue")
transform = transforms.Compose([truncate_transform, protein_view_transform])
from torchdrug import datasets
dataset = datasets.BetaLactamase("protein-datasets/", atom_feature=None, bond_feature=None, residue_feature="default", transform=transform)
train_set, valid_set, test_set = dataset.split()
print("The label of first sample: ", dataset[0][dataset.target_fields[0]])
print("train samples: %d, valid samples: %d, test samples: %d" % (len(train_set), len(valid_set), len(test_set)))
from torchdrug import tasks
task = tasks.PropertyPrediction(model, task= ('scaled_effect1'), #dataset.tasks,
criterion="mse", metric=("mae", "rmse", "spearmanr"),
normalization=False, num_mlp_layer=2)
import torch
from torchdrug import core
optimizer = torch.optim.Adam(task.parameters(), lr=1e-4)
solver = core.Engine(task, train_set, valid_set, test_set, optimizer, gpus=[0], batch_size=64)
solver.train(num_epoch=10)
solver.evaluate("valid")
import json
with open("beta_cnn.json", "w") as fout:
json.dump(solver.config_dict(), fout)
solver.save("beta_cnn.pth")
got output: mean absolute error [scaled_effect1]: 0.303814 root mean squared error [scaled_effect1]: 0.331703 spearmanr [scaled_effect1]: 0.442122
pytorch 1.13.1 py3.7_cuda11.6_cudnn8_0 pytorch pytorch-cuda 11.6 h867d48c_1 pytorch
Thanks for raising this issue. It seems to be a bug: the `config_dict()` function fails to deal with list arguments in `transforms.Compose`. This has been fixed in https://github.com/DeepGraphLearning/torchdrug/commit/b50884877f8e1185d7500cc9207cc7b3782fb028.
Sorry, this fix doesn't work. I still get the same error.
It works for me. Maybe you need to clone the latest repo to fetch the commit and remember to install from the source code.
Sorry, I didn't pay much attention to the exact error message. Using your latest repo, the error message has changed to the following:
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type range is not JSON serializable
I printed out `solver.config_dict()`; it contains 3 occurrences of `range`.
{'class': 'core.Engine', 'task': {'class': 'tasks.PropertyPrediction', 'model': {'class': 'models.ProteinConvolutionalNetwork', 'input_dim': 21, 'hidden_dims': [1024, 1024], 'kernel_size': 5, 'stride': 1, 'padding': 2, 'activation': 'relu', 'short_cut': False, 'concat_hidden': False, 'readout': 'max'}, 'task': 'scaled_effect1', 'criterion': 'mse', 'metric': ('mae', 'rmse', 'spearmanr'), 'num_mlp_layer': 2, 'normalization': False, 'num_class': None, 'mlp_batch_norm': False, 'mlp_dropout': 0, 'graph_construction_model': None, 'verbose': 0}, 'train_set': {'class': 'dataset.Subset', 'dataset': {'class': 'datasets.BetaLactamase', 'path': 'protein-datasets/', 'verbose': 1, 'atom_feature': None, 'bond_feature': None, 'residue_feature': 'default', 'transform': {'class': 'transforms.Compose', 'transforms': [{'class': 'transforms.TruncateProtein', 'max_length': 200, 'random': False, 'keys': 'graph'}, {'class': 'transforms.ProteinView', 'view': 'residue', 'keys': 'graph'}]}}, 'indices': range(0, 4158)}, 'valid_set': {'class': 'dataset.Subset', 'dataset': {'class': 'datasets.BetaLactamase', 'path': 'protein-datasets/', 'verbose': 1, 'atom_feature': None, 'bond_feature': None, 'residue_feature': 'default', 'transform': {'class': 'transforms.Compose', 'transforms': [{'class': 'transforms.TruncateProtein', 'max_length': 200, 'random': False, 'keys': 'graph'}, {'class': 'transforms.ProteinView', 'view': 'residue', 'keys': 'graph'}]}}, 'indices': range(4158, 4678)}, 'test_set': {'class': 'dataset.Subset', 'dataset': {'class': 'datasets.BetaLactamase', 'path': 'protein-datasets/', 'verbose': 1, 'atom_feature': None, 'bond_feature': None, 'residue_feature': 'default', 'transform': {'class': 'transforms.Compose', 'transforms': [{'class': 'transforms.TruncateProtein', 'max_length': 200, 'random': False, 'keys': 'graph'}, {'class': 'transforms.ProteinView', 'view': 'residue', 'keys': 'graph'}]}}, 'indices': range(4678, 5198)}, 'optimizer': {'class': 'optim.Adam', 'lr': 0.0001, 'betas': 
(0.9, 0.999), 'eps': 1e-08, 'weight_decay': 0, 'amsgrad': False, 'foreach': None, 'maximize': False, 'capturable': False, 'differentiable': False, 'fused': False}, 'scheduler': None, 'gpus': [0], 'batch_size': 64, 'gradient_interval': 1, 'num_worker': 0, 'logger': 'logging', 'log_interval': 100}
Yes, you're right. The problem is caused by `range`, while the behavior of `config_dict()` is correct. In this case, I would suggest manually converting each `range` into a list when dumping as JSON.
Dear Developer,
I tried to mimic beta_lactamase.py to load a CSV file directly, but got the following error. Could you please help? Thanks!
File "C:\Users\18482\work\torchdrug-master\torchdrug\tasks\property_prediction.py", line 66, in preprocess if not math.isnan(sample[task]): TypeError: must be real number, not str
I included beta_csv.py for your troubleshooting.
import os from torch.utils import data as torch_data from torchdrug import data, utils from torchdrug.core import Registry as R import pandas as pd from collections import defaultdict
@R.register("datasets.beta_csv") @utils.copy_args(data.ProteinDataset.load_sequence, ignore=("target_fields")) class beta_csv(data.ProteinDataset): """ Qualitative data of drugs approved by the FDA and those that have failed clinical trials for toxicity reasons.
beta_lactamase_test.csv looks like this Unnamed: 0 Sequences Targets 0 0 MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLAARVGYIE... 1.011182 1 1 MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIE... 1.003127 2 2 MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIE... -0.008031 3 3 MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIE... 0.621368 4 4 MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIE... 1.005303