Open zhaojingo opened 2 years ago
Hi, I'd like to ask how the feature dimensionality was determined. What were the selection criteria?
Have you managed to get this package running?
> Hi, I'd like to ask how the feature dimensionality was determined. What were the selection criteria?

Since we ran many cancer types and wanted to keep the downstream analyses consistent, the feature-selection strategy (including the choice of omics data) follows the features used in the pan-cancer subtyping analysis TCGA published in 2018 (Cell-of-Origin Patterns Dominate the Molecular Classification of 10,000 Tumors from 33 Types of Cancer).
> Hi, I'd like to ask how the feature dimensionality was determined. What were the selection criteria?

> Have you managed to get this package running?

The package will definitely run. Just install the required dependencies as instructed; even without a GPU, it runs on the CPU version of Keras.
> The package will definitely run. Just install the required dependencies as instructed; even without a GPU, it runs on the CPU version of Keras.

Thank you for your answer, but I couldn't get the code to run from the command line. While inspecting SubtypeGAN.py I found the following:

Line 468: `vec = SubtypeGAN.feature_gan(...)`. At runtime this reports that `feature_gan` is a method of `SubtypeGAN_API`, not of `SubtypeGAN`. After changing the line to `vec = SubtypeGAN_API.feature_gan(...)`, it then reports that `feature_gan()` is missing the `datasets` argument. I wonder whether this is what kept me from running it.
> Thank you for your answer, but I couldn't get the code to run from the command line. [...] I wonder whether this is what kept me from running it.

There is no need to modify the code. I have updated the environment package list; configure the environment as described and the program will run.
Thank you very much, I've got it running now.
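As an aside for anyone hitting the same `feature_gan() missing 'datasets'` message: that error pattern typically appears when an instance method is called on the class itself, so the first positional argument gets consumed as `self`. A minimal, generic illustration (the class and method names here are hypothetical, not the actual SubtypeGAN_API):

```python
class Api:
    def feature_gan(self, datasets):
        return len(datasets)

# Calling through the class leaves `self` unbound, so `datasets` looks missing:
# Api.feature_gan([1, 2, 3])
#   -> TypeError: feature_gan() missing 1 required positional argument: 'datasets'

# Calling on an instance binds `self` and works as expected:
api = Api()
print(api.feature_gan([1, 2, 3]))  # 3
```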
Hi, when selecting cancer types, did you consider types such as LUSC, PRAD, KICH, and THCA? If you did, why weren't these included in the end?
Hi, a question about the GAN code: should the encoder belonging to the GAN network update its weights before the autoencoder's encoder does, or does the order not matter? The relevant code from SubtypeGAN.py:
```python
import numpy as np
from keras import backend as K
from keras.layers import Input, Dense, Lambda, Activation, BatchNormalization, concatenate
from keras.models import Model, load_model
from keras.optimizers import Adam


class SubtypeGAN():
    def __init__(self, datasets, n_latent_dim, weight=0.001, model_path='SubtypeGAN.h5',
                 epochs=100, batch_size=64):
        self.latent_dim = n_latent_dim
        optimizer = Adam()
        self.n = len(datasets)
        self.epochs = epochs
        self.batch_size = batch_size
        sample_size = 0
        if self.n > 1:
            sample_size = datasets[0].shape[0]
        print(sample_size)
        if sample_size > 300:
            self.epochs = 11
        else:
            self.epochs = 10
        self.epochs = 30 * batch_size  # note: unconditionally overrides the values set just above
        self.shape = []
        self.weight = [0.3, 0.1, 0.1, 0.5]
        self.disc_w = 1e-4
        self.model_path = model_path
        input = []
        loss = []
        loss_weights = []
        output = []
        for i in range(self.n):
            self.shape.append(datasets[i].shape[1])
            loss.append('mse')  # one reconstruction loss per omics layer: ['mse', 'mse', ...]
        loss.append('binary_crossentropy')  # plus the discriminator head: [..., 'binary_crossentropy']
        self.decoder, self.disc = self.build_decoder_disc()
        self.disc.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
        self.encoder = self.build_encoder()
        for i in range(self.n):
            input.append(Input(shape=(self.shape[i],)))
            loss_weights.append((1 - self.disc_w) * self.weight[i])
        loss_weights.append(self.disc_w)
        z_mean, z_log_var, z = self.encoder(input)
        output = self.decoder(z)
        # (highlighted in the question) the combined model: encoder -> decoder + discriminator head
        self.gan = Model(input, output)
        self.gan.compile(loss=loss, loss_weights=loss_weights, optimizer=optimizer)
        print(self.gan.summary())
        return

    def build_encoder(self):
        def sampling(args):
            # reparameterization trick: z = mu + sigma * epsilon
            z_mean, z_log_var = args
            return z_mean + K.exp(0.5 * z_log_var) * K.random_normal(K.shape(z_mean), seed=0)

        encoding_dim = self.latent_dim
        X = []
        dims = []
        denses = []
        for i in range(self.n):
            X.append(Input(shape=(self.shape[i],)))
            dims.append(int(encoding_dim * self.weight[i]))
        for i in range(self.n):
            denses.append(Dense(dims[i])(X[i]))
        if self.n > 1:
            merged_dense = concatenate(denses, axis=-1)
        else:
            merged_dense = denses[0]
        model = BatchNormalization()(merged_dense)
        model = Activation('gelu')(model)
        model = Dense(encoding_dim)(model)
        z_mean = Dense(encoding_dim)(model)
        z_log_var = Dense(encoding_dim)(model)
        z = Lambda(sampling, output_shape=(encoding_dim,), name='z')([z_mean, z_log_var])
        return Model(X, [z_mean, z_log_var, z])

    def build_decoder_disc(self):
        denses = []
        X = Input(shape=(self.latent_dim,))
        model = Dense(self.latent_dim)(X)
        model = BatchNormalization()(model)
        model = Activation('gelu')(model)
        for i in range(self.n):
            denses.append(Dense(self.shape[i])(model))
        dec = Dense(1, activation='sigmoid')(model)
        denses.append(dec)
        # (highlighted in the question) the decoder outputs one (batch_size, shape[i])
        # reconstruction per omics layer plus the (batch_size, 1) discriminator score;
        # the discriminator is a sub-model sharing the same layers
        m_decoder = Model(X, denses)
        m_disc = Model(X, dec)
        return m_decoder, m_disc

    def build_disc(self):
        X = Input(shape=(self.latent_dim,))
        dec = Dense(1, activation='sigmoid', kernel_initializer="glorot_normal")(X)
        output = Model(X, dec)
        return output

    def train(self, X_train, bTrain=True):
        model_path = self.model_path
        log_file = "./run.log"
        fp = open(log_file, 'w')
        if bTrain:
            # GAN labels: prior samples -> 1 ("valid"), encoded samples -> 0 ("fake")
            valid = np.ones((self.batch_size, 1))
            fake = np.zeros((self.batch_size, 1))
            for epoch in range(self.epochs):
                # --- train the discriminator ---
                data = []
                idx = np.random.randint(0, X_train[0].shape[0], self.batch_size)
                for i in range(self.n):
                    data.append(X_train[i][idx])
                # a forward pass through the encoder yields the latent codes
                latent_fake = self.encoder.predict(data)[2]
                latent_real = np.random.normal(size=(self.batch_size, self.latent_dim))
                # (highlighted in the question) the discriminator is updated first ...
                d_loss_real = self.disc.train_on_batch(latent_real, valid)
                d_loss_fake = self.disc.train_on_batch(latent_fake, fake)
                d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)
                outs = data + [valid]
                # --- train the encoder/decoder (combined GAN model) ---
                # ... then the combined model; question: are the encoder and the
                # discriminator both updated here?
                g_loss = self.gan.train_on_batch(data, outs)
            fp.close()
            self.encoder.save(model_path)
        else:
            self.encoder = load_model(model_path)
        mat = self.encoder.predict(X_train)[0]
        return mat
```
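For orientation, here is a minimal sketch of how this class can be driven with toy multi-omics matrices. The matrix shapes are invented for illustration, and calling `train()` directly like this is an assumption based on the constructor above, not the package's documented entry point:

```python
import numpy as np

# three toy omics layers for 200 samples (dimensions invented for illustration)
datasets = [
    np.random.rand(200, 1000),   # e.g. mRNA expression
    np.random.rand(200, 500),    # e.g. methylation
    np.random.rand(200, 300),    # e.g. copy number
]

model = SubtypeGAN(datasets, n_latent_dim=100)
latent = model.train(datasets)   # adversarial training (30 * batch_size iterations as coded above),
                                 # then returns z_mean for every sample
print(latent.shape)              # (200, 100)
```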
> Hi, when selecting cancer types, did you consider types such as LUSC, PRAD, KICH, and THCA? If you did, why weren't these included in the end?

In the paper we selected the 10 cancer types with the largest sample sizes, since large cohorts help deep-learning methods play to their strengths. In practice the method can produce results on all of them.
> Hi, a question about the GAN code: should the encoder belonging to the GAN network update its weights before the autoencoder's encoder does, or does the order not matter? (code quoted above)
This part follows the Keras GAN reference implementation; we did not deliberately change the order. In practice, as long as training converges stably, either order should give similar results.
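For reference, the Keras GAN recipe mentioned above updates the discriminator first on a real/fake latent batch and then updates the encoder through the combined model, which is the order `train()` follows. A stripped-down, self-contained sketch of that ordering (generic toy models, not SubtypeGAN's actual networks):

```python
import numpy as np
from keras.layers import Input, Dense
from keras.models import Model
from keras.optimizers import Adam

latent_dim, data_dim, batch_size = 8, 20, 32

# Encoder maps data into the latent space (it plays the generator's role,
# as in adversarial autoencoders)
x_in = Input(shape=(data_dim,))
z = Dense(latent_dim)(x_in)
encoder = Model(x_in, z)

# Discriminator judges whether a latent vector came from the prior or the encoder
z_in = Input(shape=(latent_dim,))
score = Dense(1, activation='sigmoid')(z_in)
disc = Model(z_in, score)
disc.compile(loss='binary_crossentropy', optimizer=Adam())

# Combined model: encoder followed by discriminator
combined = Model(x_in, disc(encoder(x_in)))
combined.compile(loss='binary_crossentropy', optimizer=Adam())

valid = np.ones((batch_size, 1))
fake = np.zeros((batch_size, 1))
x = np.random.rand(batch_size, data_dim)

for step in range(10):
    # 1) discriminator update comes first, exactly as in train() above
    latent_fake = encoder.predict(x)
    latent_real = np.random.normal(size=(batch_size, latent_dim))
    disc.train_on_batch(latent_real, valid)
    disc.train_on_batch(latent_fake, fake)
    # 2) then the combined model is trained with "valid" labels, which
    #    updates the encoder to fool the discriminator
    combined.train_on_batch(x, valid)
```

Note that in the quoted `train()`, the discriminator head is not frozen inside `self.gan`, so the combined step also moves its weights slightly; the very small `disc_w` (1e-4) keeps that effect minor, which is presumably why the update order makes little practical difference once training is stable.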