CQCL / lambeq

A high-level Python library for Quantum Natural Language Processing
https://cqcl.github.io/lambeq-docs
Apache License 2.0
451 stars 108 forks source link

PennyLane training problem #135

Closed sujit4uwbslg closed 4 months ago

sujit4uwbslg commented 7 months ago

def read_data(sentences):
    """Split labelled sentences into one-hot labels and bare text.

    Each entry is expected to look like ``"<label>, <text>"`` where
    ``<label>`` is ``0`` or ``1``.

    Args:
        sentences: Iterable of labelled sentence strings.

    Returns:
        A ``(labels, data)`` tuple. ``labels`` holds one
        ``[label, 1 - label]`` pair per sentence; ``data`` holds the
        sentence text with the label prefix and surrounding whitespace
        removed.

    Raises:
        ValueError: If the part before the first comma is not an integer.
    """
    labels, data = [], []
    for sentence in sentences:
        # Split on the first comma rather than slicing at a fixed offset,
        # so "1,text" and "1 , text" do not lose or keep stray characters.
        label_text, _, text = sentence.partition(",")
        label = int(label_text)
        labels.append([label, 1 - label])
        data.append(text.strip())
    return labels, data

# Labelled training corpus; each entry is "<label>, <sentence>".
train_sentences = [
    "0, Achieving my goals gives me a sense of accomplishment and pride.",
    "0, Traveling to new places broadens my horizons and enriches my life.",
    "1, The constant noise pollution in the city makes it hard to relax and unwind.",
    "0, I enjoy eating healthy and nutritious meals.",
    "1, Feeling unappreciated at work leads to dissatisfaction and resentment.",
    "0, Learning new skills brings me joy and fulfillment.",
    "0, Surrounding myself with positive people uplifts my spirits.",
    "1, Financial struggles cause me a great deal of stress and worry.",
    "1, The traffic this morning was terrible and stressful.",
    "0, I am grateful for the opportunities that come my way.",
    "1, Dealing with health issues takes a toll on both my physical and mental well-being.",
    "0, Exercise boosts my energy levels and improves my mood.",
]

# Labelled validation corpus, same format.
# NOTE(review): the "Achieving my goals ..." and "Traveling to new places ..."
# entries also appear in train_sentences, so validation is not fully held out.
val_sentences = [
    "1, Experiencing constant setbacks can be discouraging and demoralizing.",
    "0, Helping others makes me feel happy and fulfilled.",
    "1, Dealing with difficult people can be draining and frustrating.",
    "0, I love spending time with my friends and family.",
    "1, Failing to meet deadlines fills me with anxiety and disappointment.",
    "0, Achieving my goals gives me a sense of accomplishment and pride.",
    "1, Losing touch with loved ones leaves me feeling lonely and isolated.",
    "0, Traveling to new places broadens my horizons and enriches my life.",
]

# Split each labelled corpus into one-hot labels and bare sentence text.
train_labels, train_data = read_data(train_sentences)
val_labels, val_data = read_data(val_sentences)

from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords


def filtering(sent):
    """Remove English stop words and listed punctuation tokens from a sentence.

    Args:
        sent: The sentence to filter.

    Returns:
        The surviving tokens joined with single spaces.
    """
    # Build the lookup sets once per call instead of once per token.
    stop_words = set(stopwords.words('english'))
    # Per-character membership: a token is dropped only when it is exactly
    # one of these symbols, not when it is any substring of the string.
    punctuation = set("#.&*%^@_-+-*/$")
    # NOTE(review): matching is case-sensitive; if the stop-word list is all
    # lowercase, capitalised tokens such as "The" are kept - confirm intent.
    kept = [
        w for w in word_tokenize(sent)
        if w not in stop_words and w not in punctuation
    ]
    return " ".join(kept)

# Apply stop-word / punctuation filtering to every sentence of both splits.
train_data_filtered = [filtering(s) for s in train_data]
val_data_filtered = [filtering(s) for s in val_data]

from lambeq import BobcatParser

parser = BobcatParser(verbose='text')

# NOTE(review): with suppress_exceptions=True a sentence that fails to parse
# presumably yields None instead of raising - confirm against the lambeq docs
# and drop such entries (together with their labels) before building circuits.
train_diagrams = parser.sentences2diagrams(train_data_filtered, suppress_exceptions=True)
val_diagrams = parser.sentences2diagrams(val_data_filtered, suppress_exceptions=True)

from lambeq import AtomicType, IQPAnsatz, RemoveCupsRewriter

# The sentence wire needs one qubit so each circuit produces a 2-element
# output that lines up with the [label, 1 - label] targets. With
# AtomicType.SENTENCE mapped to 0 the output has no class dimension, which is
# what makes argmax(dim=1) and the MSE loss fail downstream.
ansatz = IQPAnsatz(
    {AtomicType.NOUN: 1, AtomicType.SENTENCE: 1},
    n_layers=1,
    n_single_qubit_params=3,
)
remove_cups = RemoveCupsRewriter()

# NOTE(review): every diagram must end in the same output type for the
# outputs to share one shape - verify this for the filtered sentences.
train_circuits = [ansatz(remove_cups(diagram)) for diagram in train_diagrams]
val_circuits = [ansatz(remove_cups(diagram)) for diagram in val_diagrams]

train_circuits[0].draw(figsize=(9, 10))

from lambeq import PennyLaneModel

# The model is built from both splits so it holds the symbols of every circuit.
all_circuits = train_circuits + val_circuits

model = PennyLaneModel.from_diagrams(all_circuits)
model.initialise_weights()

from lambeq import Dataset

BATCH_SIZE = 1
train_dataset = Dataset(train_circuits, train_labels, batch_size=BATCH_SIZE)

val_dataset = Dataset(val_circuits, val_labels)

import torch


def acc(y_hat, y):
    """Return the fraction of rows whose predicted class matches the target.

    Both tensors are reduced with ``argmax`` over ``dim=1``, so each must be
    at least 2-D (one row per sample, one column per class); a 1-D input
    raises ``IndexError``.

    Args:
        y_hat: Model outputs.
        y: Target labels with the same layout as ``y_hat``.

    Returns:
        Number of matching rows divided by ``len(y)``.
    """
    # Debug output showing what the trainer passes in.
    print(y_hat)
    print(y)
    predicted = torch.argmax(y_hat, dim=1)
    expected = torch.argmax(y, dim=1)
    return (predicted == expected).sum().item() / len(y)

def loss(y_hat, y):
    """Return the mean squared error between model outputs and targets.

    Args:
        y_hat: Model outputs.
        y: Target labels; should have the same shape as ``y_hat``, otherwise
            PyTorch broadcasts the two and emits a ``UserWarning``.

    Returns:
        The result of ``torch.nn.functional.mse_loss(y_hat, y)``.
    """
    # Debug output showing what the trainer passes in.
    print(y_hat)
    print(y)
    return torch.nn.functional.mse_loss(y_hat, y)

from lambeq import PytorchTrainer

# Configure the trainer with the custom loss and accuracy functions.
trainer = PytorchTrainer(
    model=model,
    loss_function=loss,
    optimizer=torch.optim.Adam,
    learning_rate=1e-5,
    epochs=20,
    evaluate_functions={"acc": acc},
    evaluate_on_train=True,
    use_tensorboard=False,
    verbose='text',
    seed=42,
)

# Train on the training split, evaluating on the validation split.
trainer.fit(train_dataset, val_dataset)

output:

tensor([1.], grad_fn=) tensor([[1., 0.]]) tensor([1.], grad_fn=) tensor([[1., 0.]])

:11: UserWarning: Using a target size (torch.Size([1, 2])) that is different to the input size (torch.Size([1])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
  return torch.nn.functional.mse_loss(y_hat, y)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
[](https://localhost:8080/#) in ()
     14 )
     15
---> 16 trainer.fit(train_dataset, val_dataset)

2 frames

[](https://localhost:8080/#) in acc(y_hat, y)
      3 print(y_hat)
      4 print(y)
----> 5 return (torch.argmax(y_hat, dim=1) ==
      6 torch.argmax(y, dim=1)).sum().item()/len(y)
      7

IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)

What is the problem? I have followed all the steps to train the circuits, but the training does not run. As far as I understand, the model is not returning its output in the same format as the labels (y), which is what causes the error. How can I solve this problem? Please help.
dimkart commented 5 months ago

Hi, please check if all your diagrams have the same number of free wires (same output). If this is not the case, use UnifyCodomainRewriter to give the same codomain to all your diagrams.

dimkart commented 4 months ago

This will be closed due to inactivity.