ShannonAI / glyce

Code for NeurIPS 2019 - Glyce: Glyph-vectors for Chinese Character Representations
https://arxiv.org/abs/1901.10125
Apache License 2.0
419 stars 75 forks source link

Can't find train.json #38

Open Lirsakura opened 3 years ago

Lirsakura commented 3 years ago

Can you tell me where train.json is?

FileNotFoundError: [Errno 2] No such file or directory: '/data/bq/train.json'

okcd00 commented 3 years ago

The data files for bq all have the suffix .tsv (not .json).
I modified the functions in class BQProcessor as shown below, and it now loads the bq corpus successfully.
You can view the changes in my fork:


# Updated
class BQProcessor(DataProcessor):
    """Processor for the BQ sentence-pair data set stored as ``.tsv`` files.

    The three splits are read from ``train.tsv``, ``valid.tsv`` and
    ``test.tsv`` inside ``data_dir``.  Every row is indexed as
    ``row[0]`` = label, ``row[1]`` = first text, ``row[2]`` = second text.
    """

    def get_train_examples(self, data_dir):
        """See base class."""
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "train.tsv")), "train")

    def get_dev_examples(self, data_dir):
        """See base class."""
        # The "dev_matched" tag is kept unchanged so that the guids of
        # already-generated dev examples stay the same.
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "valid.tsv")),
            "dev_matched")

    def get_test_examples(self, data_dir):
        """See base class."""
        # Tag the split as "test": reusing the dev tag here produced guids
        # identical to those of the dev examples.
        return self._create_examples(
            self._read_tsv(os.path.join(data_dir, "test.tsv")),
            "test")

    def get_labels(self):
        """See base class."""
        return ["0", "1"]

    # Original
    def _create_examples_old(self, lines, set_type):
        """Creates examples from rows whose first column holds a JSON document.

        Superseded by ``_create_examples``; no method of this class calls it.

        Args:
            lines: iterable of rows; ``row[0]`` is decoded with ``json.loads``.
            set_type: split tag used as the guid prefix.

        Returns:
            A list of ``InputExample`` objects, one per row.
        """
        examples = []
        for (i, line) in enumerate(lines):
            line = json.loads(line[0])
            if i < 2:
                # Echo the first two decoded records as a loading sanity check.
                print("-"*10)
                print("check  loading example")
                print(line)
                print(type(line))
            guid = "%s-%s" % (set_type, i)
            text_a = line[1]
            text_b = line[2]
            # NOTE(review): the whole decoded record is used as the label,
            # which looks unintended -- confirm before reviving this method.
            label = line
            examples.append(
                InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
        return examples

    def _create_examples(self, lines, set_type):
        """Creates examples for the training, dev and test sets.

        Args:
            lines: iterable of rows, each indexable as
                ``[label, text_a, text_b]``.
            set_type: split tag used as the guid prefix
                (the guid is ``"<set_type>-<row index>"``).

        Returns:
            A list of ``InputExample`` objects, one per row.

        Raises:
            IndexError: if a row has fewer than three fields.
        """
        examples = []
        for (i, line) in enumerate(lines):
            if i < 2:
                # Echo the first two rows as a loading sanity check.
                print("-"*10)
                print("check  loading example")
                print(line)
                print(type(line))
            guid = "%s-%s" % (set_type, i)
            text_a = line[1]
            text_b = line[2]
            label = line[0]
            examples.append(
                InputExample(guid=guid, text_a=text_a, text_b=text_b, label=label))
        return examples