Cannot concatenate individual BCI 2000 dataset files

pengweimin commented 5 years ago

When I use MNE to download the BCI 2000 dataset and run the script below, I get an error.

import numpy as np
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import ShuffleSplit, cross_val_score

from mne import Epochs, pick_types, find_events
from mne.channels import read_layout
from mne.io import concatenate_raws, read_raw_edf
from mne.datasets import eegbci
from mne.time_frequency import  tfr_morlet
import scipy.io as scio

# Print the module docstring (the docstring itself is not part of this excerpt).
print(__doc__)

# #############################################################################
# # Set parameters and read data

# avoid classification of evoked responses by using epochs that start 1s after
# cue onset.
# NOTE(review): tmin is 0. here; the 1 s offset only appears in the crop used
# for `epochs_train` further down -- confirm this is intended.
tmin, tmax = 0., 4.
event_id = dict(hands=2, feet=3)

runs = [6, 10, 14]  # motor imagery: hands vs feet

# Fetch the three runs for every subject; each call yields that subject's files.
raw_fnames = [eegbci.load_data(subject, runs) for subject in range(1,110)]  # subjects 1 through 109
# Flatten the per-subject results into a single sequence of file names.
raw_fnames = np.concatenate(raw_fnames)
raw_files = [read_raw_edf(f, preload=True, stim_channel='auto') for f in
             raw_fnames]
# This is the call that raises the ValueError reported in this thread
# ("raw[261]['info']['sfreq'] must match").
raw = concatenate_raws(raw_files)

# strip channel names of "." characters
raw.rename_channels(lambda x: x.strip('.'))

# Apply band-pass filter
# NOTE(review): both filter calls below are commented out, so no filtering is
# actually applied; only the EEG channel selection is computed.
picks = pick_types(raw.info, meg=False, eeg=True, stim=False, eog=False,
                   exclude='bads')
#raw.notch_filter(np.arange(50, 251, 50), picks=picks, fir_design='firwin')
#raw.filter(8., 30., fir_design='firwin', skip_by_annotation='edge')

# Extract events from the 'STI 014' stim channel of the concatenated recording.
events = find_events(raw, shortest_event=0, stim_channel='STI 014')

# Read epochs (train will be done only between 1 and 2s)
# Testing will be done with a running classifier
epochs = Epochs(raw, events, event_id, tmin, tmax, proj=True, picks=picks,
                baseline=None, preload=True)
# NOTE(review): epochs_train is not used again in this excerpt.
epochs_train = epochs.copy().crop(tmin=1., tmax=2.)
# The last column of the events array is used as the class label.
labels = epochs.events[:, -1] 

epochs_data = epochs.get_data()

# Disabled time-frequency experiment, left as posted.
#freqs=np.arange(8., 1., 30.)
##n_cycles = freqs / 2.
##datas=tfr_morlet(epochs,freqs=freqs,n_cycles=n_cycles, return_itc=False)

# Save the epoch data and labels to a MATLAB .mat file.
scio.savemat('D://pwm/tf_EEGLearn-master/data.mat',{'data':epochs_data,'label':labels})

ValueError: raw[261]['info']['sfreq'] must match

cbrnr commented 5 years ago

Can you please post the full output/error message? It is not clear where exactly this error is happening.

pengweimin commented 5 years ago

This is the full error message, thanks.

Traceback (most recent call last):

File "", line 1, in runfile('D:/pwm/BCI 2000/download_dataset.py', wdir='D:/pwm/BCI 2000')

File "C:\Users\pwm\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 866, in runfile execfile(filename, namespace)

File "C:\Users\pwm\Anaconda3\lib\site-packages\spyder\utils\site\sitecustomize.py", line 102, in execfile exec(compile(f.read(), filename, 'exec'), namespace)

File "D:/pwm/BCI 2000/download_dataset.py", line 38, in raw = concatenate_raws(raw_files)

File "", line 2, in concatenate_raws

File "C:\Users\pwm\AppData\Roaming\Python\Python35\site-packages\mne\utils.py", line 952, in verbose return function(*args, **kwargs)

File "C:\Users\pwm\AppData\Roaming\Python\Python35\site-packages\mne\io\base.py", line 2637, in concatenate_raws raws[0].append(raws[1:], preload)

File "C:\Users\pwm\AppData\Roaming\Python\Python35\site-packages\mne\io\base.py", line 2083, in append _check_raw_compatibility(all_raws)

File "C:\Users\pwm\AppData\Roaming\Python\Python35\site-packages\mne\io\base.py", line 2584, in _check_raw_compatibility raise ValueError('raw[%d][\'info\'][\'sfreq\'] must match' % ri)

ValueError: raw[261]['info']['sfreq'] must match

cbrnr commented 5 years ago

The problem is that the data of three subjects have a sampling rate of 128 Hz, whereas all other data sets have a sampling rate of 160 Hz. Here's a minimal working example to reproduce:

from mne.io import concatenate_raws, read_raw_edf
from mne.datasets import eegbci

# Minimal reproduction: load runs 6, 10 and 14 for subjects 1 through 109.
runs = [6, 10, 14]
raw_fnames = [eegbci.load_data(subject, runs) for subject in range(1, 110)]
raw_fnames = [run for subject in raw_fnames for run in subject]  # flatten
raw_files = [read_raw_edf(f, preload=True) for f in raw_fnames]

# Collect each file's sampling frequency so the mismatching files can be found.
fs = [raw.info["sfreq"] for raw in raw_files] 
# Expected to fail: the files do not all share the same sampling frequency.
raw = concatenate_raws(raw_files)

The last line fails because of different sampling rates. You can inspect the contents of fs to see which data sets have a different sampling rate (files 262, 263, 264, 274, 275, 276, 298, 299, and 300 when counting from 1, i.e. zero-based indices 261–263, 273–275, and 297–299, corresponding to subjects 88, 92, and 100).

You should exclude these subjects, i.e.

from mne.io import concatenate_raws, read_raw_edf
from mne.datasets import eegbci

runs = [6, 10, 14]
# Skip subjects 88, 92 and 100, whose files have a different sampling rate.
subjects = [s for s in range(1, 110) if s not in (88, 92, 100)]
raw_fnames = [eegbci.load_data(subject, runs) for subject in subjects]
raw_fnames = [run for subject in raw_fnames for run in subject]  # flatten
raw_files = [read_raw_edf(f, preload=True) for f in raw_fnames]

# NOTE: as reported right after this snippet, this call still fails, now with
# "raw[261]._cals must match".
raw = concatenate_raws(raw_files)

However, concatenating still fails because:

ValueError                                Traceback (most recent call last)
<ipython-input-38-35ccc276e9d0> in <module>
      9 raw_files = [read_raw_edf(f, preload=True) for f in raw_fnames]
     10 
---> 11 raw = concatenate_raws(raw_files)

</usr/local/lib/python3.7/site-packages/mne/externals/decorator.py:decorator-gen-144> in concatenate_raws(raws, preload, events_list, verbose)

/usr/local/lib/python3.7/site-packages/mne/utils/_logging.py in wrapper(*args, **kwargs)
     87             with use_log_level(verbose_level):
     88                 return function(*args, **kwargs)
---> 89         return function(*args, **kwargs)
     90     return FunctionMaker.create(
     91         function, 'return decfunc(%(signature)s)',

/usr/local/lib/python3.7/site-packages/mne/io/base.py in concatenate_raws(raws, preload, events_list, verbose)
   2389         first, last = zip(*[(r.first_samp, r.last_samp) for r in raws])
   2390         events = concatenate_events(events_list, first, last)
-> 2391     raws[0].append(raws[1:], preload)
   2392 
   2393     if events_list is None:

/usr/local/lib/python3.7/site-packages/mne/io/base.py in append(self, raws, preload)
   1861         all_raws = [self]
   1862         all_raws += raws
-> 1863         _check_raw_compatibility(all_raws)
   1864 
   1865         # deal with preloading data first (while files are separate)

/usr/local/lib/python3.7/site-packages/mne/io/base.py in _check_raw_compatibility(raw)
   2342             raise ValueError('raw[%d][\'info\'][\'ch_names\'] must match' % ri)
   2343         if not all(raw[ri]._cals == raw[0]._cals):
-> 2344             raise ValueError('raw[%d]._cals must match' % ri)
   2345         if len(raw[0].info['projs']) != len(raw[ri].info['projs']):
   2346             raise ValueError('SSP projectors in raw files must be the same')

ValueError: raw[261]._cals must match

Not sure what's going on there, but you might want to iteratively exclude all data sets that give errors.

agramfort commented 5 years ago

The cals have nothing to do with the sampling frequency, but the mismatch suggests that the files have been acquired with different parameters.

TanTingyi commented 5 years ago

Drop subjects 88, 89, 92, and 100.

Bardiafeiz commented 3 years ago

This error can also occur when the channels of two subjects differ. I had the same problem, so I first picked the same channels from both of them with pick_channels([channels_name]) (to do this, you need to load the raw data into memory first) and then concatenated them.

cbrnr commented 3 years ago

Right, you can only concatenate compatible datasets (i.e. matching channel names, channel properties, sampling frequencies). This is intended behavior, so I'm closing this issue.

mne-tools / mne-python

Cannot concatenate individual BCI 2000 dataset files #6539

This is the full error message, thanks.

ValueError: raw[261]['info']['sfreq'] must match