compomics / ThermoRawFileParser

Thermo RAW file parser that runs on Linux/Mac and all other platforms that support Mono
Apache License 2.0
189 stars 50 forks source link

Orbitrap recognized in `referenceableParamGroupList` but not in `instrumentConfigurationList` ? #177

Closed elliotfontaine closed 6 months ago

elliotfontaine commented 9 months ago

This is the file produced by ThermoRawFileParser with default parameters:

<?xml version="1.0" encoding="utf-8"?>
<mzML xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" version="1.1.0" id="060623_std_30sec_CarboAmmo_10mM_70-20-10_01" xmlns="http://psi.hupo.org/ms/mzml">
  <cvList count="2">
    <cv id="MS" fullName="Mass spectrometry ontology" version="4.1.79" URI="https://raw.githubusercontent.com/HUPO-PSI/psi-ms-CV/master/psi-ms.obo" />
    <cv id="UO" fullName="Unit Ontology" version="09:04:2014" URI="https://raw.githubusercontent.com/bio-ontology-research-group/unit-ontology/master/unit.obo" />
  </cvList>
  <fileDescription>
    <fileContent>
      <cvParam cvRef="MS" accession="MS:1000579" value="" name="MS1 spectrum" />
      <cvParam cvRef="MS" accession="MS:1000810" value="" name="ion current chromatogram" />
    </fileContent>
    <sourceFileList count="1">
      <sourceFile id="RAW1" name="060623_std_30sec_CarboAmmo_10mM_70-20-10_01" location="file:////Users/elliotfontaine/raws/060623_std_30sec_CarboAmmo_10mM_70-20-10_01.raw">
        <cvParam cvRef="MS" accession="MS:1000768" value="" name="Thermo nativeID format" />
        <cvParam cvRef="MS" accession="MS:1000563" value="" name="Thermo RAW format" />
        <cvParam cvRef="MS" accession="MS:1000569" value="2aa446994a290815bb32f4194999d78e62777488" name="SHA-1" />
      </sourceFile>
    </sourceFileList>
  </fileDescription>
  <referenceableParamGroupList count="1">
    <referenceableParamGroup id="commonInstrumentParams">
      <cvParam cvRef="MS" accession="MS:1001742" value="" name="LTQ Orbitrap Velos" />
      <cvParam cvRef="MS" accession="MS:1000529" value="SN03173B" name="instrument serial number" />
    </referenceableParamGroup>
  </referenceableParamGroupList>
  <softwareList count="1">
    <software id="ThermoRawFileParser" version="1.4.3">
      <cvParam cvRef="MS" accession="MS:1003145" name="ThermoRawFileParser" />
    </software>
  </softwareList>
  <instrumentConfigurationList count="1">
    <instrumentConfiguration id="IC1">
      <referenceableParamGroupRef ref="commonInstrumentParams" />
      <componentList count="3">
        <source order="1">
          <cvParam cvRef="MS" accession="MS:1000073" value="" name="electrospray ionization" />
        </source>
        <analyzer order="2">
          <cvParam cvRef="MS" accession="MS:1000079" value="" name="fourier transform ion cyclotron resonance mass spectrometer" />
        </analyzer>
        <detector order="3">
          <cvParam cvRef="MS" accession="MS:1000624" value="" name="inductive detector" />
        </detector>
      </componentList>
    </instrumentConfiguration>
  </instrumentConfigurationList>
  <dataProcessingList count="1">
    <dataProcessing id="ThermoRawFileParserProcessing">
      <processingMethod order="0" softwareRef="ThermoRawFileParser">
        <cvParam cvRef="MS" accession="MS:1000544" value="" name="Conversion to mzML" />
      </processingMethod>
      <processingMethod order="1" softwareRef="ThermoRawFileParser">
        <cvParam cvRef="MS" accession="MS:1000035" value="" name="peak picking" />
      </processingMethod>
    </dataProcessing>
  </dataProcessingList>
  <run id="_060623_std_30sec_CarboAmmo_10mM_70-20-10_01" defaultInstrumentConfigurationRef="IC1" startTimeStamp="2023-06-21T17:30:53.439Z" defaultSourceFileRef="RAW1">
  </run>
</mzML>

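I don't think the analyser is a "fourier transform ion cyclotron resonance mass spectrometer" (MS:1000079): the instrument is reported as an LTQ Orbitrap Velos in `referenceableParamGroupList`. For comparison, this is the file produced by MSConvert from the same RAW file (identical SHA-1), which reports the analyser as "orbitrap" (MS:1000484):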

<?xml version="1.0" encoding="utf-8"?>
<indexedmzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.2_idx.xsd">
  <mzML xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd" id="060623_std_30sec_CarboAmmo_10mM_70-20-10_01" version="1.1.0">
    <cvList count="2">
      <cv id="MS" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" version="4.1.56" URI="https://raw.githubusercontent.com/HUPO-PSI/psi-ms-CV/master/psi-ms.obo"/>
      <cv id="UO" fullName="Unit Ontology" version="09:04:2014" URI="https://raw.githubusercontent.com/bio-ontology-research-group/unit-ontology/master/unit.obo"/>
    </cvList>
    <fileDescription>
      <fileContent>
        <cvParam cvRef="MS" accession="MS:1000579" name="MS1 spectrum" value=""/>
      </fileContent>
      <sourceFileList count="1">
        <sourceFile id="RAW1" name="060623_std_30sec_CarboAmmo_10mM_70-20-10_01.raw" location="file:///T:\redacted\rawdata\230612_Opti-Timepoints">
          <cvParam cvRef="MS" accession="MS:1000768" name="Thermo nativeID format" value=""/>
          <cvParam cvRef="MS" accession="MS:1000563" name="Thermo RAW format" value=""/>
          <cvParam cvRef="MS" accession="MS:1000569" name="SHA-1" value="2aa446994a290815bb32f4194999d78e62777488"/>
        </sourceFile>
      </sourceFileList>
    </fileDescription>
    <referenceableParamGroupList count="1">
      <referenceableParamGroup id="CommonInstrumentParams">
        <cvParam cvRef="MS" accession="MS:1001742" name="LTQ Orbitrap Velos" value=""/>
        <cvParam cvRef="MS" accession="MS:1000529" name="instrument serial number" value="SN03173B"/>
      </referenceableParamGroup>
    </referenceableParamGroupList>
    <sampleList count="1">
      <sample id="_x0031_" name="">
        <cvParam cvRef="MS" accession="MS:1000002" name="sample name" value="1"/>
      </sample>
    </sampleList>
    <softwareList count="2">
      <software id="Xcalibur" version="2.7.0 SP1">
        <cvParam cvRef="MS" accession="MS:1000532" name="Xcalibur" value=""/>
      </software>
      <software id="pwiz" version="3.0.22129">
        <cvParam cvRef="MS" accession="MS:1000615" name="ProteoWizard software" value=""/>
      </software>
    </softwareList>
    <instrumentConfigurationList count="2">
      <instrumentConfiguration id="IC1">
        <referenceableParamGroupRef ref="CommonInstrumentParams"/>
        <componentList count="3">
          <source order="1">
            <cvParam cvRef="MS" accession="MS:1000073" name="electrospray ionization" value=""/>
            <cvParam cvRef="MS" accession="MS:1000057" name="electrospray inlet" value=""/>
          </source>
          <analyzer order="2">
            <cvParam cvRef="MS" accession="MS:1000484" name="orbitrap" value=""/>
          </analyzer>
          <detector order="3">
            <cvParam cvRef="MS" accession="MS:1000624" name="inductive detector" value=""/>
          </detector>
        </componentList>
        <softwareRef ref="Xcalibur"/>
      </instrumentConfiguration>
      <instrumentConfiguration id="IC2">
        <referenceableParamGroupRef ref="CommonInstrumentParams"/>
        <componentList count="3">
          <source order="1">
            <cvParam cvRef="MS" accession="MS:1000073" name="electrospray ionization" value=""/>
            <cvParam cvRef="MS" accession="MS:1000057" name="electrospray inlet" value=""/>
          </source>
          <analyzer order="2">
            <cvParam cvRef="MS" accession="MS:1000083" name="radial ejection linear ion trap" value=""/>
          </analyzer>
          <detector order="3">
            <cvParam cvRef="MS" accession="MS:1000253" name="electron multiplier" value=""/>
          </detector>
        </componentList>
        <softwareRef ref="Xcalibur"/>
      </instrumentConfiguration>
    </instrumentConfigurationList>
    <dataProcessingList count="1">
      <dataProcessing id="pwiz_Reader_Thermo_conversion">
        <processingMethod order="0" softwareRef="pwiz">
          <cvParam cvRef="MS" accession="MS:1000544" name="Conversion to mzML" value=""/>
        </processingMethod>
        <processingMethod order="1" softwareRef="pwiz">
          <cvParam cvRef="MS" accession="MS:1000035" name="peak picking" value=""/>
          <userParam name="Thermo/Xcalibur peak picking"/>
        </processingMethod>
      </dataProcessing>
    </dataProcessingList>
    <run id="_x0030_60623_std_30sec_CarboAmmo_10mM_70-20-10_01" defaultInstrumentConfigurationRef="IC1" startTimeStamp="2023-06-21T15:30:53Z" defaultSourceFileRef="RAW1">
    </run>
  </mzML>
  <indexList count="2">
  </indexList>
  <indexListOffset>522043</indexListOffset>
  <fileChecksum>822dbcbf725d7759dcd0026dd7d0a6e9061d1d77</fileChecksum>
</indexedmzML>
caetera commented 9 months ago

Hi @elliotfontaine, thank you for using ThermoRawFileParser. Internally, RAW files use the same analyzer type (FTMS) for all FT-enabled mass data, i.e. for both FTICR and Orbitrap. The <referenceableParamGroup> element contains the instrument model and serial number, which are saved in the RAW file, whereas the elements of <componentList> are populated based on the scan types actually used in a particular RAW file. Currently, TRFP assigns MS:1000079 (i.e. the FTICR analyzer) to all FTMS scan types. It is possible to deduce whether FTMS means FTICR or Orbitrap from the instrument model (basically, by using a dictionary of all instruments). This feature can be implemented in the next release.