Scan order check update

kcho commented 1 year ago

The Scan order table in the QQC report checks the series order compared to the template dataset. This table is created by qqc.qqc.dicom.check_order_of_series.

You can try using the function by running the lines below in python

export PYTHONPATH=/data/predict1/home/kcho/software/qqc:${PYTHONPATH}
ipython

# The qqc imports below rely on the PYTHONPATH export shown above.
from pathlib import Path
from qqc.dicom_files import get_dicom_files_walk
from qqc.qqc.json import jsons_from_bids_to_df
from qqc.qqc.dicom import check_order_of_series

# Session to be checked (a directory under sourcedata).
input_dir = Path('/data/predict1/data_from_nda/MRI_ROOT/sourcedata/SI03540/ses-XXXXXXX')
df_full_input = get_dicom_files_walk(input_dir, quick_scan=True)

# Template session (a directory under rawdata) whose series order is the reference.
template_dir = Path('/data/predict1/data_from_nda/MRI_ROOT/rawdata/sub-SI00726/ses-XXXXXX')
df_full_std = jsons_from_bids_to_df(template_dir).drop_duplicates()

# Build the scan order table by comparing the input session with the template.
order_check_df = check_order_of_series(df_full_input, df_full_std)

# Bare expression: ipython echoes the resulting table.
order_check_df

However, when a series gets repeated due to an issue in the initial scan, the order of series will be shifted by one and will not match the standard template anymore.

# Second input session, used to show the table when a series was repeated.
input_dir2 = Path('/data/predict1/data_from_nda/MRI_ROOT/sourcedata/SI07530/ses-2023XXXXX')
df_full_input2 = get_dicom_files_walk(input_dir2, quick_scan=True)

# Compared against the same template table (df_full_std) as the first example.
order_check_df2 = check_order_of_series(df_full_input2, df_full_std)

# Bare expression: ipython echoes the resulting table.
order_check_df2

        series_num   series_order_target          series_order series_num_target order_diff
Summary                                                                                Fail
0              1.0             Localizer             Localizer               1.0       Pass
1              2.0              AAHScout              AAHScout               2.0       Pass
2              3.0      AAHScout_MPR_sag      AAHScout_MPR_sag               3.0       Pass
3              4.0      AAHScout_MPR_cor      AAHScout_MPR_cor               4.0       Pass
4              5.0      AAHScout_MPR_tra      AAHScout_MPR_tra               5.0       Pass
5              6.0     Localizer_aligned     Localizer_aligned               6.0       Pass
6              7.0      DistortionMap_AP      DistortionMap_AP               7.0       Pass
7              8.0      DistortionMap_PA      DistortionMap_PA               8.0       Pass
8              9.0            T1w_MPR_ND            T1w_MPR_ND               9.0       Pass
9             10.0               T1w_MPR               T1w_MPR              10.0       Pass
10            11.0            T2w_SPC_ND            T2w_SPC_ND              11.0       Pass
11            12.0               T2w_SPC               T2w_SPC              12.0       Pass
12            13.0      DistortionMap_AP      DistortionMap_AP              13.0       Pass
13            15.0      DistortionMap_PA      DistortionMap_PA              14.0       Pass
14            17.0   rfMRI_REST_AP_SBRef   rfMRI_REST_AP_SBRef              15.0       Pass
15            19.0         rfMRI_REST_AP   rfMRI_REST_AP_SBRef              16.0       Fail
16            21.0   rfMRI_REST_PA_SBRef         rfMRI_REST_AP              17.0       Fail
17            23.0         rfMRI_REST_PA   rfMRI_REST_PA_SBRef              18.0       Fail
18            25.0      dMRI_b0_AP_SBRef         rfMRI_REST_PA              19.0       Fail
19            27.0            dMRI_b0_AP      dMRI_b0_AP_SBRef              20.0       Fail
20            29.0  dMRI_dir176_PA_SBRef            dMRI_b0_AP              21.0       Fail
21            31.0        dMRI_dir176_PA  dMRI_dir176_PA_SBRef              22.0       Fail
22            33.0      dMRI_b0_AP_SBRef        dMRI_dir176_PA              25.0       Fail
23            35.0            dMRI_b0_AP      dMRI_b0_AP_SBRef              26.0       Fail
24            37.0      DistortionMap_AP            dMRI_b0_AP              27.0       Fail
25            39.0      DistortionMap_PA      DistortionMap_AP              28.0       Fail
26            41.0   rfMRI_REST_AP_SBRef      DistortionMap_PA              29.0       Fail
27            43.0         rfMRI_REST_AP   rfMRI_REST_AP_SBRef              30.0       Fail
28            45.0   rfMRI_REST_PA_SBRef         rfMRI_REST_AP              31.0       Fail
29            47.0         rfMRI_REST_PA   rfMRI_REST_PA_SBRef              32.0       Fail
30            49.0                   NaN         rfMRI_REST_PA               NaN       Fail

We need a function that

takes in order_check_df (pd.DataFrame)
and replaces Fail with Pass if the only difference is that extra series were added to the series_order column.
maybe map out which series occurs before each series in the series_order_target column, and check if this mapping applies to each row in the series_order column?

nickckim commented 1 year ago


def consecutive_duplicates(df):
    """
    Realign 'series_order_target' around consecutive duplicate series.

    Every row (other than the first) whose 'series_order' repeats the
    previous row's 'series_order' gets the message
    'Consecutive duplicate detected' in 'series_order_target'. The template
    entries that follow are pushed down by one row, so the rows below the
    duplicate line up with the template again. Template entries pushed past
    the last row are discarded.

    'order_diff' is then recomputed for every row except the first one, as
    'Pass' when 'series_order_target' equals 'series_order' and 'Fail'
    otherwise. Rows holding the message are therefore 'Fail'.

    The first row is never flagged and its 'order_diff' is left untouched
    (the original code skipped it because it contains floats, i.e. it is
    presumably the summary row -- NOTE(review): confirm against the caller).

    Rows are processed by position, so the function works whatever the
    index labels are (for example a 'Summary' label followed by 0..n).

    Args:
        df (pd.DataFrame): table with 'series_order',
            'series_order_target' and 'order_diff' columns. It is updated
            in place.

    Returns:
        pd.DataFrame: the same ``df`` object. It is returned unchanged when
        no consecutive duplicate is found.
    """
    message = 'Consecutive duplicate detected'

    observed = df['series_order'].tolist()
    expected = df['series_order_target'].tolist()

    def same(left, right):
        # Missing values never match anything, like Series.eq does.
        return not pd.isna(left) and not pd.isna(right) and left == right

    # Positional mask of rows repeating the previous row's series
    is_duplicate = [
        position > 0 and same(series, observed[position - 1])
        for position, series in enumerate(observed)
    ]
    if not any(is_duplicate):
        return df

    # Build the realigned column in a single pass: duplicates receive the
    # message, every other row consumes the next template entry.
    remaining = iter(expected)
    realigned = [
        message if duplicate else next(remaining)
        for duplicate in is_duplicate
    ]

    # Recompute 'order_diff' with the labels used by the rest of the table
    # (not booleans); the first row keeps its current value.
    statuses = df['order_diff'].tolist()
    for position in range(1, len(observed)):
        matches = same(realigned[position], observed[position])
        statuses[position] = 'Pass' if matches else 'Fail'

    # Plain lists are assigned by position, not by index label.
    df['series_order_target'] = realigned
    df['order_diff'] = statuses

    return df

nickckim commented 1 year ago

In the above code, if consecutive duplicate series are detected in 'series_order', a message cell will be added to 'series_order_target' at the index of the consecutive duplicate series (realigning series below it to the standard template), and 'order_diff' will be updated.

nickckim commented 1 year ago

@kcho Please let me know if you have any thoughts. I will hold off on integrating this patch into the repository for now, as you know best where it should go.

kcho commented 1 year ago

@nickckim Great work. Could you create a new branch and add this function to qqc/qqc/dicom.py? Your function could be placed right before line 246 to take in series_order_df_all as input and return the updated series_order_df_all. I'll test your function in the new branch once you create the PR for this.

nickckim commented 1 year ago

Done @kcho

nickckim commented 6 months ago

Bug due to modifying df during iteration. For some subjects with several consecutive duplicates, the df gets messy.

nickckim commented 6 months ago

I found this revised code from last year that was never pushed. I believe it resolves the issue but it still needs to be tested. If I recall correctly, I left this code in a comment last year, but I cannot find the comment.

def consecutive_duplicates(df):
    """
    Ignore scan order fails caused by unexpected consecutive duplicates
    (consecutive duplicate in series_order that is not in series_order_target).

    The row labelled ``0`` is treated as the summary row and removed, and
    the index is reset. NOTE(review): confirm that the summary row carries
    label 0 (and not 'Summary') at the point where this function is called.

    If at least one row is a "Fail" whose series_order repeats the previous
    row's series_order and differs from its series_order_target, the
    series_order_target column is realigned and order_diff is recomputed:

    * series_order is walked row by row next to the template sequence.
      A row that matches the next template entry consumes it. A row that
      does not match but repeats the previous row's series_order gets
      "Unexpected consecutive duplicate" and consumes nothing. Any other
      row consumes the next template entry as a mismatch.
    * Template entries left over after the last row are appended as extra
      rows (all other columns missing), so that they are still reported.
      Missing values among the leftovers are dropped.
    * order_diff becomes "Pass" where series_order_target equals
      series_order, "Warning" on the inserted message rows and "Fail"
      everywhere else.

    Args:
        df (pd.DataFrame): scan order table with series_order,
            series_order_target and order_diff columns. It is not
            modified.

    Returns:
        pd.DataFrame: new table without the summary row. Apart from the
        removed row and the reset index it is unchanged when no unexpected
        consecutive duplicate is found.

    Raises:
        KeyError: if ``df`` has no row labelled ``0``.
    """
    message = "Unexpected consecutive duplicate"

    # Drop summary row and reset index on a new frame, so the caller's
    # table is left intact
    df = df.drop(0).reset_index(drop=True)

    # Rows that fail because the series repeats the previous row
    unexpected = (
        (df["order_diff"] == "Fail")
        & (df["series_order"].shift(1) == df["series_order"])
        & (df["series_order"] != df["series_order_target"])
    )

    # If df does not contain unexpected consecutive duplicate, return df
    if not unexpected.any():
        return df

    observed = df["series_order"].tolist()
    expected = df["series_order_target"].tolist()

    def same(left, right):
        # Missing values never match anything
        return not pd.isna(left) and not pd.isna(right) and left == right

    # Walk both sequences. `cursor` points at the next template entry that
    # has not been consumed; it advances at most once per row, so it never
    # runs past the end of `expected`.
    realigned = []
    cursor = 0
    for position, series in enumerate(observed):
        if same(series, expected[cursor]):
            realigned.append(expected[cursor])
            cursor += 1
        elif position > 0 and same(series, observed[position - 1]):
            # Extra repeat of the previous series: flag the row and keep
            # the template entry for the following row. `position > 0`
            # stops the first row from being compared with the last one.
            realigned.append(message)
        else:
            realigned.append(expected[cursor])
            cursor += 1

    # Template entries pushed past the last row are kept as extra rows
    # instead of being lost (or breaking the column assignment)
    leftover = [target for target in expected[cursor:] if not pd.isna(target)]
    if leftover:
        df = df.reindex(range(len(df) + len(leftover)))
        realigned.extend(leftover)

    # Update series_order_target column
    df["series_order_target"] = realigned

    # Recalculate order_diff column; the message rows are set last so they
    # always end up as "Warning"
    df["order_diff"] = "Fail"
    df.loc[
        df["series_order_target"] == df["series_order"], "order_diff"
    ] = "Pass"
    df.loc[df["series_order_target"] == message, "order_diff"] = "Warning"

    return df

AMP-SCZ / qqc

Scan order check update #35