Matching SQI Dataframe Window to Clinical Time of “Event” for Multiple Records

Matching SQI Dataframe Window to Clinical Time of “Event” for Multiple Records

Importing Libraries

12
13
14
15
16
17
 from datetime import date
 import pandas as pd
 import numpy as np
 from datetime import datetime
 import matplotlib.pyplot as plt
 import os

Loading SQI Data and clinical data

 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
 filename_SQIs = r'..\..\..\..\OUCRU\Outputs\Complete_SQIs.csv'
 filename_Clinical = r'..\..\..\..\OUCRU\Clinical\v0.0.10\01nva_data_stacked_corrected.csv'
 study_no_path = r'..\..\..\..\OUCRU\01NVa_Dengue\Adults'


 #taking a list of records
 study_no_list = os.listdir(study_no_path) # Need to take the last 8 characters i.e. study_no_list[i][-8:]

 #reading .csv into dataframes
 Clinical = pd.read_csv(filename_Clinical)
 SQIs = pd.read_csv(filename_SQIs)

 SQIs['PPG_w_s'] = pd.to_datetime(SQIs['PPG_w_s'])
 SQIs['PPG_w_f'] = pd.to_datetime(SQIs['PPG_w_f'])


 TERMINAL = True

 #Showing Data
 if TERMINAL:
     print("\n Clincial Data:")
     print(Clinical)
     print("\n SQI Data:")
     print(SQIs)

 def match_clinical_to_SQIs(Clinical, SQIs, event, study_no_list):
     SQIs[event]=np.nan #forming an empty column for the event
     print("\n EVENT: ", event)
     #iterating through the study_no
     for i in range(len(study_no_list)):
         print("\n STUDY-NO: ", study_no_list[i])
         #finding the rows for which the following logic is satisfied
         event_row = Clinical[(((Clinical.column == event) & (Clinical.result == 'TRUE')) | (Clinical.result == event)) & (Clinical.study_no == study_no_list[i][-8:])]
         #iterating through the event row for indexing
         event_row['date'] = pd.to_datetime(event_row['date'])
         if event == 'shock_admission' and not event_row.empty:
             SQIs['shock_admission'][SQIs.study_no == study_no_list[i][-8:]] = True #Showing whether a record indicates a shock on admission
         else:
             for j in range(len(event_row)):
                 valid_SQI_rows = SQIs[(SQIs.PPG_w_s <= event_row.date[event_row.date.index[j]]) & (SQIs.PPG_w_f > event_row.date[event_row.date.index[j]]) & (SQIs.study_no == study_no_list[i][-8:])]
                 SQIs[event][valid_SQI_rows.index] = True #Setting the value to True for the window corresponding to an event
                 if valid_SQI_rows.empty:
                     ##If the date is equal to the date of an event, we can raise True for the whole day, in case no time is present (or it's out of the recording)
                     SQIs[event][(SQIs.PPG_w_s.dt.date == event_row.date[event_row.date.index[j]].date) & (SQIs.PPG_w_f.dt.date == event_row.date[event_row.date.index[j]].date) & (SQIs.study_no == study_no_list[i][-8:])]
     return SQIs

 '''
 NOTE:
 We previously had functions that fetched ppg start and
 calculated relative times but since we included the PPG Start times
 in the SQI file, that is no longer needed.
 '''

 #Calling the matching function by first defining the event
 event_lookup = 'event_shock'
 SQIs_with_Clinical = match_clinical_to_SQIs(Clinical, SQIs, event_lookup, study_no_list)

 event_lookup = 'reshock24'
 SQIs_with_Clinical = match_clinical_to_SQIs(Clinical, SQIs_with_Clinical, event_lookup, study_no_list)

 event_lookup = 'diagnosis_admission' #all diagnosis admission are for shock so we can use this as is
 SQIs_with_Clinical = match_clinical_to_SQIs(Clinical, SQIs_with_Clinical, event_lookup, study_no_list)

 event_lookup = 'shock_admission'
 SQIs_with_Clinical = match_clinical_to_SQIs(Clinical, SQIs_with_Clinical, event_lookup, study_no_list)
 #In the second iteration we call the function with the output of the first one, so that we can form
 #multiple columns of events
 event_lookup = 'ascites'
 SQIs_with_Clinical = match_clinical_to_SQIs(Clinical, SQIs_with_Clinical, event_lookup, study_no_list)

 event_lookup = 'respiratory_distress'
 SQIs_with_Clinical = match_clinical_to_SQIs(Clinical, SQIs_with_Clinical, event_lookup, study_no_list)

 event_lookup = 'ventilation_cannula'
 SQIs_with_Clinical = match_clinical_to_SQIs(Clinical, SQIs_with_Clinical, event_lookup, study_no_list)

 event_lookup = 'ventilation_mechanical'
 SQIs_with_Clinical = match_clinical_to_SQIs(Clinical, SQIs_with_Clinical, event_lookup, study_no_list)

 event_lookup = 'ventilation_ncpap'
 SQIs_with_Clinical = match_clinical_to_SQIs(Clinical, SQIs_with_Clinical, event_lookup, study_no_list)

 event_lookup = 'bleeding_severe'
 SQIs_with_Clinical = match_clinical_to_SQIs(Clinical, SQIs_with_Clinical, event_lookup, study_no_list)

 event_lookup = 'cns_abnormal'
 SQIs_with_Clinical = match_clinical_to_SQIs(Clinical, SQIs_with_Clinical, event_lookup, study_no_list)

 event_lookup = 'liver_mild'
 SQIs_with_Clinical = match_clinical_to_SQIs(Clinical, SQIs_with_Clinical, event_lookup, study_no_list)

 event_lookup = 'pleural_effusion'
 SQIs_with_Clinical = match_clinical_to_SQIs(Clinical, SQIs_with_Clinical, event_lookup, study_no_list)

 event_lookup = 'skidney'
 SQIs_with_Clinical = match_clinical_to_SQIs(Clinical, SQIs_with_Clinical, event_lookup, study_no_list)



 if TERMINAL:
     print('\n SQIs with Clinical Event Match:')
     print(SQIs_with_Clinical)


 #Saving output to CSV
 SQIs_with_Clinical.to_csv(r'..\..\..\..\OUCRU\Outputs\Complete_SQIs_with_Clinical.csv')


 if TERMINAL:
     #example of the output
     fig, axs = plt.subplots(nrows=2, sharex=True, figsize=(8, 10))
     img = plt.imread(r'..\..\..\..\MISC\SQI_Clinical_Image_example_2.png')
     axs[0].set_title('Example a Event Alignment with SQIs in Excel')
     axs[0].imshow(img)
     img2 = plt.imread(r'..\..\..\..\MISC\SQI_Clinical_Image_example_3.png')
     axs[1].imshow(img2)
     plt.show()
Example a Event Alignment with SQIs in Excel

Out:

 Clincial Data:
       Unnamed: 0  study_no              date        column         result unit result_old  date_old
0               0  003-1001  11/06/2020 13:30  hypertension           TRUE  NaN        NaN       NaN
1               1  003-1002  17/06/2020 11:10  hypertension          FALSE  NaN        NaN       NaN
2               2  003-1003  18/06/2020 07:30  hypertension          FALSE  NaN        NaN       NaN
3               3  003-1004  22/06/2020 12:45  hypertension          FALSE  NaN        NaN       NaN
4               4  003-1005  02/07/2020 15:11  hypertension          FALSE  NaN        NaN       NaN
...           ...       ...               ...           ...            ...  ...        ...       ...
42060       42060  003-2231  05/12/2020 00:00       outcome  Full recovery  NaN        NaN       NaN
42061       42061  003-2232  19/12/2020 00:00       outcome  Full recovery  NaN        NaN       NaN
42062       42062  003-2233  02/01/2021 00:00       outcome  Full recovery  NaN        NaN       NaN
42063       42063  003-2234  08/01/2021 00:00       outcome  Full recovery  NaN        NaN       NaN
42064       42064  003-2235  18/01/2021 00:00       outcome  Full recovery  NaN        NaN       NaN

[42065 rows x 8 columns]

 SQI Data:
                    timedelta                 PPG_w_s                 PPG_w_f    first  ...     w  study_no  study_no_rec  keep
0      0 days 00:05:00.010000 2020-07-28 16:04:20.104 2020-07-28 16:04:50.094    29913  ...     0  003-2009             0  True
1      0 days 00:05:30.010000 2020-07-28 16:04:50.104 2020-07-28 16:05:20.094    32913  ...     1  003-2009             0  True
2      0 days 00:06:00.010000 2020-07-28 16:05:20.104 2020-07-28 16:05:50.094    35913  ...     2  003-2009             0  True
3      0 days 00:06:30.010000 2020-07-28 16:05:50.104 2020-07-28 16:06:20.094    38913  ...     3  003-2009             0  True
4      0 days 00:07:00.010000 2020-07-28 16:06:20.104 2020-07-28 16:06:50.094    41913  ...     4  003-2009             0  True
...                       ...                     ...                     ...      ...  ...   ...       ...           ...   ...
12698  0 days 15:03:30.010000 2020-07-21 05:08:37.203 2020-07-21 05:09:07.193  5420828  ...  1797  003-2162             0  True
12699  0 days 15:04:00.010000 2020-07-21 05:09:07.203 2020-07-21 05:09:37.193  5423828  ...  1798  003-2162             0  True
12700  0 days 15:04:30.010000 2020-07-21 05:09:37.203 2020-07-21 05:10:07.193  5426828  ...  1799  003-2162             0  True
12701  0 days 15:05:00.010000 2020-07-21 05:10:07.203 2020-07-21 05:10:37.193  5429828  ...  1800  003-2162             0  True
12702  0 days 15:05:30.010000 2020-07-21 05:10:37.203 2020-07-21 05:10:52.883  5432828  ...  1801  003-2162             0  True

[12703 rows x 27 columns]

 EVENT:  event_shock

 STUDY-NO:  01NVa-003-2009
D:\FILES\Desktop\Dissertation ICL\Git\main\examples\Pre-processing\plot_SQI_and_clinical_matching_multiple_patients.py:56: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

D:\FILES\Desktop\Dissertation ICL\Git\main\examples\Pre-processing\plot_SQI_and_clinical_matching_multiple_patients.py:62: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


 STUDY-NO:  01NVa-003-2012

 STUDY-NO:  01NVa-003-2023

 STUDY-NO:  01NVa-003-2028

 STUDY-NO:  01NVa-003-2103

 STUDY-NO:  01NVa-003-2104

 STUDY-NO:  01NVa-003-2109

 STUDY-NO:  01NVa-003-2110

 STUDY-NO:  01NVa-003-2162

 EVENT:  reshock24

 STUDY-NO:  01NVa-003-2009

 STUDY-NO:  01NVa-003-2012

 STUDY-NO:  01NVa-003-2023

 STUDY-NO:  01NVa-003-2028

 STUDY-NO:  01NVa-003-2103

 STUDY-NO:  01NVa-003-2104

 STUDY-NO:  01NVa-003-2109

 STUDY-NO:  01NVa-003-2110

 STUDY-NO:  01NVa-003-2162

 EVENT:  diagnosis_admission

 STUDY-NO:  01NVa-003-2009

 STUDY-NO:  01NVa-003-2012

 STUDY-NO:  01NVa-003-2023

 STUDY-NO:  01NVa-003-2028

 STUDY-NO:  01NVa-003-2103

 STUDY-NO:  01NVa-003-2104

 STUDY-NO:  01NVa-003-2109

 STUDY-NO:  01NVa-003-2110

 STUDY-NO:  01NVa-003-2162

 EVENT:  shock_admission

 STUDY-NO:  01NVa-003-2009
D:\FILES\Desktop\Dissertation ICL\Git\main\examples\Pre-processing\plot_SQI_and_clinical_matching_multiple_patients.py:58: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


 STUDY-NO:  01NVa-003-2012

 STUDY-NO:  01NVa-003-2023

 STUDY-NO:  01NVa-003-2028

 STUDY-NO:  01NVa-003-2103

 STUDY-NO:  01NVa-003-2104

 STUDY-NO:  01NVa-003-2109

 STUDY-NO:  01NVa-003-2110

 STUDY-NO:  01NVa-003-2162

 EVENT:  ascites

 STUDY-NO:  01NVa-003-2009

 STUDY-NO:  01NVa-003-2012

 STUDY-NO:  01NVa-003-2023

 STUDY-NO:  01NVa-003-2028

 STUDY-NO:  01NVa-003-2103

 STUDY-NO:  01NVa-003-2104

 STUDY-NO:  01NVa-003-2109

 STUDY-NO:  01NVa-003-2110

 STUDY-NO:  01NVa-003-2162

 EVENT:  respiratory_distress

 STUDY-NO:  01NVa-003-2009

 STUDY-NO:  01NVa-003-2012

 STUDY-NO:  01NVa-003-2023

 STUDY-NO:  01NVa-003-2028

 STUDY-NO:  01NVa-003-2103

 STUDY-NO:  01NVa-003-2104

 STUDY-NO:  01NVa-003-2109

 STUDY-NO:  01NVa-003-2110

 STUDY-NO:  01NVa-003-2162

 EVENT:  ventilation_cannula

 STUDY-NO:  01NVa-003-2009

 STUDY-NO:  01NVa-003-2012

 STUDY-NO:  01NVa-003-2023

 STUDY-NO:  01NVa-003-2028

 STUDY-NO:  01NVa-003-2103

 STUDY-NO:  01NVa-003-2104

 STUDY-NO:  01NVa-003-2109

 STUDY-NO:  01NVa-003-2110

 STUDY-NO:  01NVa-003-2162

 EVENT:  ventilation_mechanical

 STUDY-NO:  01NVa-003-2009

 STUDY-NO:  01NVa-003-2012

 STUDY-NO:  01NVa-003-2023

 STUDY-NO:  01NVa-003-2028

 STUDY-NO:  01NVa-003-2103

 STUDY-NO:  01NVa-003-2104

 STUDY-NO:  01NVa-003-2109

 STUDY-NO:  01NVa-003-2110

 STUDY-NO:  01NVa-003-2162

 EVENT:  ventilation_ncpap

 STUDY-NO:  01NVa-003-2009

 STUDY-NO:  01NVa-003-2012

 STUDY-NO:  01NVa-003-2023

 STUDY-NO:  01NVa-003-2028

 STUDY-NO:  01NVa-003-2103

 STUDY-NO:  01NVa-003-2104

 STUDY-NO:  01NVa-003-2109

 STUDY-NO:  01NVa-003-2110

 STUDY-NO:  01NVa-003-2162

 EVENT:  bleeding_severe

 STUDY-NO:  01NVa-003-2009

 STUDY-NO:  01NVa-003-2012

 STUDY-NO:  01NVa-003-2023

 STUDY-NO:  01NVa-003-2028

 STUDY-NO:  01NVa-003-2103

 STUDY-NO:  01NVa-003-2104

 STUDY-NO:  01NVa-003-2109

 STUDY-NO:  01NVa-003-2110

 STUDY-NO:  01NVa-003-2162

 EVENT:  cns_abnormal

 STUDY-NO:  01NVa-003-2009

 STUDY-NO:  01NVa-003-2012

 STUDY-NO:  01NVa-003-2023

 STUDY-NO:  01NVa-003-2028

 STUDY-NO:  01NVa-003-2103

 STUDY-NO:  01NVa-003-2104

 STUDY-NO:  01NVa-003-2109

 STUDY-NO:  01NVa-003-2110

 STUDY-NO:  01NVa-003-2162

 EVENT:  liver_mild

 STUDY-NO:  01NVa-003-2009

 STUDY-NO:  01NVa-003-2012

 STUDY-NO:  01NVa-003-2023

 STUDY-NO:  01NVa-003-2028

 STUDY-NO:  01NVa-003-2103

 STUDY-NO:  01NVa-003-2104

 STUDY-NO:  01NVa-003-2109

 STUDY-NO:  01NVa-003-2110

 STUDY-NO:  01NVa-003-2162

 EVENT:  pleural_effusion

 STUDY-NO:  01NVa-003-2009

 STUDY-NO:  01NVa-003-2012

 STUDY-NO:  01NVa-003-2023

 STUDY-NO:  01NVa-003-2028

 STUDY-NO:  01NVa-003-2103

 STUDY-NO:  01NVa-003-2104

 STUDY-NO:  01NVa-003-2109

 STUDY-NO:  01NVa-003-2110

 STUDY-NO:  01NVa-003-2162

 EVENT:  skidney

 STUDY-NO:  01NVa-003-2009

 STUDY-NO:  01NVa-003-2012

 STUDY-NO:  01NVa-003-2023

 STUDY-NO:  01NVa-003-2028

 STUDY-NO:  01NVa-003-2103

 STUDY-NO:  01NVa-003-2104

 STUDY-NO:  01NVa-003-2109

 STUDY-NO:  01NVa-003-2110

 STUDY-NO:  01NVa-003-2162

 SQIs with Clinical Event Match:
                    timedelta                 PPG_w_s                 PPG_w_f    first  ...  cns_abnormal  liver_mild  pleural_effusion  skidney
0      0 days 00:05:00.010000 2020-07-28 16:04:20.104 2020-07-28 16:04:50.094    29913  ...           NaN         NaN               NaN      NaN
1      0 days 00:05:30.010000 2020-07-28 16:04:50.104 2020-07-28 16:05:20.094    32913  ...           NaN         NaN               NaN      NaN
2      0 days 00:06:00.010000 2020-07-28 16:05:20.104 2020-07-28 16:05:50.094    35913  ...           NaN         NaN               NaN      NaN
3      0 days 00:06:30.010000 2020-07-28 16:05:50.104 2020-07-28 16:06:20.094    38913  ...           NaN         NaN               NaN      NaN
4      0 days 00:07:00.010000 2020-07-28 16:06:20.104 2020-07-28 16:06:50.094    41913  ...           NaN         NaN               NaN      NaN
...                       ...                     ...                     ...      ...  ...           ...         ...               ...      ...
12698  0 days 15:03:30.010000 2020-07-21 05:08:37.203 2020-07-21 05:09:07.193  5420828  ...           NaN         NaN               NaN      NaN
12699  0 days 15:04:00.010000 2020-07-21 05:09:07.203 2020-07-21 05:09:37.193  5423828  ...           NaN         NaN               NaN      NaN
12700  0 days 15:04:30.010000 2020-07-21 05:09:37.203 2020-07-21 05:10:07.193  5426828  ...           NaN         NaN               NaN      NaN
12701  0 days 15:05:00.010000 2020-07-21 05:10:07.203 2020-07-21 05:10:37.193  5429828  ...           NaN         NaN               NaN      NaN
12702  0 days 15:05:30.010000 2020-07-21 05:10:37.203 2020-07-21 05:10:52.883  5432828  ...           NaN         NaN               NaN      NaN

[12703 rows x 41 columns]

Total running time of the script: ( 0 minutes 4.307 seconds)

Gallery generated by Sphinx-Gallery