/*************************************************************************** * File: G:\CTRHS\eMERGE_II\PROGRAMMING\Programs\eM2_Phenotypes\CAAD\SAS\CAAD_Process_NLP_Data.sas * Auth: David Carrell * Date: 09/05/2014 2:05:44 PM * Task: Read into SAS data generated by the CAAD NLP system and saved as a SQL Server * database table, connecting from SAS to the SQL database via a SAS libname statement, * then summarize NLP RESULTS to identify patients qualifying as CAAD cases or * CAAD controls based on measured stenosis levels as described in the pseudo code * document posted on PheKB. * * Note: The SAS libname used here requires that SAS has an OLEDB driver installed, that * the database is able to accept login credentials via integrated security, that * the database is Microsoft SQL Server, that the server name is ghriNLP, and that * the database name on the server is EMERGE. Modify the libname as needed for your * database and environment. * * * I/O: In: sqldbase.CAAD_Rad_Rpts_Results * * * Out: mylib.CAAD_Rad_Rpts_Results * mylib.CAAD_cases_stenosis_ge_50 * mylib.CAAD_cases_stenosis_16_to_49 * mylib.CAAD_conrols_stenosis_le_15 * mylib.CAAD_cases_and_controls (identifies cases/controls via stenosis) * ****************************************************************************/ ** Following are login scripts unique to the user and will need to be changed. ; ** ; options nosource2 ; %include '\\home\carrds1\sas\NewDwLogin.sas' ; %include "//ghrisas/warehouse/remote/remotestart.sas" ; options source2 ; run ; options msglevel=i pageno=1 nocenter noovp symbolgen source2 errabend nosqlremerge formchar='|-++++++++++=|-/|<>*' ; run; ** Following are SAS libnames and will need to be changed to point to your data source ** and storage area. ; ** ; libname mylib '\\groups\data\CTRHS\eMERGE_II\PROGRAMMING\Programs\eM2_Phenotypes\CAAD\Data' ; libname sqldbase OLEDB provider=SQLOLEDB datasource='ghriNLP' properties=("Integrated Security"=SSPI "Initial Catalog"=EMERGE) DBMAX_TEXT=32767 ; ** First read the data produced by the CAAD NLP system ** from the SQL database into a SAS data set. ** ; proc contents data=sqldbase.CAAD_Rad_Rpts_Results varnum ; run ; data mylib.CAAD_Rad_Rpts_Results ; set sqldbase.CAAD_Rad_Rpts_Results ; run ; ** CAAD Cases by stenosis: >=50 percent stenosis rule: ** ** Now process the CAAD NLP data to identify any patients who ** qualify as CAAD cases according to the rule for measured stenosis ** greater than or equal to 50 percent. ; ** ; proc sql; create table mylib.CAAD_cases_stenosis_ge_50 as select min('CASE (measured stenosis >=50%)') as Subject_Assignment , min(SourceName) as Patient_ID , min(input(custom0, 5.)) as Min_low_stenosis , max(input(custom0, 5.)) as Max_low_stenosis , min(input(custom1, 5.)) as Min_hi_stenosis , max(input(custom1, 5.)) as Max_hi_stenosis , min(datepart(SourceDate)) as Min_stenosis_date format = date9. , max(datepart(SourceDate)) as Max_stenosis_date format = date9. from mylib.CAAD_Rad_Rpts_Results group by SourceName having min(input(custom0, 5.)) >= 50 order by SourceName ; quit; run ; ** CAAD Cases by stenosis: >=16% and <=49 percent stenosis rule: ** ** Now process the CAAD NLP data to identify any patients who ** qualify as CAAD cases according to the rule for measured stenosis ** greater than or equal to 15 percent and less than or equal to 49 percent. ; ** ; proc sql; create table mylib.CAAD_cases_stenosis_16_to_49 as select min('CASE (measured stenosis 16-49%)') as Subject_Assignment , min(SourceName) as Patient_ID , min(input(custom0, 5.)) as Min_low_stenosis , max(input(custom0, 5.)) as Max_low_stenosis , min(input(custom1, 5.)) as Min_hi_stenosis , max(input(custom1, 5.)) as Max_hi_stenosis , min(datepart(SourceDate)) as Min_stenosis_date format = date9. , max(datepart(SourceDate)) as Max_stenosis_date format = date9. from mylib.CAAD_Rad_Rpts_Results group by SourceName having min(input(custom0, 5.)) >= 16 AND max(input(custom1, 5.)) <= 49 order by SourceName ; quit; run ; ** CAAD Controls by stenosis: <=15 percent stenosis rule: ** ** Now process the CAAD NLP data to identify any patients who ** qualify as CAAD controls according to the rule for maximum ** measured stenosis being less than or equal to 15 percent. ; ** ; proc sql; create table mylib.CAAD_conrols_stenosis_le_15 as select min('CONTROL (maximum stenosis <=15%)') as Subject_Assignment , min(SourceName) as Patient_ID , min(input(custom0, 5.)) as Min_low_stenosis , max(input(custom0, 5.)) as Max_low_stenosis , min(input(custom1, 5.)) as Min_hi_stenosis , max(input(custom1, 5.)) as Max_hi_stenosis , min(datepart(SourceDate)) as Min_stenosis_date format = date9. , max(datepart(SourceDate)) as Max_stenosis_date format = date9. from mylib.CAAD_Rad_Rpts_Results group by SourceName having min(input(custom0, 5.)) <= 15 AND max(input(custom0, 5.)) <= 15 AND min(input(custom1, 5.)) <= 15 AND max(input(custom1, 5.)) <= 15 order by SourceName ; quit; run ; ** Now combine the three data files created above, one per range of ** stenosis values (two of which are for cases and one of which is for ** controls. ; ** ; proc sql ; create table mylib.CAAD_cases_and_controls as select * from mylib.CAAD_cases_stenosis_ge_50 union select * from mylib.CAAD_cases_stenosis_16_to_49 union select * from mylib.CAAD_conrols_stenosis_le_15 order by Subject_Assignment , Patient_ID ; quit ; run ; ** Check for problems, which would be indicated if ** any Patient_ID appeard in >1 group of stenosis ** readings. ; ** ; proc sql ; create table mylib.check_should_be_all_ones as select Patient_ID , count(*) as N_Records from mylib.CAAD_cases_and_controls group by Patient_ID ; quit ; run ; proc freq data=mylib.check_should_be_all_ones ; table N_Records ; run ; proc freq data=mylib.CAAD_cases_and_controls ; table Subject_Assignment ; run ; proc contents data=mylib.CAAD_Rad_Rpts_Results varnum ; run ; proc contents data=mylib.CAAD_cases_stenosis_ge_50 varnum ; run ; proc contents varnum data=mylib.CAAD_cases_stenosis_16_to_49 ; run ; proc contents data=mylib.CAAD_conrols_stenosis_le_15 varnum ; run ; proc contents data=mylib.CAAD_cases_and_controls varnum ; run ; endrsubmit ; endsas ;