@@ -39,28 +39,17 @@ function __construct($argv) {
3939 parent ::__construct ($ argv , "sider " );
4040
4141 // set and print application parameters
42- parent ::addParameter ('files ' ,true ,'all|label_mapping|adverse_effects_raw|indications_raw|meddra_freq_parsed ' ,'all ' ,'all or comma-separated list of ontology short names to process ' );
42+ parent ::addParameter ('files ' ,true ,'all|indications|se|freq ' ,'all ' ,'all or comma-separated list of ontology short names to process ' );
4343 parent ::addParameter ('download_url ' ,false ,null ,'http://sideeffects.embl.de/media/download/ ' );
4444
4545 parent ::initialize ();
4646 }
4747
4848 function run () {
49-
50- if (parent ::getParameterValue ('download ' ) === true )
51- {
52- $ this ->download ();
53- }
54- if (parent ::getParameterValue ('process ' ) === true )
55- {
56- $ this ->process ();
57- }
58-
59- }
60-
61- function download (){
6249 $ idir = parent ::getParameterValue ('indir ' );
50+ $ odir = parent ::getParameterValue ('outdir ' );
6351 $ files = parent ::getParameterValue ('files ' );
52+ $ dataset_description = '' ;
6453
6554 if ($ files == 'all ' ) {
6655 $ files = explode ('| ' , parent ::getParameterList ('files ' ));
@@ -70,8 +59,11 @@ function download(){
7059 }
7160
7261 foreach ($ files AS $ file ) {
73- $ lfile = $ idir .$ file .'.tsv.gz ' ;
74- $ rfile = parent ::getParameterValue ('download_url ' ).$ file .'.tsv.gz ' ;
62+ $ f = $ file ;
63+ if ($ file != "freq " ) $ f = "all_ " .$ file ;
64+ $ f = "meddra_ " .$ f .".tsv.gz " ;
65+ $ lfile = $ idir .$ f ;
66+ $ rfile = parent ::getParameterValue ('download_url ' ).$ f ;
7567 if (!file_exists ($ lfile ) || parent ::getParameterValue ('download ' ) == 'true ' ) {
7668 echo "downloading $ file... " ;
7769 $ ret = file_get_contents ($ rfile );
@@ -86,43 +78,15 @@ function download(){
8678 }
8779 echo "done! " .PHP_EOL ;
8880 }
89- }//foreach
90- }
91-
92- function process (){
93-
94- $ idir = parent ::getParameterValue ('indir ' );
95- $ odir = parent ::getParameterValue ('outdir ' );
96- $ files = parent ::getParameterValue ('files ' );
97-
98- if ($ files == 'all ' ) {
99- $ files = explode ('| ' , parent ::getParameterList ('files ' ));
100- array_shift ($ files );
101- } else {
102- $ files = explode (', ' , parent ::getParameterValue ('files ' ));
103- }
104-
105- parent ::setCheckpoint ('dataset ' );
106-
107- $ dataset_description = '' ;
108-
109- $ graph_uri = parent ::getGraphURI ();
110- if (parent ::getParameterValue ('dataset_graph ' ) == true ) parent ::setGraphURI (parent ::getDatasetURI ());
11181
112- foreach ($ files AS $ file ) {
113- $ lfile = $ idir .$ file .'.tsv.gz ' ;
114- $ rfile = parent ::getParameterValue ('download_url ' ).$ file .'.tsv.gz ' ;
115-
116- echo "Processing $ file... " ;
82+ echo "Processing $ f... " ;
11783 parent ::setReadFile ($ lfile ,true );
11884
11985 $ suffix = parent ::getParameterValue ('output_format ' );
12086 $ ofile = "sider- " .$ file .'. ' .$ suffix ;
12187 $ gz = false ;
12288
123- if (strstr (parent ::getParameterValue ('output_format ' ), "gz " )) {
124- $ gz = true ;
125- }
89+ if (strstr (parent ::getParameterValue ('output_format ' ), "gz " )) $ gz = true ;
12690
12791 parent ::setWriteFile ($ odir .$ ofile , $ gz );
12892 $ this ->$ file ();
@@ -320,43 +284,83 @@ function GetPCFromStereo($id)
320284 Format: label identifier, concept id, name of side effect (as found on the label)
321285 */
322286
323- function adverse_effects_raw ()
287+ function se ()
324288 {
325289 $ declared = null ;
326290
327291 parent ::setCheckpoint ('file ' );
328- while ($ l = $ this ->GetReadFile ()->Read ()) {
292+ while ($ l = $ this ->getReadFile ()->Read ()) {
329293 $ a = explode ("\t" ,$ l );
330- $ id = "sider: " .urlencode ($ a [0 ]);
331- $ cui = "umls: " .$ a [1 ];
332- $ cui_label = strtolower (trim ($ a [2 ]));
294+ if (count ($ a ) != 6 ) {
295+ trigger_error ("Expecting 6 columns, found " .count ($ a )." instead. " , E_USER_ERROR );
296+ exit ;
297+ }
298+ $ stitch_flat = "stitch: " .$ a [0 ];
299+ $ stitch_stereo = "stitch: " .$ a [1 ];
300+ $ cui = "umls: " .$ a [2 ];
301+ $ term_type = $ a [3 ];
302+ $ term_type_cui = $ a [4 ];
303+ $ term_type_label = $ a [5 ];
304+
305+ if ($ term_type == 'LLT ' ) continue ;
306+
307+ $ id = "sider: " .md5 ("se " .$ stitch_flat .$ cui );
308+
309+ $ cui_label = strtolower (trim ($ term_type_label ));
310+ if (!isset ($ declared [$ cui ])) {
311+ parent ::addRDF (
312+ parent ::describeClass ($ cui , $ cui_label )
313+ );
314+ $ declared [$ cui ] = '' ;
315+ }
316+
333317 parent ::addRDF (
334- parent ::describeClass ($ cui , $ cui_label ).
335- parent ::triplify ($ id , parent ::getVoc ()."side-effect " , $ cui )
318+ parent ::describeIndividual ($ id , "$ stitch_flat $ cui_label side effect " , parent ::getVoc ()."Drug-Side-Effect " ).
319+ parent ::triplify ($ id , parent ::getVoc ()."side-effect " , $ cui ).
320+ parent ::triplify ($ id , parent ::getVoc ()."stitch-flat " , $ stitch_flat ).
321+ parent ::triplify ($ id , parent ::getVoc ()."stitch-stereo " , $ stitch_stereo )
336322 );
337323 parent ::setCheckpoint ('record ' );
338324 }
325+
339326 parent ::setCheckpoint ('file ' );
340327 }
341328
342- function indications_raw ()
329+ function indications ()
343330 {
344331 $ declared = null ;
345-
332+ $ list = null ;
346333 parent ::setCheckpoint ('file ' );
347- while ($ l = $ this ->GetReadFile ()->Read ()) {
334+ while ($ l = $ this ->getReadFile ()->Read ()) {
348335 parent ::setCheckpoint ('record ' );
349336
350337 $ a = explode ("\t" ,$ l );
351- $ id = "sider: " .urlencode ($ a [0 ]);
352- $ cui = "umls: " .$ a [1 ];
353- $ cui_label = strtolower (trim ($ a [2 ]));
338+ list ($ stitch_flat ,$ cui ,$ provenance ,$ cui_label ,$ term_type ,$ term_cui ,$ term_cui_label ) = $ a ;
339+ $ id = "sider: " .md5 ("i " .$ stitch_flat .$ cui );
340+
341+ if ($ term_type == "LLT " or isset ($ list [$ id ])) continue ;
342+ if (!isset ($ list [$ id ])) {
343+ $ list [$ id ] = '' ;
344+ }
345+
346+
347+ $ stitch_id = "stitch: $ stitch_flat " ;
348+ $ meddra_id = "meddra: $ cui " ;
349+
350+ if (!isset ($ declared [$ cui ])) {
351+ parent ::addRDF (
352+ parent ::describeClass ($ meddra_id , $ cui_label )
353+ );
354+ $ declared [$ cui ] = '' ;
355+ }
354356
355357 parent ::addRDF (
356- parent ::describeClass ($ cui , $ cui_label ).
357- parent ::triplify ($ id , parent ::getVoc ()."indication " , $ cui )
358+ parent ::describeIndividual ($ id , $ stitch_id ." - " .$ meddra_id ." indication " , parent ::getVoc ()."Drug-Indication-Association " ).
359+ parent ::describeClass (parent ::getVoc ()."Drug-Indication-Association " ,"Drug-Disease Association " ).
360+ parent ::triplify ($ id , parent ::getVoc ()."drug " , $ stitch_id ).
361+ parent ::triplify ($ id , parent ::getVoc ()."indication " , $ meddra_id ).
362+ parent ::triplifyString ($ id , parent ::getVoc ()."provenance " , $ provenance )
358363 );
359- parent ::setCheckpoint ('record ' );
360364
361365 }
362366 parent ::setCheckpoint ('file ' );
@@ -384,30 +388,26 @@ function indications_raw()
384388matches the upper bound. Due to the nature of the data, there can be more than one frequency for the same label,
385389e.g. from different clinical trials or for different levels of severeness.
386390*/
387- function meddra_freq_parsed ()
391+ function freq ()
388392 {
389- $ cols = 12 ;
393+ $ cols = 10 ;
390394 $ i = 1 ;
391395 parent ::setCheckpoint ('file ' );
392396 while ($ l = parent ::getReadFile ()->read ()) {
393397 parent ::setCheckpoint ('record ' );
394-
395398 $ a = explode ("\t" ,str_replace ("% " ,"" ,$ l ));
396399 if (count ($ a ) != $ cols ) {
397- trigger_error ("Expecting $ cols, but found " .count ($ a )." instead... skipping file! " );
400+ trigger_error ("Expecting $ cols, but found " .count ($ a )." instead... skipping file! " , E_USER_ERROR );
398401 return false ;
399402 }
400- $ label = $ a [2 ];
401- $ label_id = parent ::getNamespace ().urlencode ($ label );
402- $ effect_id = "umls: " .$ a [3 ];
403-
404- $ id = parent ::getRes ().md5 ($ a [2 ].$ a [3 ].$ a [6 ]);
405- $ label = "$ a [4 ] in $ label $ a [2 ]" ;
403+ list ($ stitch_flat , $ stitch_stereo , $ cui , $ placebo , $ freq , $ freq_lower , $ freq_upper , $ concept_type , $ meddra_concept_id , $ meddra_concept_label );
404+ $ id = "stitch_resource: " .md5 ("se_freq " .$ l );
405+ $ label = "side effect frequency of $ meddra_concept_label for $ stitch_id " ;
406406 parent ::addRDF (
407- parent ::describeIndividual ($ id , $ label , parent ::getVoc ()."Drug-Effect " ).
408- parent ::describeClass (parent ::getVoc ()."Drug-Effect " ,"SIDER Drug-Effect " ).
409- parent ::triplify ($ id , parent ::getVoc ()."drug " , $ label_id ).
410- parent ::triplify ($ id , parent ::getVoc ()."effect " , $ effect_id )
407+ parent ::describeIndividual ($ id , $ label , parent ::getVoc ()."Drug-Effect-Frequency " ).
408+ parent ::describeClass (parent ::getVoc ()."Drug-Effect-Frequency " ,"SIDER Drug-Effect and Frequency " ).
409+ parent ::triplify ($ id , parent ::getVoc ()."drug " , $ stitch_flat ).
410+ parent ::triplify ($ id , parent ::getVoc ()."effect " , " meddra: " . $ meddra_concept_id )
411411 );
412412
413413 if ($ a [5 ]){
@@ -416,25 +416,25 @@ function meddra_freq_parsed()
416416 );
417417 }
418418
419- $ fid = $ id .md5 ($ a [5 ].$ a [6 ].$ a [7 ].$ a [8 ]);
420- // $fid = $id.($i++);
421- $ flabel = $ a [6 ];
422- $ ftype = parent ::getVoc ().ucfirst ($ a [6 ])."-Frequency " ;
423419 $ number = false ;
424- if (is_numeric ($ a [ 6 ] )) {
425- $ flabel = $ a [ 6 ] ."% " ;
426- $ ftype_label = "Specified -Frequency " ;
420+ if (is_numeric ($ freq )) {
421+ $ flabel = $ freq ."% " ;
422+ $ ftype_label = "Exact -Frequency " ;
427423 $ ftype = parent ::getVoc ().$ ftype_label ;
428424 $ number = true ;
425+ } else {
426+ $ flabel = $ freq ;
427+ $ ftype_label = "Qualitative-Frequency " ;
428+ $ ftype = parent ::getVoc ()."$ ftype_label;
429429 }
430- if ($ a [ 7 ] != $ a [ 8 ] ) {
431- $ flabel .= "( $ a [ 7 ] - $ a [ 8 ] ) " ;
430+ if( $ freq_lower != $ freq_upper ) {
431+ $ flabel .= " ($ freq_lower - $ freq_upper )";
432432 $ ftype_label = "Range-Frequency " ;
433433 $ ftype = parent ::getVoc ().$ ftype_label ;
434- }
434+ }
435435
436436 parent ::addRDF (
437- parent ::triplify ($ id ,parent ::getVoc ()."reported-frequency " ,$ fid ).
437+ parent ::triplify ($ id ,parent ::getVoc ()."AQualitative-Frequency " ,$ fid ).
438438 parent ::describeIndividual ($ fid ,$ flabel ,$ ftype ).
439439 parent ::describeClass ($ ftype , $ ftype_label )
440440 );
0 commit comments