@@ -108,8 +108,6 @@ function run() {
108108 ->setLicense ("http://creativecommons.org/licenses/by-nc-sa/3.0/ " )
109109 ->setDataset ("http://identifiers.org/sider.effect/ " );
110110
111- if ($ file == "label_mapping " ) $ source_file ->setLicense ("http://creativecommons.org/publicdomain/zero/1.0/ " );
112-
113111 $ prefix = parent ::getPrefix ();
114112 $ bVersion = parent ::getParameterValue ('bio2rdf_release ' );
115113 $ date = parent ::getDate (filemtime ($ odir .$ ofile ));
@@ -141,131 +139,8 @@ function run() {
141139
142140 //reset graph URI to default value
143141 parent ::setGraphURI ($ graph_uri );
144-
145142 }
146- /*
147- 1 & 2: generic and brand names
148-
149- 3: a marker if the drug could be successfully mapped to STITCH. Possible values:
150- - [empty field]: success
151- - combination: two or more drugs were combined
152- - not found: could not find the name in the database
153- - mapping conflict: the available names point to two different compounds
154- - template: a package insert that contains information for a group of related drugs
155-
156- 4 & 5: STITCH compound ids, based on PubChem. Salt forms and stereo-isomers have been merged.
157- Column 4: "flat compound", i.e. stereo-isomers have been merged into one compound
158- Column 5: stereo-specific compound id
159-
160- To get the PubChem Compound Ids: take absolute value, for flat compound ids: subtract 100000000
161- E.g. aspirin: -100002244 --> 2244
162-
163- 6: URL of the downloaded PDF. This column is empty for FDA SPL labels, which are available in XML.
164- Unfortunately, many links have become stale since the labels were downloaded in 2009.
165-
166- 7: label identifier
167- */
168- function label_mapping ()
169- {
170- parent ::setCheckpoint ('file ' );
171-
172- $ declared = null ;
173- while ($ l = parent ::getReadFile ()->Read (1000000 )) {
174- parent ::setCheckpoint ('record ' );
175-
176- $ a = explode ("\t" ,$ l );
177- $ id = parent ::getNamespace ().urlencode (trim ($ a [6 ]));
178-
179- $ gnames_list = explode ("; " ,strtolower (trim ($ a [1 ])));
180- array_unique ($ gnames_list );
181- asort ($ gnames_list );
182- $ gnames = implode (" + " ,$ gnames_list );
183- if ($ a [2 ] == "combination " ) {
184- $ label = "combination: $ gnames " ;
185- $ type = "Combination-Drug " ;
186- } else {
187- if ($ a [0 ]) $ label .= $ a [0 ]." ( " .$ gnames .") " ;
188- else $ label = $ gnames ;
189- $ type = "Drug " ;
190- }
191-
192- parent ::addRDF (
193- parent ::describeIndividual ($ id , $ label , parent ::getVoc ().$ type ).
194- parent ::describeClass (parent ::getVoc ().$ type ,"SIDER " .$ type )
195- );
196-
197- // attempt to extract the spl id
198- $ b = explode ("_ " ,trim ($ a [6 ]));
199- if (isset ($ b [1 ])) {
200- $ c = explode ("- " ,$ b [1 ]);
201- if (count ($ c ) == 5 ) {
202- // possibly an SPL id
203- parent ::addRDF (parent ::triplify ($ id ,parent ::getVoc ()."x-spl " ,"dailymed: " .$ b [1 ]));
204- }
205- }
206-
207- if (trim ($ a [0 ])) {
208- $ brand_label = strtolower (trim ($ a [0 ]));
209- $ brand_qname = parent ::getRes ().md5 ($ brand_label );
210- parent ::addRDF (
211- parent ::describeIndividual ($ brand_qname , $ brand_label , parent ::getVoc ()."Brand-Drug-Name " ).
212- parent ::describeClass (parent ::getVoc ()."Brand-Drug-Name " ,"Brand Drug Name " ).
213- parent ::triplify ($ id , parent ::getVoc ()."brand-name " , $ brand_qname )
214- );
215- }
216- if (trim ($ a [1 ])) {
217- foreach ($ gnames_list AS $ generic_name ) {
218- $ generic_label = trim ($ generic_name );
219- $ generic_qname = parent ::getRes ().md5 ($ generic_label );
220- parent ::addRDF (
221- parent ::describeIndividual ($ generic_qname , $ generic_label , parent ::getVoc ()."Generic-Drug-Name " ).
222- parent ::describeClass (parent ::getVoc ()."Generic-Drug-Name " ,"Generic Drug Name " ).
223- parent ::triplify ($ id , parent ::getVoc ()."generic-name " , $ generic_qname )
224- );
225- }
226- }
227-
228- if ($ a [2 ]){
229- $ mapping_result = str_replace (" " ,"- " ,$ a [2 ]);
230- parent ::addRDF (
231- parent ::triplify ($ id , parent ::getVoc ()."mapping-result " , parent ::getVoc ().$ mapping_result )
232- );
233- }
234-
235- if ($ a [3 ]){
236- parent ::addRDF (
237- parent ::triplify ($ id , parent ::getVoc ()."stitch-flat-compound-id " , "stitch: " .$ a [3 ])
238- );
239-
240- $ pubchemcompound = $ this ->GetPCFromFlat ($ a [3 ]);
241- parent ::addRDF (
242- parent ::triplify ($ id , parent ::getVoc ()."pubchem-flat-compound-id " , "pubchemcompound: " .$ pubchemcompound )
243- );
244- }
245-
246- if ($ a [4 ]){
247- parent ::addRDF (
248- parent ::triplify ($ id , parent ::getVoc ()."stitch-stereo-compound-id " , "stitch: " .$ a [4 ])
249- );
250- $ pubchemcompound = $ this ->GetPCFromStereo ($ a [4 ]);
251- parent ::addRDF (
252- parent ::triplify ($ id , parent ::getVoc ()."pubchem-stereo-compound-id " , "pubchemcompound: " .$ pubchemcompound )
253- );
254- }
255143
256- if ($ a [5 ]){
257- $ url = str_replace (" " ,"+ " ,$ a [5 ]);
258- parent ::addRDF (
259- parent ::QQuadO_URL ($ id , parent ::getVoc ()."pdf-url " , $ url )
260- );
261- }
262-
263- parent ::setCheckpoint ('record ' );
264-
265- }
266- parent ::setCheckpoint ('file ' );
267- }
268-
269144 function GetPCFromFlat ($ id )
270145 {
271146 return ltrim (abs ($ id )-100000000 , "0 " );
@@ -313,12 +188,32 @@ function se()
313188 );
314189 $ declared [$ cui ] = '' ;
315190 }
316-
191+ if (!isset ($ declared [$ stitch_flat ])) {
192+ $ pubchem_id = "pubchem.compound: " .ltrim ( substr ($ stitch_flat ,4 ), "0 " );
193+ $ stereo_id = "pubchem.compound: " .ltrim ( substr ($ stitch_stereo ,4 ), "0 " );
194+ parent ::addRDF (
195+ parent ::triplify ($ stitch_flat , "rdf:type " , parent ::getVoc ()."Flat-Compound " ).
196+ parent ::describeClass (parent ::getVoc ()."Flat-Compound " , "Flat compound " ).
197+ parent ::triplify ($ stitch_flat , parent ::getVoc ()."x-pubchem.compound " , $ pubchem_id ).
198+ parent ::triplify ($ stitch_flat , parent ::getVoc ()."stitch-stereo " , $ stitch_stereo )
199+ );
200+ $ declared [$ stitch_flat ] = '' ;
201+ }
202+ if (!isset ($ declared [$ stitch_stereo ])) {
203+ $ pubchem_id = "pubchem.compound: " .ltrim ( substr ($ stitch_stereo ,4 ), "0 " );
204+ parent ::addRDF (
205+ parent ::triplify ($ stitch_stereo , "rdf:type " , parent ::getVoc ()."Stereo-Compound " ).
206+ parent ::describeClass (parent ::getVoc ()."Stereo-Compound " , "Stereo compound " ).
207+ parent ::triplify ($ stitch_stereo , parent ::getVoc ()."x-pubchem.compound " , $ pubchem_id ).
208+ parent ::triplify ($ stitch_stereo , parent ::getVoc ()."stitch-flat " , $ stitch_flat )
209+ );
210+ $ declared [$ stitch_stereo ] = '' ;
211+ }
212+
317213 parent ::addRDF (
318- parent ::describeIndividual ($ id , "$ stitch_flat $ cui_label side effect " , parent ::getVoc ()."Drug-Side-Effect " ).
319- parent ::triplify ($ id , parent ::getVoc ()."side-effect " , $ cui ).
320- parent ::triplify ($ id , parent ::getVoc ()."stitch-flat " , $ stitch_flat ).
321- parent ::triplify ($ id , parent ::getVoc ()."stitch-stereo " , $ stitch_stereo )
214+ parent ::describeIndividual ($ id , "$ stitch_flat $ cui_label effect " , parent ::getVoc ()."Drug-Effect-Association " ).
215+ parent ::triplify ($ id , parent ::getVoc ()."effect " , $ cui ).
216+ parent ::triplify ($ id , parent ::getVoc ()."drug " , $ stitch_flat )
322217 );
323218 parent ::setCheckpoint ('record ' );
324219 }
@@ -343,7 +238,6 @@ function indications()
343238 $ list [$ id ] = '' ;
344239 }
345240
346-
347241 $ stitch_id = "stitch: $ stitch_flat " ;
348242 $ meddra_id = "meddra: $ cui " ;
349243
@@ -353,6 +247,15 @@ function indications()
353247 );
354248 $ declared [$ cui ] = '' ;
355249 }
250+ if (!isset ($ declared [$ stitch_flat ])) {
251+ $ pubchem_id = "pubchem.compound: " .ltrim ( substr ($ stitch_flat ,4 ), "0 " );
252+ parent ::addRDF (
253+ parent ::triplify ($ stitch_id , "rdf:type " , parent ::getVoc ()."Flat-Compound " ).
254+ parent ::describeClass (parent ::getVoc ()."Flat-Compound " , "STITCH Flat compound " ).
255+ parent ::triplify ($ stitch_id , parent ::getVoc ()."x-flat-pubchem.compound " , $ pubchem_id )
256+ );
257+ $ declared [$ stitch_flat ] = '' ;
258+ }
356259
357260 parent ::addRDF (
358261 parent ::describeIndividual ($ id , $ stitch_id ." - " .$ meddra_id ." indication " , parent ::getVoc ()."Drug-Indication-Association " ).
@@ -394,103 +297,74 @@ function freq()
394297 $ i = 1 ;
395298 parent ::setCheckpoint ('file ' );
396299 while ($ l = parent ::getReadFile ()->read ()) {
397- parent ::setCheckpoint ('record ' );
398300 $ a = explode ("\t" ,str_replace ("% " ,"" ,$ l ));
399301 if (count ($ a ) != $ cols ) {
400302 trigger_error ("Expecting $ cols, but found " .count ($ a )." instead... skipping file! " , E_USER_ERROR );
401303 return false ;
402304 }
403- list ($ stitch_flat , $ stitch_stereo , $ cui , $ placebo , $ freq , $ freq_lower , $ freq_upper , $ concept_type , $ meddra_concept_id , $ meddra_concept_label );
404- $ id = "stitch_resource: " .md5 ("se_freq " .$ l );
405- $ label = "side effect frequency of $ meddra_concept_label for $ stitch_id " ;
305+ list ($ stitch_flat , $ stitch_stereo , $ cui , $ placebo , $ freq , $ freq_lower , $ freq_upper , $ concept_type , $ meddra_concept_id , $ meddra_concept_label ) = $ a ;
306+ if ($ concept_type == "LLT " ) continue ;
307+ $ meddra_concept_label = trim ($ meddra_concept_label );
308+
309+ $ id = "stitch_resource: " .md5 ("se_freq " .$ l );
310+ $ stitch_flat = "stitch: $ stitch_flat " ;
311+ $ label = "$ meddra_concept_label frequency for $ stitch_flat " ;
406312 parent ::addRDF (
407313 parent ::describeIndividual ($ id , $ label , parent ::getVoc ()."Drug-Effect-Frequency " ).
408314 parent ::describeClass (parent ::getVoc ()."Drug-Effect-Frequency " ,"SIDER Drug-Effect and Frequency " ).
409315 parent ::triplify ($ id , parent ::getVoc ()."drug " , $ stitch_flat ).
410316 parent ::triplify ($ id , parent ::getVoc ()."effect " , "meddra: " .$ meddra_concept_id )
411317 );
412318
413- if ($ a [ 5 ] ){
319+ if ($ placebo ){
414320 parent ::addRDF (
415321 parent ::triplifyString ($ id , parent ::getVoc ()."placebo " , "true " , "xsd:boolean " )
416322 );
417323 }
418324
419- $ number = false ;
420- if (is_numeric ($ freq )) {
421- $ flabel = $ freq ."% " ;
422- $ ftype_label = "Exact-Frequency " ;
423- $ ftype = parent ::getVoc ().$ ftype_label ;
424- $ number = true ;
425- } else {
426- $ flabel = $ freq ;
427- $ ftype_label = "Qualitative-Frequency " ;
428- $ ftype = parent ::getVoc ()."$ ftype_label;
429- }
430- if( $ freq_lower != $ freq_upper) {
431- $ flabel .= " ($ freq_lower -$ freq_upper )";
432- $ ftype_label = "Range-Frequency " ;
433- $ ftype = parent ::getVoc ().$ ftype_label ;
434- }
325+ $ number = false ;
326+ if (is_numeric ($ freq )) {
327+ $ flabel = $ freq ."% " ;
328+ $ ftype_label = "Exact-Frequency " ;
329+ $ ftype = parent ::getVoc ().$ ftype_label ;
330+ $ number = true ;
331+ } else {
332+ $ flabel = $ freq ;
333+ $ ftype_label = "Qualitative-Frequency " ;
334+ $ ftype = parent ::getVoc ()."$ ftype_label" ;
335+ }
336+ if ($ freq_lower != $ freq_upper ) {
337+ $ flabel .= "( $ freq_lower- $ freq_upper) " ;
338+ $ ftype_label = "Range-Frequency " ;
339+ $ ftype = parent ::getVoc ().$ ftype_label ;
340+ }
435341
342+ $ fid = $ id .md5 ($ a [5 ].$ a [6 ].$ a [8 ]);
343+ parent ::addRDF (
344+ parent ::triplify ($ id ,parent ::getVoc ()."frequency " ,$ fid ).
345+ parent ::describeIndividual ($ fid ,$ flabel ,$ ftype ).
346+ parent ::describeClass ($ ftype , $ ftype_label )
347+ );
348+
349+ if ($ number == true ) {
436350 parent ::addRDF (
437- parent ::triplify ($ id ,parent ::getVoc ()."AQualitative-Frequency " ,$ fid ).
438- parent ::describeIndividual ($ fid ,$ flabel ,$ ftype ).
439- parent ::describeClass ($ ftype , $ ftype_label )
351+ parent ::triplifyString ($ fid , parent ::getVoc ()."frequency-value " , $ freq /100 )
440352 );
441-
442- if ($ number == true ) {
443- parent ::addRDF (
444- parent ::triplifyString ($ fid , parent ::getVoc ()."frequency " , $ a [6 ]/100 )
445- );
446- } else {
447- parent ::addRDF (
448- parent ::triplifyString ($ fid , parent ::getVoc ()."frequency " , $ a [6 ])
449- );
450- }
451- // if($a[7] != $a[8]){
452- parent ::addRDF (
453- parent ::triplifyString ($ fid , parent ::getVoc ()."lower-frequency " , $ a [7 ]).
454- parent ::triplifyString ($ fid , parent ::getVoc ()."upper-frequency " , $ a [8 ])
455- );
456- // }
457-
458- $ meddra_id = "umls: $ a [10 ]" ;
459- $ label = "" ;
460- if (trim ($ a [11 ])) $ label = strtolower (trim ($ a [11 ]));
461- $ rel = "preferred-term " ;
462- if ($ a [9 ] != "LLT " ) $ rel = "lower-level-term " ;
463-
353+ } else {
464354 parent ::addRDF (
465- parent ::triplify ($ fid , parent ::getVoc ().$ rel , $ meddra_id ).
466- parent ::describeClass ($ meddra_id ,$ label )
355+ parent ::triplifyString ($ fid , parent ::getVoc ()."frequency-value " , $ freq )
467356 );
357+ }
358+ parent ::addRDF (
359+ parent ::triplifyString ($ fid , parent ::getVoc ()."lower-frequency " , sprintf ("%.3f " ,$ freq_lower )).
360+ parent ::triplifyString ($ fid , parent ::getVoc ()."upper-frequency " , sprintf ("%.3f " ,$ freq_upper ))
361+ );
468362
469363 parent ::setCheckpoint ('record ' );
470364 }
471365 parent ::setCheckpoint ('file ' );
472366
473367 }
474-
475- /*
476- meddra_adverse_effects.tsv.gz
477- -----------------------------
478368
479- 1 & 2: STITCH compound ids (flat/stereo, see above)
480- 3: UMLS concept id as it was found on the label
481- 4: drug name
482- 5: side effect name
483- 6: MedDRA concept type (LLT = lowest level term, PT = preferred term)
484- 7: UMLS concept id for MedDRA term
485- 8: MedDRA side effect name
486-
487- All side effects found on the labels are given as LLT. Additionally, the PT is shown. There is at least one
488- PT for every side effect, but sometimes the PT is the same as the LLT.
489- */
490- // @TODO
491- function meddra_adverse_effects ()
492- {
493-
494- }
495369}
496370?>
0 commit comments