1 from obitools.utils import skipWhiteLineIterator,multiLineWrapper
2 from obitools.utils import universalOpen
3 from obitools.format.genericparser import genericEntryIteratorGenerator
4 from logging import debug,warning
5
6 import re
7
8
9
10
11
12
13
19
# Entry splitter built by the generic parser helper.  The end-of-entry pattern
# matches an empty or spaces-only line; `strip=True` is forwarded as-is.
# NOTE(review): presumably this makes a blank line terminate each stanza --
# confirm against genericEntryIteratorGenerator.
_oboEntryIterator = genericEntryIteratorGenerator(endEntry=r'^ *$',
                                                  strip=True)
22
def stanzaIterator(inputfile):
    '''
    Iterator of stanza. The stanza are the basic units of OBO files.

    @param inputfile: a stream of strings from an opened OBO file.
    @type inputfile: a stream of strings

    @return: a stream of stanza
    @rtype: a stream of aggregated strings

    @note: The input is passed through C{universalOpen} and then
           C{multiLineWrapper} before being handed to the module level
           entry iterator, which aggregates the strings into stanza.
    '''
    lines = multiLineWrapper(universalOpen(inputfile))
    return _oboEntryIterator(lines)
39
40
41
42
43
44
45
46
class TrailingModifier(dict):
    '''
    A class object which inherits from the class dict. Trailing modifiers can
    be found at the end of TaggedValue objects when they exist.

    The dictionary maps each modifier name to its value, both as stripped
    strings. It stays empty when the string carries no ' {...}' group.
    '''

    # Matches the text following ' {' up to a closing '}' that is itself
    # followed by optional spaces and either ' !' or the end of the string.
    # Group 1 is that closing brace.
    _match_brace = re.compile(r'(?<=\ {)[^\]]*(\}) *( !|$)')

    def __init__(self, string):
        '''
        Constructor of the class TrailingModifier.

        @param string: a string which may contain a ' {name=value, ...}' group.
        @type string: string
        '''
        dict.__init__(self)

        found = TrailingModifier._match_brace.search(string)
        if found is None:
            return

        # Keep only what lies between ' {' and the matched '}': the whole
        # match also contains the brace and possibly the ' !' comment mark,
        # which must not end up in the last modifier value.
        body = string[found.start():found.start(1)]

        for item in body.split(','):
            item = item.strip()
            if not item:
                continue
            # A modifier without '=' is kept with an empty value.
            name, _, value = item.partition('=')
            self[name.strip()] = value.strip()


def trailingModifierFactory(string):
    '''
    Dispatcher of trailing modifiers.

    @param string: a string from a TaggedValue object with a trailing
                   modifiers signal.
    @type string: string

    @return: a C{TrailingModifier} instance if at least one trailing modifier
             was found, None otherwise.

    @note: The dispatcher is currently very simple. Only one case is treated
           by the function.
    '''
    # An empty dictionary is falsy, so "no modifier" collapses to None.
    return TrailingModifier(string) or None


class TaggedValue(object):
    '''
    A couple 'tag:value' of an OBOEntry.
    '''

    _match_value = re.compile('(("(\\\\"|[^\"])*")|(\\\\"|[^\"]))*?( !| {|$)')
    _split_comment = re.compile('^!| !')
    _match_quotedString = re.compile('(?<=")(\\\\"|[^\"])*(?=")')
    _match_bracket = re.compile(r'\[[^\]]*\]')

    def __init__(self, line):
        '''
        Constructor of the class TaggedValue.

        @param line: a line of an OBOEntry composed of a tag and a value.
        @type line: string

        @raise ValueError: if the line contains no ':' separator.

        @note: The constructor separates the tag from the right term.
               'value' is extracted from the right term using a regular
               expression (value is at the beginning of the string, between
               quotes or not). Then 'comment' is extracted from the rest of
               the string (it follows a '!', and is None by default).
               Finally 'trailing_modifiers' are extracted from what precedes
               the comment. The tag, the value, the comment (stored in
               C{__doc__}) and the trailing_modifiers are saved.
               The deprecated tag 'use_term' is renamed 'consider' and a
               C{DeprecationWarning} is emitted.
        '''
        debug("tagValueParser : %s" % line)

        trailing_modifiers = None
        comment = None

        tag, right = line.split(':', 1)

        value = TaggedValue._match_value.search(right).group(0)
        debug("Extracted value : %s" % value)

        # The value pattern stops on ' !', ' {' or the end of the string, so
        # a final '!' or '{' means something follows the value itself.
        if value and value[-1] in '!{':
            extra = right[len(value) - 1:].strip()
            parts = TaggedValue._split_comment.split(extra, 1)

            if len(parts) == 2:
                comment = parts[1].strip()

            # The modifier pattern requires a space before '{'; the strip()
            # above removed it, so it is restored here.
            trailing_modifiers = trailingModifierFactory(' ' + parts[0])

            # Drop the '!' or '{' marker kept at the end of the value.
            value = value[:-1]

        if tag == 'use_term':
            import warnings
            warnings.warn("user_term is a deprecated tag, "
                          "you should instead use consider",
                          DeprecationWarning)
            tag = 'consider'

        self.value = value.strip()
        self.tag = tag
        self.__doc__ = comment
        self.trailing_modifiers = trailing_modifiers

    def __str__(self):
        return str(self.value)

    def __repr__(self):
        return '''"""%s"""''' % str(self)
163
164
class NameValue(TaggedValue):
    '''
    A couple 'name:value' inherited from the class TaggedValue. Used to manage
    name tags.
    '''

    def __init__(self, line):
        '''
        Constructor of the class NameValue.

        @param line: a line of an OBOEntry composed of a tag and a value.
        @type line: string

        @raise ValueError: if the line contains no ':' separator.

        @note: The TaggedValue constructor is deliberately not called:
               everything after the first ':' is kept verbatim (stripped) as
               the value, without comment or trailing modifier extraction.
               The tag is always set to 'name'.
        '''
        _, right = line.split(':', 1)

        self.value = right.strip()
        self.tag = 'name'
        self.__doc__ = None
        self.trailing_modifiers = None
180
181
182
class DefValue(TaggedValue):
    '''
    A couple 'def:value' inherited from the class TaggedValue. Used to manage
    def tags.
    '''

    def __init__(self, line):
        '''
        Constructor of the class DefValue.

        @param line: a line of an OBOEntry composed of a tag named 'def' and
                     a value.
        @type line: string

        @raise ValueError: if the value holds no quoted definition.

        @note: The constructor calls the TaggedValue constructor. A regular
               expression is used to extract the 'definition' from
               TaggedValue.value (the definition is the unquoted content of
               the first quoted string). Another regular expression extracts
               the 'dbxrefs' (between brackets) from the value deprived of
               the definition. The definition is saved as the new value of
               the DefValue, the dbxrefs are saved in C{dbxrefs}.
        '''
        TaggedValue.__init__(self, line)

        quoted = TaggedValue._match_quotedString.search(self.value)
        if quoted is None:
            raise ValueError("No quoted definition in def tag : %s" % line)
        definition = quoted.group(0)

        # Remove only the matched span. A plain str.replace() would also
        # erase any later occurrence of the same text, e.g. inside the
        # dbxrefs, and would mistake brackets of the definition for dbxrefs.
        cleanvalue = self.value[:quoted.start()] + self.value[quoted.end():]
        # NOTE(review): the listing shows a one-space to one-space replace
        # here (a no-op); collapsing doubled spaces is presumably what was
        # meant -- confirm against the pristine file.
        cleanvalue = cleanvalue.replace('  ', ' ')

        dbxrefs = TaggedValue._match_bracket.search(cleanvalue).group(0)

        self.tag = 'def'
        self.value = definition
        self.dbxrefs = xrefFactory(dbxrefs)
221
222
class SynonymValue(TaggedValue):
    '''
    A couple 'synonym:value' inherited from the class TaggedValue. Used to
    manage synonym tags, exact_synonym tags, broad_synonym tags and
    narrow_synonym tags.
    '''

    # Text located between the emptied quotes ('""') and the opening bracket
    # of the dbxrefs (or the end of the string).
    _match_scope = re.compile(r'(?<="")[^\[]*(?=\[|$)')

    def __init__(self, line):
        '''
        Constructor of the class SynonymValue.

        @param line: a line of an OBOEntry composed of a tag named 'synonym'
                     or 'exact_synonym' or 'broad_synonym' or
                     'narrow_synonym' and a value.
        @type line: string

        @raise ValueError: if the value holds no quoted synonym text.
        @raise AssertionError: if several scopes are given or if the tag is
                               not a managed synonym tag.

        @note: SynonymValue is composed of a tag, a value, a scope, a list of
               types and dbxrefs.
               The constructor calls the TaggedValue constructor. The content
               of the first quoted string becomes the new value.
               The 'attributes' are the words found between the quoted string
               and the dbxrefs: an optional scope ('RELATED', 'EXACT',
               'BROAD' or 'NARROW', 'RELATED' by default) and an optional
               list of synonym types (every other word).
               For the deprecated tags 'exact_synonym', 'broad_synonym' and
               'narrow_synonym' a C{DeprecationWarning} is emitted, the tag
               is set to 'synonym' and the scope respectively to 'EXACT',
               'BROAD' and 'NARROW'. 'systematic_synonym' is turned into
               'synonym' with the scope 'SYSTEMATIC'.
               The dbxrefs (between brackets) are extracted from the value
               deprived of the quoted text and saved in C{dbxrefs}.
        '''
        import warnings

        TaggedValue.__init__(self, line)

        quoted = TaggedValue._match_quotedString.search(self.value)
        if quoted is None:
            raise ValueError("No quoted text in synonym tag : %s" % line)
        definition = quoted.group(0)

        # Remove only the matched span. str.replace() would also erase the
        # same text wherever else it occurs (types, dbxrefs).
        cleanvalue = self.value[:quoted.start()] + self.value[quoted.end():]
        # NOTE(review): the listing shows a one-space to one-space replace
        # here (a no-op); collapsing doubled spaces is presumably what was
        # meant -- confirm against the pristine file.
        cleanvalue = cleanvalue.replace('  ', ' ')

        attributes = set(
            SynonymValue._match_scope.search(cleanvalue).group(0).strip().split())

        scopes = attributes & set(['RELATED', 'EXACT', 'BROAD', 'NARROW'])
        types = attributes - scopes

        assert len(scopes) < 2, "Only one synonym scope allowed"

        if scopes:
            scope = scopes.pop()
        else:
            scope = 'RELATED'

        deprecated = {'exact_synonym': 'EXACT',
                      'broad_synonym': 'BROAD',
                      'narrow_synonym': 'NARROW'}

        if self.tag in deprecated:
            # Warn, then remap: raising here would make the remap below
            # unreachable.
            warnings.warn('%s is a deprecated tag use instead synonym tag'
                          % self.tag, DeprecationWarning)
            scope = deprecated[self.tag]
            self.tag = 'synonym'
        elif self.tag == 'systematic_synonym':
            self.tag = 'synonym'
            scope = 'SYSTEMATIC'

        assert self.tag == 'synonym', "%s synonym type is not managed" % self.tag

        dbxrefs = TaggedValue._match_bracket.search(cleanvalue).group(0)

        self.value = definition
        self.dbxrefs = xrefFactory(dbxrefs)
        self.scope = scope
        self.types = list(types)

    def __eq__(self, b):
        if not isinstance(b, SynonymValue):
            return NotImplemented
        # 'types' is built from a set, so its order carries no meaning.
        return ((self.value == b.value) and (self.dbxrefs == b.dbxrefs)
                and (self.scope == b.scope)
                and (set(self.types) == set(b.types))
                and (self.__doc__ == b.__doc__) and (self.tag == b.tag)
                and (self.trailing_modifiers == b.trailing_modifiers))

    def __ne__(self, b):
        equal = self.__eq__(b)
        if equal is NotImplemented:
            return equal
        return not equal

    def __hash__(self):
        modifiers = self.trailing_modifiers
        if modifiers is not None:
            # Trailing modifiers are a dict subclass, hence unhashable.
            modifiers = frozenset(modifiers.items())

        fields = [self.__doc__,
                  self.value,
                  frozenset(self.dbxrefs),
                  self.scope,
                  frozenset(self.types),
                  self.tag,
                  modifiers]
        return sum(hash(z) for z in fields) % (2 ** 31)
336
337
class XrefValue(TaggedValue):
    '''
    A couple 'xref:value' inherited from the class TaggedValue. Used to manage
    xref tags.
    '''

    def __init__(self, line):
        '''
        Constructor of the class XrefValue.

        @param line: a line of an OBOEntry composed of a tag named 'xref',
                     'xref_analog' or 'xref_unk' and a value.
        @type line: string

        @raise AssertionError: if the tag is not an xref tag.

        @note: The constructor calls the TaggedValue constructor, then
               replaces the value by the result of C{xrefFactory}. The
               deprecated tags 'xref_analog' and 'xref_unk' are renamed
               'xref' and a C{DeprecationWarning} is emitted.
        '''
        import warnings

        TaggedValue.__init__(self, line)

        self.value = xrefFactory(self.value)

        if self.tag in ('xref_analog', 'xref_unk'):
            # Warn, then remap: raising here would make the remap below
            # unreachable.
            warnings.warn('%s is a deprecated tag use instead xref tag'
                          % self.tag, DeprecationWarning)
            self.tag = 'xref'

        assert self.tag == 'xref'
358
359
361 '''
362 A couple 'xref:value' inherited from the class TaggedValue. Used to manage
363 xref tags.
364 '''
365
386
387
391
400
401
def taggedValueFactory(line):
    '''
    A function used to dispatch lines of an OBOEntry between the class
    TaggedValue and its inherited classes.

    @param line: a line of an OBOEntry composed of a tag and a value.
    @type line: string

    @return: a class object

    @note: The dispatch is done on the exact tag (the text located before the
           first ':'), not on a prefix of the line, so that an unrelated tag
           which merely starts like a managed one (for instance any tag
           beginning with 'def') falls back to the plain TaggedValue.
    '''
    tag = line.split(':', 1)[0].strip()

    if tag in ('namespace', 'default-namespace'):
        return NamespaceValue(line)

    elif tag == 'def':
        return DefValue(line)

    elif tag in ('synonym', 'exact_synonym', 'broad_synonym',
                 'narrow_synonym', 'systematic_synonym'):
        return SynonymValue(line)

    elif tag in ('xref', 'xref_analog', 'xref_unk'):
        return XrefValue(line)

    elif tag == 'name':
        return NameValue(line)

    elif tag in ('intersection_of', 'union_of', 'relationship'):
        return RelationshipValue(line)

    elif tag == 'remark':
        return RemarkValue(line)

    else:
        return TaggedValue(line)
441
442
443
444
445
446
447
448
450 '''
451 A xref object of an OBOentry. It may be the 'dbxrefs' of SynonymValue and
452 DefValue objects or the 'value' of XrefValue objects.
453 '''
454
455 __splitdata__ = re.compile(' +(?=["{])')
456
476
478 return ((self.reference==b.reference) and (self.description==b.description)
479 and (self.trailing_modifiers==b.trailing_modifiers))
480
482 return (reduce(lambda x,y:x+y,(hash(z) for z in [self.reference,
483 self.description,
484 self.trailing_modifiers]),0)) % (2**31)
485
486
def xrefFactory(string):
    '''
    Dispatcher of xrefs.

    @param string: a string (between brackets) from an inherited TaggedValue
                   object with a dbxrefs signal (actually, the signal can
                   only be found into SynonymValue and DefValue objects) or a
                   string (without brackets) from a XrefValue object.
    @type string: string

    @return: a list of C{Xref} when the string is between brackets (empty for
             '[]'), a single C{Xref} otherwise.

    @note: The dispatcher treats differently the strings between brackets
           (from SynonymValue and DefValue objects) and without brackets
           (from XrefValue objects). The bracketed list is split on every
           ','.
    '''
    string = string.strip()

    # startswith() also copes with an empty string, where string[0] would
    # raise an IndexError.
    if string.startswith('['):
        items = (x.strip() for x in string[1:-1].split(','))
        # Skip empty items so that '[]' gives no xref at all.
        return [Xref(x) for x in items if x]

    return Xref(string)
507
508
509
510
511
512
513
class OBOEntry(dict):
    '''
    An entry of an OBOFile. It can be a header (without a stanza name) or
    a stanza (with a stanza name between brackets). It inherits from the
    class dict: each tag is mapped to the list of the tagged values found
    for it, in order of appearance.
    '''
    _match_stanza_name = re.compile(r'(?<=^\[)[^\]]*(?=\])')

    def __init__(self, stanza):
        '''
        Constructor of the class OBOEntry.

        @param stanza: the text of the entry, one 'tag: value' per line,
                       optionally preceded by a '[name]' line.
        @type stanza: string

        @note: C{isHeader} is True when the text does not start with '['.
               C{stanzaName} is only set for entries which are not headers.
               Blank lines are ignored.
        '''
        dict.__init__(self)

        # Slicing instead of indexing keeps an empty stanza from raising.
        self.isHeader = stanza[:1] != '['
        lines = stanza.split('\n')

        if not self.isHeader:
            self.stanzaName = lines[0].strip()[1:-1]
            lines = lines[1:]

        for line in lines:
            if not line.strip():
                continue
            taggedvalue = taggedValueFactory(line)
            self.setdefault(taggedvalue.tag, []).append(taggedvalue)

    @staticmethod
    def parseStanzaName(stanza):
        '''
        Extract the name of a stanza from its text.

        @param stanza: the text of an entry.
        @type stanza: string

        @return: the text found between the brackets opening the stanza, or
                 None when the text does not start with a '[name]' part.
        '''
        sm = OBOEntry._match_stanza_name.search(stanza)
        if sm:
            return sm.group(0)
        return None
547
548
549
551 '''
552 A stanza named 'Term'. It inherits from the class OBOEntry.
553 '''
555
556
557 OBOEntry.__init__(self, stanza)
558
559 assert self.stanzaName=='Term'
560 assert 'id' in self and len(self['id'])==1,"An OBOTerm must have an id"
561 assert 'name' in self and len(self['name'])==1,"An OBOTerm must have a name"
562 assert 'namespace' not in self or len(self['namespace'])==1, "Only one namespace is allowed for an OBO term"
563
564 assert 'def' not in self or len(self['def'])==1,"Only one definition is allowed for an OBO term"
565 assert 'comment' not in self or len(self['comment'])==1,"Only one comment is allowed for an OBO term"
566
567 assert 'union_of' not in self or len(self['union_of'])>=2,"Only one union relationship is allowed for an OBO term"
568 assert 'intersection_of' not in self or len(self['intersection_of'])>=2,"Only one intersection relationship is allowed for an OBO term"
569
570 if self._isObsolete():
571 assert 'is_a' not in self
572 assert 'relationship' not in self
573 assert 'inverse_of' not in self
574 assert 'disjoint_from' not in self
575 assert 'union_of' not in self
576 assert 'intersection_of' not in self
577
578 assert 'replaced_by' not in self or self._isObsolete()
579 assert 'consider' not in self or self._isObsolete()
580
581
583 if 'def' in self:
584 return self['def'][0]
585 return None
586
589
591 return self['namespace'][0]
592
594 return self['name'][0]
595
600
602 if 'alt_id' in self:
603 return list(set(self.get('alt_id',None)))
604 return None
605
607 if 'is_a' in self:
608 return list(set(self.get('is_a',None)))
609 return None
610
612 if 'synonym' in self :
613 return list(set(self.get('synonym',None)))
614 return None
615
617 if self.get('subset',None) != None:
618 return list(set(self.get('subset',None)))
619 else:
620 return None
621
623 if 'xref' in self:
624 return list(set(self.get('xref',None)))
625 return None
626
628 if 'relationship' in self:
629 return list(set(self.get('relationship',None)))
630 return None
631
633 return list(set(self.get('union_of',None)))
634
636 return list(set(self.get('intersection_of',None)))
637
639 return list(set(self.get('disjoint_from',None)))
640
642 return 'is_obsolete' in self and str(self['is_obsolete'][0])=='true'
643
645 if 'replaced_by' in self:
646 return list(set(self.get('replaced_by',None)))
647 return None
648
650 if 'consider' in self:
651 return list(set(self.get('consider',None)))
652 return None
653
654
655 definition = property(_getDefinition,None,None)
656 id = property(_getId,None,None)
657 namespace = property(_getNamespace,None,None)
658 name = property(_getName,None,None)
659 comment = property(_getComment,None,None)
660 alt_ids = property(_getAltIds,None,None)
661 is_a = property(_getIsA,None,None)
662 synonyms = property(_getSynonym,None,None)
663 subsets = property(_getSubset,None,None)
664 xrefs = property(_getXref,None,None)
665 relationship = property(_getRelationShip,None,None)
666 union_of = property(_getUnion,None,None)
667 intersection_of = property(_getIntersection,None,None)
668 disjoint_from = property(_getDisjonction,None,None)
669 is_obsolete = property(_isObsolete,None,None)
670 replaced_by = property(_getReplaceBy,None,None)
671 consider = property(_getConsider,None,None)
672
673
def OBOEntryFactory(stanza):
    '''
    Dispatcher of stanza.

    @param stanza: a stanza composed of several lines.
    @type stanza: text

    @return: an C{OBOTerm} | C{OBOEntry} instance

    @note: The dispatcher treats differently the stanza which are OBO "Term"
           and the others. Headers, which have no stanza name, are built as
           plain C{OBOEntry}.
    '''
    if OBOEntry.parseStanzaName(stanza) == "Term":
        return OBOTerm(stanza)
    return OBOEntry(stanza)
693
695 entries = stanzaIterator(file)
696 for e in entries:
697 debug(e)
698 yield OBOEntryFactory(e)
699