Package obitools :: Package location :: Module feature
[hide private]
[frames | no frames]

Source Code for Module obitools.location.feature

  1  from obitools.location import Location,locationGenerator 
  2  import logging 
  3  import sys 
  4  import re 
  5   
  6   
  7           
  8           
  9  _featureMatcher = re.compile('^(FT|  )   [^ ].+\n((FT|  )    .+\n)+',re.M) 
 10  _featureCleaner = re.compile('^FT',re.M) 
 11   
 12   
13 -def textFeatureIterator(fttable):
14 ''' 15 Iterate through a textual description of a feature table in a genbank 16 or embl format. Return at each step a text representation of each individual 17 feature composing the table. 18 19 @param fttable: a string corresponding to the feature table of a genbank 20 or an embl entry 21 22 @type fttable: C{str} 23 24 @return: an iterator on str 25 @rtype: iterator 26 27 @see: L{ftParser} 28 ''' 29 for m in _featureMatcher.finditer(fttable): 30 t = m.group() 31 t = _featureCleaner.sub(' ',t) 32 yield t
# One qualifier: the text following a '/' that sits after exactly 21 leading
# blanks, plus any following lines indented by 21 blanks that do not
# themselves start a new qualifier.
_qualifierMatcher = re.compile('(?<=^ {21}/).+(\n {21}[^/].+)*',re.M)
# Leading blanks of every line of a matched qualifier.
_qualifierCleanner= re.compile("^ +",re.M)


def qualifierIterator(qualifiers):
    '''
    Parse a textual description of a feature in embl or genbank format
    as returned by the textFeatureIterator iterator and iterate through
    the key, value qualified defining this location.

    A qualifier without an '=' sign yields C{(key, None)}. Otherwise the
    line breaks of the value are removed (for the C{translation} key) or
    replaced by a single blank (any other key), and the value is converted
    to a Python object when it is written as a Python literal (quoted
    string, number, ...). A value that is not a literal is returned
    unchanged as a string.

    @param qualifiers: substring containing qualifiers
    @type qualifiers: str

    @return: an iterator on tuple (key,value), where keys are C{str}
    @rtype: iterator
    '''
    # Function-scope import: keeps this block self-contained without
    # touching the module import header.
    import ast

    for m in _qualifierMatcher.finditer(qualifiers):
        text = _qualifierCleanner.sub('', m.group())
        key, sep, value = text.partition('=')
        if not sep:
            yield (key, None)
            continue

        # Protein translations are wrapped mid-word, free text is wrapped
        # between words.
        joiner = '' if key == 'translation' else ' '
        value = value.replace('\n', joiner)

        # SECURITY: this used to be a plain eval() guarded by a bare
        # except.  Qualifier values come from data files, so eval() could
        # run arbitrary expressions and resolved bare words such as
        # "len" to whatever object carried that name.  literal_eval only
        # accepts literals; anything else keeps its textual form.
        try:
            value = ast.literal_eval(value.strip())
        except (ValueError, TypeError, SyntaxError, MemoryError, RuntimeError):
            pass
        yield (key, value)
# Raw strings: the patterns contain regex escapes that are not valid
# Python string escapes.

# Feature key: first run of non-blank characters after 5 leading blanks at
# the very beginning of the text.
_ftmatcher = re.compile(r'(?<=^ {5})\S+')
# Location: everything after the first 21 characters up to the first '/'
# (may span several lines).
_locmatcher= re.compile(r'(?<=^.{21})[^/]+',re.DOTALL)
# Any run of whitespace, removed from the location.
_cleanloc  = re.compile(r'[\s\n]+')
# Qualifier section: from the first line made of blanks followed by '/'
# to the end of the text.
_qualifiersMatcher = re.compile(r'^ +/.+',re.M | re.DOTALL)


def ftParser(feature):
    '''
    Split the text of one feature, as returned by L{textFeatureIterator},
    into its three parts.

    @param feature: textual description of a single feature
    @type feature: C{str}

    @return: a tuple C{(fttype, location, qualifiers)} where C{fttype} is
             the feature key, C{location} the location string with all
             whitespace removed and C{qualifiers} the raw qualifier
             section, or an empty string when the feature has none
    @rtype: tuple of C{str}

    @raise AttributeError: if the feature key or the location cannot be
                           found in C{feature}
    '''
    fttype = _ftmatcher.search(feature).group()
    location = _cleanloc.sub('', _locmatcher.search(feature).group())

    found = _qualifiersMatcher.search(feature)
    if found is None:
        # A feature without qualifiers is legal: report it and go on.
        logging.debug("Qualifiers regex not matching on \n=====\n%s\n========", feature)
        return fttype, location, ""

    return fttype, location, found.group()
85 86
class Feature(dict,Location):
    '''
    A typed location carrying a dictionary of annotations.

    The instance itself is the dictionary; every location related
    operation is forwarded to the location object given at construction.
    '''

    def __init__(self,type,location):
        '''
        @param type: feature type name
        @param location: location object every location related method
                         is delegated to
        '''
        self._loc = location
        self._fttype = type

    # ---- feature type ---------------------------------------------------

    def getFttype(self):
        '''Return the feature type name given at construction.'''
        return self._fttype

    ftType = property(getFttype, doc="Feature type name")

    # ---- boundaries -----------------------------------------------------

    def getBegin(self):
        '''Return C{getBegin()} of the wrapped location.'''
        return self._loc.getBegin()

    def getEnd(self):
        '''Return C{getEnd()} of the wrapped location.'''
        return self._loc.getEnd()

    begin = property(getBegin, doc="beginning position of the location")
    end = property(getEnd, doc="ending position of the location")

    # ---- plain delegation to the wrapped location -----------------------

    def isDirect(self):
        '''Return C{isDirect()} of the wrapped location.'''
        return self._loc.isDirect()

    def isSimple(self):
        '''Return C{isSimple()} of the wrapped location.'''
        return self._loc.isSimple()

    def isFullLength(self):
        '''Return C{isFullLength()} of the wrapped location.'''
        return self._loc.isFullLength()

    def needNucleic(self):
        '''Return C{needNucleic()} of the wrapped location.'''
        return self._loc.needNucleic()

    def locStr(self):
        '''Return the string form of the wrapped location.'''
        return str(self._loc)

    def _getglocpos(self):
        return self._loc._getglocpos()

    def __cmp__(self,y):
        # Ordering is entirely decided by the wrapped location.
        return self._loc.__cmp__(y)

    # ---- derived objects ------------------------------------------------

    def extractSequence(self,sequence,withQualifier=False):
        '''
        Extract from C{sequence} the part described by the location.

        @param sequence: sequence handed to the location's
                         C{extractSequence} method
        @param withQualifier: when true, the content of this feature is
                              merged into the C{getInfo()} mapping of the
                              extracted sequence

        @return: the object returned by the location's C{extractSequence}
        '''
        extracted = self._loc.extractSequence(sequence)
        if withQualifier:
            info = extracted.getInfo()
            info.update(self)
        return extracted

    def simplify(self):
        '''
        Return a new L{Feature} of the same type built on the simplified
        location and holding a copy of this feature's items.
        '''
        simpler = Feature(self._fttype, self._loc.simplify())
        simpler.update(self)
        return simpler

    def shift(self,s):
        '''
        Return this feature moved by C{s} positions.

        @param s: offset applied to the location

        @return: C{self} when C{s} is 0, otherwise a new L{Feature} of the
                 same type built on the shifted location and holding a
                 copy of this feature's items

        @raise AssertionError: if the beginning position plus C{s} is not
                               strictly positive
        '''
        assert (self.getBegin() + s) > 0,"shift to large (%d)" % s
        if s == 0:
            return self
        moved = Feature(self._fttype, self._loc.shift(s))
        moved.update(self)
        return moved

    # ---- textual forms --------------------------------------------------

    def __repr__(self):
        parts = (self.ftType, str(self._loc), dict.__repr__(self))
        return str(parts)

    def __str__(self):
        return repr(self)
153 154
def featureFactory(featureDescription):
    '''
    Build a L{Feature} from the textual description of one feature.

    The description is split with L{ftParser}, its location string is
    converted with C{locationGenerator}, and every qualifier produced by
    L{qualifierIterator} is stored in the feature under its key, as a list
    collecting the values in order of appearance. The original text is
    kept in the C{raw} attribute of the feature.

    @param featureDescription: text of a single feature
    @type featureDescription: C{str}

    @return: the new feature
    @rtype: L{Feature}
    '''
    kind, locationText, qualifierText = ftParser(featureDescription)

    feature = Feature(kind, locationGenerator(locationText))
    feature.raw = featureDescription

    for key, value in qualifierIterator(qualifierText):
        values = feature.setdefault(key, [])
        values.append(value)

    return feature
165
def featureIterator(featureTable,skipError=False):
    '''
    Iterate over the features described by a feature table.

    Each feature text produced by L{textFeatureIterator} is converted with
    L{featureFactory}. Only C{AssertionError} is handled as a parsing
    error; any other exception raised while building a feature propagates
    to the caller whatever the value of C{skipError}.

    @param featureTable: text of the feature table
    @type featureTable: C{str}
    @param skipError: when true, a feature whose construction raises
                      C{AssertionError} is logged and skipped instead of
                      stopping the iteration
    @type skipError: C{bool}

    @return: an iterator on L{Feature}
    @rtype: iterator

    @raise AssertionError: on a parsing error when C{skipError} is false
    '''
    for tft in textFeatureIterator(featureTable):
        try:
            feature = featureFactory(tft)
        except AssertionError:
            logging.debug("Parsing error on feature :\n===============\n%s\n===============", tft)
            if not skipError:
                # Bare raise keeps the traceback of the original failure.
                raise
            logging.debug("\t===> Error skipped")
            continue

        yield feature
178