00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016 import cPickle
00017 import logging
00018 import numpy
00019 import os
00020 import string
00021 import sys
00022
00023
00024
00025 import pyp_db
00026 import pyp_demog
00027 import pyp_io
00028 import pyp_metrics
00029 import pyp_nrm
00030 import pyp_utils
00031
00032 try:
00033 from psyco.classes import __metaclass__
00034 except ImportError:
00035 pass
00036
00037
00038
00039 class NewPedigree:
00040
00041
00042
00043
00044
00045
00046
00047 def __init__(self,kw={}, kwfile='pypedal.ini'):
00048 """
00049 __init__() initializes a NewPedigree object.
00050 """
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060 if len(kw) == 0:
00061 import dict4ini
00062 kw = dict4ini.DictIni(kwfile)
00063 if kw == {}:
00064 print '[ERROR]: pyp_newclasses.NewPedigree.__init__() was unable to load a configuration file named %s!' % ( kwfile )
00065 import sys
00066 sys.exit(0)
00067
00068
00069
00070 kw = kw.dict()
00071
00072
00073
00074
00075
00076
00077
00078
00079 if not kw.has_key('simulate_pedigree'): kw['simulate_pedigree'] = 0
00080 if kw['simulate_pedigree']:
00081
00082
00083 if not kw.has_key('simulate_n'): kw['simulate_n'] = 15
00084 if not kw.has_key('simulate_g'): kw['simulate_g'] = 3
00085 if not kw.has_key('simulate_ns'): kw['simulate_ns'] = 4
00086 if not kw.has_key('simulate_nd'): kw['simulate_nd'] = 4
00087 if not kw.has_key('simulate_mp'): kw['simulate_mp'] = 0
00088 if not kw.has_key('simulate_po'): kw['simulate_po'] = 0
00089 if not kw.has_key('simulate_fs'): kw['simulate_fs'] = 0
00090 if not kw.has_key('simulate_sr'): kw['simulate_sr'] = 0.5
00091 if not kw.has_key('simulate_ir'): kw['simulate_ir'] = 0.0
00092 if not kw.has_key('simulate_pmd'): kw['simulate_pmd'] = 100
00093 if not kw.has_key('simulate_save'): kw['simulate_save'] = 0
00094 else:
00095 if not kw.has_key('pedfile'): raise PyPedalPedigreeInputFileNameError
00096 if not kw.has_key('pedformat'): kw['pedformat'] = 'asd'
00097 if not kw.has_key('pedname'): kw['pedname'] = 'Untitled'
00098 if not kw.has_key('messages'): kw['messages'] = 'verbose'
00099 if not kw.has_key('renumber'): kw['renumber'] = 0
00100 if not kw.has_key('reorder'): kw['reorder'] = 0
00101 if not kw.has_key('reorder_max_rounds'): kw['reorder_max_rounds'] = 100
00102 if not kw.has_key('pedigree_is_renumbered'): kw['pedigree_is_renumbered'] = 0
00103 if not kw.has_key('set_generations'): kw['set_generations'] = 0
00104 if not kw.has_key('gen_coeff'): kw['gen_coeff'] = 0
00105 if not kw.has_key('set_ancestors'): kw['set_ancestors'] = 0
00106 if not kw.has_key('set_alleles'): kw['set_alleles'] = 0
00107 if not kw.has_key('set_offspring'): kw['set_offspring'] = 0
00108 if not kw.has_key('set_sexes'): kw['set_sexes'] = 0
00109 if not kw.has_key('pedcomp'): kw['pedcomp'] = 0
00110 if not kw.has_key('pedcomp_gens'): kw['pedcomp_gens'] = 3
00111 if not kw.has_key('sepchar'): kw['sepchar'] = ' '
00112 if not kw.has_key('alleles_sepchar'): kw['alleles_sepchar'] = '/'
00113 if not kw.has_key('counter'): kw['counter'] = 1000
00114 if not kw.has_key('slow_reorder'): kw['slow_reorder'] = 1
00115 if not kw.has_key('missing_bdate'): kw['missing_bdate'] = '01011900'
00116 if not kw.has_key('missing_byear'): kw['missing_byear'] = 1900
00117 if not kw.has_key('missing_parent'): kw['missing_parent'] = 0
00118 if not kw.has_key('missing_name'): kw['missing_name'] = 'Unknown Name'
00119 if not kw.has_key('missing_breed'): kw['missing_breed'] = 'Unknown Breed'
00120 if not kw.has_key('missing_herd'): kw['missing_herd'] = 'Unknown Herd'
00121 if not kw.has_key('missing_sex'): kw['missing_sex'] = 'u'
00122 if not kw.has_key('file_io'): kw['file_io'] = '1'
00123 if not kw.has_key('debug_messages'): kw['debug_messages'] = 0
00124 if not kw.has_key('form_nrm'): kw['form_nrm'] = 0
00125 if not kw.has_key('nrm_method'): kw['nrm_method'] = 'nrm'
00126 if not kw.has_key('nrm_format'): kw['nrm_format'] = 'text'
00127 if not kw.has_key('f_computed'): kw['f_computed'] = 0
00128 if not kw.has_key('log_ped_lines'): kw['log_ped_lines'] = 0
00129 if not kw.has_key('log_long_filenames'): kw['log_long_filenames'] = 0
00130 if not kw.has_key('pedigree_summary'): kw['pedigree_summary'] = 1
00131 if kw['pedigree_summary'] not in [0,1,2]:
00132 kw['pedigree_summary'] = 1
00133 if not kw.has_key('animal_type'): kw['animal_type'] = 'new'
00134 if kw['animal_type'] not in ['new','light']:
00135 kw['animal_type'] = 'new'
00136 if 'f' in kw['pedformat']:
00137 kw['f_computed'] = 1
00138
00139
00140 if kw['simulate_pedigree']:
00141 kw['pedfile'] = 'simulated_pedigree'
00142 kw['filetag'] = string.split(kw['pedfile'],'.')[0]
00143 if len(kw['filetag']) == 0:
00144 kw['filetag'] = 'untitled_pedigree'
00145
00146
00147
00148 if not kw.has_key('database_name'): kw['database_name'] = 'pypedal'
00149 if not kw.has_key('database_table'):
00150 kw['database_table'] = pyp_utils.string_to_table_name(kw['filetag'])
00151 else:
00152 kw['database_table'] = pyp_utils.string_to_table_name(kw['database_table'])
00153 if not kw.has_key('database_debug'): kw['database_debug'] = False
00154 if not kw.has_key('database_type'): kw['database_type'] = 'sqlite'
00155 if not kw.has_key('database_host'): kw['database_host'] = 'localhost'
00156 if not kw.has_key('database_user'): kw['database_user'] = 'anonymous'
00157 if not kw.has_key('database_passwd'): kw['database_passwd'] = 'anonymous'
00158 if not kw.has_key('database_port'): kw['database_port'] = ''
00159
00160
00161
00162
00163 if not kw.has_key('paper_size'): kw['paper_size'] = 'letter'
00164 if kw['paper_size'] == 'a4':
00165 kw['paper_size'] = 'A4'
00166 if kw['paper_size'] not in ['A4','letter']:
00167 kw['paper_size'] = 'letter'
00168 if not kw.has_key('default_unit'): kw['default_unit'] = 'inch'
00169 if kw['default_unit'] not in ['cm','inch']:
00170 kw['default_unit'] = 'inch'
00171
00172
00173
00174 if not kw.has_key('default_fontsize'): kw['default_fontsize'] = 10
00175 try:
00176 kw['default_fontsize'] = int(kw['default_fontsize'])
00177 except:
00178 kw['default_fontsize'] = 10
00179 if kw['default_fontsize'] < 1:
00180 kw['default_fontsize'] = 10
00181
00182
00183 if not kw.has_key('default_report'): kw['default_report'] = kw['filetag']
00184
00185
00186
00187 if not kw.has_key('newanimal_caller'): kw['newanimal_caller'] = 'loader'
00188
00189
00190
00191
00192 self.kw = kw
00193
00194
00195 self.pedigree = []
00196 self.metadata = {}
00197 self.idmap = {}
00198 self.backmap = {}
00199 self.namemap = {}
00200 self.backmap = {}
00201 self.namebackmap = {}
00202 self.stringmap = {}
00203
00204 self.starline = '*'*80
00205
00206 if not self.kw.has_key('logfile'):
00207 if kw['log_long_filenames']:
00208 self.kw['logfile'] = '%s_%s.log' % ( self.kw['filetag'], pyp_utils.pyp_datestamp() )
00209 else:
00210 self.kw['logfile'] = '%s.log' % ( self.kw['filetag'] )
00211 logging.basicConfig(level=logging.DEBUG,
00212 format='%(asctime)s %(levelname)-8s %(message)s',
00213 datefmt='%a, %d %b %Y %H:%M:%S',
00214 filename=self.kw['logfile'],
00215 filemode='w')
00216 logging.info('Logfile %s instantiated.',self.kw['logfile'])
00217 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00218 print '[INFO]: Logfile %s instantiated.' % (self.kw['logfile'])
00219
00220 try:
00221 _lpl = kw['log_ped_lines']
00222 kw['log_ped_lines'] = int(kw['log_ped_lines'])
00223 except ValueError:
00224 kw['log_ped_lines'] = 0
00225 log.warning('An incorrect value (%s) was provided for the option log_ped_lines, which must be a number greater than or equal 0. It has been set to 0.', _lpl)
00226 if kw['log_ped_lines'] < 0:
00227 kw['log_ped_lines'] = 0
00228 log.warning('A negative value (%s) was provided for the option log_ped_lines, which must be greater than or equal 0. It has been set to 0.', kw['log_ped_lines'])
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239 def load(self,pedsource='file',pedgraph=0,pedstream=''):
00240 """
00241 load() wraps several processes useful for loading and preparing a pedigree for
00242 use in an analysis, including reading the animals into a list of animal objects,
00243 forming lists of sires and dams, checking for common errors, setting ancestor
00244 flags, and renumbering the pedigree.
00245 """
00246
00247
00248 if self.kw['simulate_pedigree']:
00249 self.simulate()
00250
00251 elif pedsource == 'db':
00252 self.kw['pedformat'] = 'ASDx'
00253 self.kw['sepchar'] = ','
00254 logging.info('Loading from database %s.%s at %s.',self.kw['database_name'], \
00255 self.kw['database_table'], pyp_utils.pyp_nice_time())
00256 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00257 print '[INFO]: Loading from database %s.%s' % ( self.kw['database_name'], \
00258 self.kw['database_table'] )
00259 try:
00260
00261 if pyp_db.doesTableExist(self):
00262 conn = pyp_db.connectToDatabase(self)
00263 if conn:
00264 sql = 'SELECT * FROM %s' % ( self.kw['database_table'] )
00265 dbstream = conn.GetAll(sql)
00266 conn.Close()
00267
00268 self.preprocess(dbstream=dbstream)
00269 else:
00270 logging.error('Unable to connect to the database %s at %s.', \
00271 self.kw['database_name'], pyp_utils.pyp_nice_time())
00272 if self.kw['messages'] == 'verbose':
00273 print '[ERROR]: Unable to connect to the database %s at %s.' % \
00274 ( self.kw['database_name'], pyp_utils.pyp_nice_time() )
00275 sys.exit(0)
00276 except:
00277 logging.error('Unable to load pedigree from database %s.%s at %s.', \
00278 self.kw['database_name'], self.kw['database_table'], pyp_utils.pyp_nice_time())
00279 if self.kw['messages'] == 'verbose':
00280 print '[ERROR]: Unable to load pedigree from database %s.%s' % ( \
00281 self.kw['database_name'], \
00282 self.kw['database_table'] )
00283 sys.exit(0)
00284
00285 elif pedsource == 'graph':
00286 if pedgraph:
00287 self.fromgraph(pedgraph)
00288 else:
00289 logging.error('Unable to load pedigree from a directed graph: no pedgraph provided!')
00290 sys.exit(0)
00291
00292
00293
00294
00295 elif pedsource == 'graphfile':
00296 try:
00297 import networkx as NX
00298 except:
00299 logging.error('Unable to load pedigree from a directed graph stored in adjacency list format because the NetworkX module could not be imported!')
00300 sys.exit(0)
00301 if self.kw['pedfile']:
00302 try:
00303 pedgraph = NX.read_adjlist(self.kw['pedfile'])
00304 self.fromgraph(pedgraph)
00305 except:
00306 logging.error('Unable to load pedigree from a directed graph stored in adjacency list format!')
00307 sys.exit(0)
00308 else:
00309 logging.error('Unable to load pedigree from a directed graph stored in adjacency list format because no filename was provided!')
00310 sys.exit(0)
00311
00312
00313 elif pedsource == 'gedcomfile':
00314 if self.kw['pedfile']:
00315
00316
00317 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00318 print '[INFO]: Loading GEDCOM file %s.' % (self.kw['pedfile'])
00319 pedformat = pyp_io.load_from_gedcom(infilename=self.kw['pedfile'], \
00320 standalone = 0, \
00321 messages=self.kw['messages'], \
00322 missing_sex=self.kw['missing_sex'], \
00323 missing_parent=self.kw['missing_parent'], \
00324 missing_name=self.kw['missing_name'], \
00325 missing_byear=self.kw['missing_byear'])
00326 if pedformat != 'xxxx':
00327 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00328 print '[INFO]: Changing pedformat from %s to %s as part of GEDCOM file processing.' % ( self.kw['pedformat'], pedformat )
00329 logging.info('Changing pedformat from %s to %s.tmp as part of GEDCOM file processing.', self.kw['pedformat'], pedformat)
00330 self.kw['pedformat'] = pedformat
00331
00332 logging.info('Changing sepchar from %s to , as part of GEDCOM file processing.',self.kw['sepchar'])
00333 self.kw['sepchar'] = ','
00334
00335 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00336 print '[INFO]: Changing pedfile from %s to %s.tmp as part of GEDCOM file processing.' % ( self.kw['pedfile'], self.kw['pedfile'] )
00337 logging.info('Changing pedfile from %s to %s.tmp as part of GEDCOM file processing.',self.kw['pedfile'],self.kw['pedfile'])
00338 self.kw['pedfile'] = '%s.tmp' % ( self.kw['pedfile'] )
00339
00340 self.preprocess()
00341 else:
00342 if self.kw['messages'] == 'verbose':
00343 print '[load] Unable to load pedigree from a GEDCOM file because an invalid pedigree format code, %s, was returned!' % (pedformat)
00344 logging.error('Unable to load pedigree from a GEDCOM file because an invalid pedigree format code, %s, was returned!',pedformat)
00345
00346
00347
00348
00349
00350 else:
00351 if self.kw['messages'] == 'verbose':
00352 print '[load] Unable to load pedigree from a GEDCOM file because no filename was provided!'
00353 logging.error('Unable to load pedigree from a GEDCOM file because no filename was provided!')
00354 sys.exit(0)
00355
00356
00357
00358 elif pedsource == 'textstream':
00359
00360 self.kw['pedformat'] = 'ASD'
00361 self.kw['sepchar'] = ','
00362 logging.info('Preprocessing a textstream')
00363 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00364 print '[INFO]: Preprocessing a textstream'
00365 self.preprocess(textstream=pedstream)
00366 else:
00367 logging.info('Preprocessing %s',self.kw['pedfile'])
00368 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00369 print '[INFO]: Preprocessing %s' % (self.kw['pedfile'])
00370 self.preprocess()
00371
00372
00373 if self.kw['reorder'] == 1 and not self.kw['renumber']:
00374 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00375 print '\t[INFO]: Reordering pedigree at %s' % ( pyp_utils.pyp_nice_time() )
00376 logging.info('Reordering pedigree')
00377 if not self.kw['slow_reorder']:
00378 self.pedigree = pyp_utils.fast_reorder(self.pedigree)
00379 else:
00380 self.pedigree = pyp_utils.reorder(self.pedigree,missingparent=self.kw['missing_parent'],max_rounds=self.kw['reorder_max_rounds'])
00381
00382 if self.kw['renumber'] == 1:
00383 self.renumber()
00384 if self.kw['set_generations']:
00385 logging.info('Assigning generations')
00386 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00387 print '\t[INFO]: Assigning generations at %s' % ( pyp_utils.pyp_nice_time() )
00388 pyp_utils.set_generation(self)
00389 if self.kw['set_ancestors']:
00390 logging.info('Setting ancestor flags')
00391 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00392 print '\t[INFO]: Setting ancestor flags at %s' % ( pyp_utils.pyp_nice_time() )
00393 pyp_utils.set_ancestor_flag(self)
00394 if self.kw['set_sexes']:
00395 logging.info('Assigning sexes')
00396 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00397 print '\t[INFO]: Assigning sexes at %s' % ( pyp_utils.pyp_nice_time() )
00398 pyp_utils.assign_sexes(self)
00399 if self.kw['set_alleles']:
00400 logging.info('Gene dropping to compute founder genome equivalents')
00401 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00402 print '\t[INFO]: Gene dropping at %s' % ( pyp_utils.pyp_nice_time() )
00403 pyp_metrics.effective_founder_genomes(self)
00404 if self.kw['form_nrm']:
00405 logging.info('Forming numerator relationship matrix')
00406 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00407 print '\t[INFO]: Forming numerator relationship matrix at %s' % ( pyp_utils.pyp_nice_time() )
00408 self.nrm = NewAMatrix(self.kw)
00409 self.nrm.form_a_matrix(self.pedigree)
00410 if self.kw['set_offspring'] and not self.kw['renumber']:
00411 logging.info('Assigning offspring')
00412 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00413 print '\t[INFO]: Assigning offspring at %s' % ( pyp_utils.pyp_nice_time() )
00414 pyp_utils.assign_offspring(self)
00415 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00416 print '[INFO]: Creating pedigree metadata object'
00417 self.metadata = PedigreeMetadata(self.pedigree,self.kw)
00418 if self.kw['messages'] != 'quiet' and self.kw['pedigree_summary']:
00419 self.metadata.printme()
00420
00421 if self.kw['pedcomp']:
00422 logging.info('Calculating %s generation pedigree completeness', self.kw['pedcomp_gens'])
00423 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00424 print '[INFO]: Calculating %s generation pedigree completeness' % ( self.kw['pedcomp_gens'] )
00425 _foo = pyp_metrics.pedigree_completeness(self,self.kw['pedcomp_gens'])
00426
00427
00428
00429
00430
00431
00432
00433
00434
00435
00436 def save(self,filename='',outformat='o',idformat='o'):
00437 """
00438 save() writes a PyPedal pedigree to a user-specified file. The saved pedigree
00439 includes all fields recognized by PyPedal, not just the original fields read
00440 from the input pedigree file.
00441 """
00442
00443
00444
00445
00446
00447 if filename == '':
00448 filename = '%s_saved.ped' % ( self.kw['filetag'] )
00449 if self.kw['messages'] == 'verbose':
00450 print '[WARNING]: Saving pedigree to file %s to avoid overwriting %s.' % ( filename, self.kw['pedfile'] )
00451 logging.warning('Saving pedigree to file %s to avoid overwriting %s.',filename,self.kw['pedfile'])
00452 try:
00453 ofh = file(filename,'w')
00454 if self.kw['messages'] == 'verbose':
00455 print '[INFO]: Opened file %s for pedigree save at %s.' % ( filename, pyp_utils.pyp_nice_time() )
00456 logging.info('Opened file %s for pedigree save at %s.',filename, pyp_utils.pyp_nice_time())
00457
00458 if outformat == 'l':
00459
00460 _newpedformat = 'asdgx'
00461 if 'y' in self.kw['pedformat']:
00462 _newpedformat = '%sy' % ( _newpedformat )
00463 else:
00464 _newpedformat = '%sb' % ( _newpedformat )
00465 _newpedformat = '%sfrnleh' % (_newpedformat )
00466 else:
00467 if self.kw['f_computed']:
00468 _newpedformat = '%sf' % ( self.kw['pedformat'] )
00469 else:
00470 _newpedformat = self.kw['pedformat']
00471
00472
00473 ofh.write('# %s created by PyPedal at %s\n' % ( filename, pyp_utils.pyp_nice_time() ) )
00474 ofh.write('# Current pedigree metadata:\n')
00475 ofh.write('#\tpedigree file: %s\n' % (filename) )
00476 ofh.write('#\tpedigree name: %s\n' % (self.kw['pedname']) )
00477 ofh.write('#\tpedigree format: \'%s\'\n' % ( _newpedformat) )
00478 if idformat == 'o':
00479 ofh.write('#\tNOTE: Animal, sire, and dam IDs are RENUMBERED IDs, not original IDs!\n')
00480 ofh.write('# Original pedigree metadata:\n')
00481 ofh.write('#\tpedigree file: %s\n' % (self.kw['pedfile']) )
00482 ofh.write('#\tpedigree name: %s\n' % (self.kw['pedname']) )
00483 ofh.write('#\tpedigree format: %s\n' % (self.kw['pedformat']) )
00484 for _a in self.pedigree:
00485 if idformat == 'o':
00486 _outstring = '%s %s %s' % \
00487 (_a.originalID, self.pedigree[int(_a.sireID)-1].originalID, \
00488 self.pedigree[int(_a.damID)-1].originalID )
00489 else:
00490 _outstring = '%s %s %s' % (_a.animalID, _a.sireID, _a.damID )
00491 if 'g' in _newpedformat:
00492 _outstring = '%s %s' % ( _outstring, _a.gen )
00493 if 'p' in _newpedformat:
00494 _outstring = '%s %s' % ( _outstring, _a.gencoeff )
00495 if 'x' in _newpedformat:
00496 _outstring = '%s %s' % ( _outstring, _a.sex )
00497 if 'y' in _newpedformat:
00498 _outstring = '%s %s' % ( _outstring, _a.bd )
00499 else:
00500 _outstring = '%s %s' % ( _outstring, _a.by )
00501 if 'f' in _newpedformat:
00502 _outstring = '%s %s' % ( _outstring, _a.fa )
00503 if 'r' in _newpedformat:
00504 _outstring = '%s %s' % ( _outstring, _a.breed )
00505 if 'n' in _newpedformat:
00506 _outstring = '%s %s' % ( _outstring, _a.name )
00507 if 'l' in _newpedformat:
00508 _outstring = '%s %s' % ( _outstring, _a.alive )
00509 if 'e' in _newpedformat:
00510 _outstring = '%s %s' % ( _outstring, _a.age )
00511 if 'h' in _newpedformat or 'H' in _newpedformat:
00512 _outstring = '%s %s' % ( _outstring, _a.herd )
00513 _outstring = '%s %s' % ( _outstring, _a.originalHerd )
00514 if 'u' in _newpedformat:
00515 _outstring = '%s %s' % ( _outstring, _a.userField )
00516 ofh.write( '%s\n' % (_outstring) )
00517 ofh.close()
00518 if self.kw['messages'] == 'verbose':
00519 print '[INFO]: Closed file %s after pedigree save at %s.' % ( filename, pyp_utils.pyp_nice_time() )
00520 logging.info('Closed file %s after pedigree save at %s.',filename, pyp_utils.pyp_nice_time())
00521 return 1
00522 except:
00523 if self.kw['messages'] == 'verbose':
00524 print '[ERROR]: Unable to open file %s for pedigree save!' % ( filename )
00525 logging.error('Unable to open file %s for pedigree save.',filename)
00526 return 0
00527
00528
00529
00530
00531
00532
00533
00534 def savegraph(self,pedoutfile=0,pedgraph=0):
00535 """
00536 Preprocess a pedigree file, which includes reading the animals into a list, forming lists of sires and dams, and checking for common errors.
00537 """
00538 if not pedoutfile:
00539 pedoutfile = '%s.adjlist' % ( self.kw['pedfile'] )
00540 if not pedgraph:
00541 try:
00542 import pyp_network
00543 pedgraph = pyp_network.ped_to_graph(self)
00544 except:
00545 logging.error('[savegraph]: Unable to convert pedigree to a directed graph.')
00546 try:
00547 import networkx as NX
00548 NX.write_adjlist(pedgraph, pedoutfile)
00549 except:
00550 logging.error('[savegraph]: Unable to save directed graph to a file.')
00551
00552
00553
00554
00555
00556
00557 def savegedcom(self,pedoutfile=0):
00558 """
00559 Preprocess a pedigree file, which includes reading the animals into a list, forming lists of sires and dams, and checking for common errors.
00560 """
00561 if not pedoutfile:
00562 pedoutfile = '%s.ged' % ( self.kw['pedfile'] )
00563
00564 pyp_io.save_to_gedcom(self,pedoutfile)
00565 logging.error('[savegedcom]: Saved GEDCOM pedigree to the file %s.', pedoutfile)
00566
00567
00568
00569
00570
00571
00572
00573
00574
00575 def savedb(self,drop=False):
00576 """
00577 savedb() saves a pedigree to a database table in ASDx format
00578 for NewAnimals and LightAnimals.
00579 """
00580 _savedb_status = False
00581 _table_loaded = False
00582 _table_created = False
00583
00584
00585 if pyp_db.doesTableExist(self) and drop == True:
00586 pyp_db.deleteTable(self)
00587 conn = pyp_db.connectToDatabase(self)
00588 if conn:
00589 if not pyp_db.doesTableExist(self):
00590 try:
00591 sql = 'create table %s ( \
00592 animalName varchar(128) primary key, \
00593 sireName varchar(128), \
00594 damName varchar(128), \
00595 sex char(1) \
00596 );' % ( self.kw['database_table'] )
00597 cursor = conn.Execute(sql)
00598 cursor.Close()
00599 _table_created = True
00600 except:
00601 pass
00602 else:
00603 if self.kw['messages'] == 'verbose':
00604 print '[WARNING]: The table %s already exists in database %s and you told me to save the existing data. You may end up with duplicate data or multiple pedigrees stored in the same table!' % ( self.kw['database_table'], self.kw['database_name'] )
00605 logging.warning('The table %s already exists in database %s and you told me to save the existing data. You may end up with duplicate data or multiple pedigrees stored in the same table!',self.kw['database_table'], self.kw['database_name'])
00606
00607 if pyp_db.doesTableExist(self):
00608 try:
00609 for p in self.pedigree:
00610 an = p.name
00611 si = p.sireName
00612 da = p.damName
00613 if si == self.kw['missing_name']:
00614 si = self.kw['missing_parent']
00615 if da == self.kw['missing_name']:
00616 da = self.kw['missing_parent']
00617 sql = "INSERT INTO %s ( animalName, sireName, damName, sex ) VALUES ('%s', '%s', '%s', '%s')" % ( self.kw['database_table'], an, si, da, p.sex )
00618 cursor = conn.Execute(sql)
00619 cursor.Close()
00620 _table_loaded = True
00621 except:
00622 pass
00623 conn.Close()
00624 else:
00625 pass
00626 if _table_loaded:
00627 if self.kw['messages'] == 'verbose':
00628 print '[INFO]: Saved pedigree to %s.%s at %s.' % ( self.kw['database_name'], \
00629 self.kw['database_table'], pyp_utils.pyp_nice_time() )
00630 logging.info('Saved pedigree to %s.%s at %s.',self.kw['database_name'], \
00631 self.kw['database_table'], pyp_utils.pyp_nice_time())
00632 _savedb_status = True
00633 else:
00634 if self.kw['messages'] == 'verbose':
00635 print '[ERROR]: Could not save pedigree to %s.%s at %s.' % ( \
00636 self.kw['database_name'], self.kw['database_table'], pyp_utils.pyp_nice_time() )
00637 logging.error('Could not save pedigree to %s.%s at %s.',self.kw['database_name'], \
00638 self.kw['database_table'], pyp_utils.pyp_nice_time())
00639 return _savedb_status
00640
00641
00642
00643
00644
00645
00646
00647
00648
00649 def preprocess(self,textstream='',dbstream=''):
00650 """
00651 Preprocess a pedigree file, which includes reading the animals into a list, forming lists of sires and dams, and checking for common errors.
00652 """
00653 lineCounter = 0
00654 animalCounter = 0
00655 pedformat_codes = ['a','s','d','g','x','b','f','r','n','y','l','e','p','A','S','D','L','Z','h','H','u']
00656 critical_count = 0
00657 pedformat_locations = {}
00658 _sires = {}
00659 _dams = {}
00660
00661
00662
00663
00664
00665
00666
00667 if not self.kw['pedformat']:
00668 self.kw['pedformat'] = 'asd'
00669 logging.error('Null pedigree format string assigned a default value.')
00670 if self.kw['messages'] == 'verbose':
00671 print '[ERROR]: Null pedigree format string assigned a default value.'
00672
00673
00674 _pedformat = []
00675 for _char in self.kw['pedformat']:
00676 if _char in pedformat_codes and _char != 'Z':
00677 _pedformat.append(_char)
00678 elif _char in pedformat_codes and _char == 'Z':
00679 _pedformat.append('.')
00680 if self.kw['messages'] == 'verbose':
00681 print '[INFO]: Skipping one or more columns in the input file'
00682 logging.info('Skipping one or more columns in the input file as requested by the pedigree format string %s',self.kw['pedformat'])
00683 else:
00684
00685 _pedformat.append('.')
00686 if self.kw['messages'] == 'verbose':
00687 print '[DEBUG]: Invalid format code, %s, encountered!' % (_char)
00688 logging.error('Invalid column format code %s found while reading pedigree format string %s',_char,self.kw['pedformat'])
00689 for _char in _pedformat:
00690 try:
00691 pedformat_locations['animal'] = _pedformat.index('a')
00692 except ValueError:
00693 try:
00694 pedformat_locations['animal'] = _pedformat.index('A')
00695 except ValueError:
00696 print '[CRITICAL]: No animal identification code was specified in the pedigree format string %s! This is a critical error and the program will halt.' % (_pedformat)
00697 critical_count = critical_count + 1
00698 try:
00699 pedformat_locations['sire'] = _pedformat.index('s')
00700 except ValueError:
00701 try:
00702 pedformat_locations['sire'] = _pedformat.index('S')
00703 except ValueError:
00704 print '[CRITICAL]: No sire identification code was specified in the pedigree format string %s! This is a critical error and the program will halt.' % (_pedformat)
00705 critical_count = critical_count + 1
00706 try:
00707 pedformat_locations['dam'] = _pedformat.index('d')
00708 except ValueError:
00709 try:
00710 pedformat_locations['dam'] = _pedformat.index('D')
00711 except ValueError:
00712 print '[CRITICAL]: No dam identification code was specified in the pedigree format string %s! This is a critical error and the program will halt.' % (_pedformat)
00713 critical_count = critical_count + 1
00714 try:
00715 pedformat_locations['generation'] = _pedformat.index('g')
00716 except ValueError:
00717 pedformat_locations['generation'] = -999
00718 if self.kw['messages'] == 'all':
00719 print '[DEBUG]: No generation code was specified in the pedigree format string %s. This program will continue.' % (self.kw['pedformat'])
00720 try:
00721 pedformat_locations['gencoeff'] = _pedformat.index('p')
00722 if not self.kw['gen_coeff']:
00723 self.kw['gen_coeff'] = 1
00724 except ValueError:
00725 pedformat_locations['gencoeff'] = -999
00726 if self.kw['gen_coeff']:
00727 self.kw['gen_coeff'] = 0
00728 if self.kw['messages'] == 'all':
00729 print '[DEBUG]: No generation coefficient was specified in the pedigree format string %s. This program will continue.' % (self.kw['pedformat'])
00730 try:
00731 pedformat_locations['sex'] = _pedformat.index('x')
00732 except ValueError:
00733 pedformat_locations['sex'] = -999
00734 if self.kw['messages'] == 'all':
00735 print '[DEBUG]: No sex code was specified in the pedigree format string %s. This program will continue.' % (self.kw['pedformat'])
00736 try:
00737 pedformat_locations['birthyear'] = _pedformat.index('y')
00738 except ValueError:
00739 pedformat_locations['birthyear'] = -999
00740 if self.kw['messages'] == 'all':
00741 print '[DEBUG]: No birth date (YYYY) code was specified in the pedigree format string %s. This program will continue.' % (self.kw['pedformat'])
00742 try:
00743 pedformat_locations['inbreeding'] = _pedformat.index('f')
00744 except ValueError:
00745 pedformat_locations['inbreeding'] = -999
00746 if self.kw['messages'] == 'all':
00747 print '[DEBUG]: No coeffcient of inbreeding code was specified in the pedigree format string %s. This program will continue.' % (self.kw['pedformat'])
00748 try:
00749 pedformat_locations['breed'] = _pedformat.index('r')
00750 except ValueError:
00751 pedformat_locations['breed'] = -999
00752 if self.kw['messages'] == 'all':
00753 print '[DEBUG]: No breed code was specified in the pedigree format string %s. This program will continue.' % (self.kw['pedformat'])
00754 try:
00755 pedformat_locations['name'] = _pedformat.index('n')
00756 except ValueError:
00757 pedformat_locations['name'] = -999
00758 if self.kw['messages'] == 'all':
00759 print '[DEBUG]: No name code was specified in the pedigree format string %s. This program will continue.' % (self.kw['pedformat'])
00760 try:
00761 pedformat_locations['birthdate'] = _pedformat.index('b')
00762 except ValueError:
00763 pedformat_locations['birthdate'] = -999
00764 if self.kw['messages'] == 'all':
00765 print '[DEBUG]: No birth date (MMDDYYYY) code was specified in the pedigree format string %s. This program will continue.' % ( self.kw['pedformat'] )
00766 try:
00767 pedformat_locations['alive'] = _pedformat.index('l')
00768 except ValueError:
00769 pedformat_locations['alive'] = -999
00770 if self.kw['messages'] == 'all':
00771 print '[DEBUG]: No alive/dead code was specified in the pedigree format string %s. This program will continue.' % ( self.kw['pedformat'] )
00772 try:
00773 pedformat_locations['age'] = _pedformat.index('e')
00774 except ValueError:
00775 pedformat_locations['age'] = -999
00776 if self.kw['messages'] == 'all':
00777 print '[DEBUG]: No age code was specified in the pedigree format string %s. This program will continue.' % (self.kw['pedformat'])
00778 try:
00779 pedformat_locations['alleles'] = _pedformat.index('L')
00780 if self.kw['alleles_sepchar'] == self.kw['sepchar']:
00781 if self.kw['messages'] == 'all':
00782 print '[DEBUG]: The same separating character was specified for both columns of input (option sepchar) and alleles (option alleles_sepchar) in an animal\'s allelotype. The allelotypes will not be used in this pedigree.'
00783 logging.warning('The same separating character was specified for both columns of input (option sepchar) and alleles (option alleles_sepchar) in an animal\'s allelotype. The allelotypes will not be used in this pedigree.')
00784 pedformat_locations['alleles'] = -999
00785 except ValueError:
00786 pedformat_locations['alleles'] = -999
00787 if self.kw['messages'] == 'all':
00788 print '[DEBUG]: No alleles code was specified in the pedigree format string %s. This program will continue.' % (self.kw['pedformat'])
00789 try:
00790 pedformat_locations['herd'] = _pedformat.index('h')
00791 except ValueError:
00792 pedformat_locations['herd'] = -999
00793 if self.kw['messages'] == 'all':
00794 print '[DEBUG]: No herd code was specified in the pedigree format string %s. This program will continue.' % (self.kw['pedformat'])
00795 try:
00796 pedformat_locations['herd'] = _pedformat.index('H')
00797 except ValueError:
00798 pedformat_locations['herd'] = -999
00799 if self.kw['messages'] == 'all':
00800 print '[DEBUG]: No herd code was specified in the pedigree format string %s. This program will continue.' % (self.kw['pedformat'])
00801 try:
00802 pedformat_locations['userfield'] = _pedformat.index('u')
00803 except ValueError:
00804 pedformat_locations['userfield'] = -999
00805 if self.kw['messages'] == 'all':
00806 print '[DEBUG]: No user-defined field was specified in the pedigree format string %s. This program will continue.' % (self.kw['pedformat'])
00807
00808
00809
00810
00811
00812 if 'f' in self.kw['pedformat']:
00813 self.kw['f_computed'] = 1
00814 if critical_count > 0:
00815 sys.exit(0)
00816 else:
00817 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
00818 print '[INFO]: Opening pedigree file %s' % ( self.kw['pedfile'] )
00819 logging.info('Opening pedigree file %s',self.kw['pedfile'])
00820 if textstream == '' and dbstream == '':
00821 infile = open(self.kw['pedfile'],'r')
00822 elif dbstream == '':
00823
00824
00825
00826
00827 infile = textstream.split('\n')
00828 infile = infile[:-1]
00829 else:
00830
00831 infile = dbstream
00832 while 1:
00833 if textstream == '' and dbstream == '':
00834 line = infile.readline()
00835 elif dbstream == '':
00836 try:
00837 line = infile.pop()
00838
00839 except IndexError:
00840 logging.warning('Reached the end of the textstream after reading %s records.', lineCounter)
00841 line = False
00842 else:
00843
00844 try:
00845 dbline = dbstream.pop()
00846 line = ','.join(dbline)
00847 except IndexError:
00848 logging.warning('Reached the end of the dbstream after reading %s records.', lineCounter)
00849 line = False
00850 if not line:
00851 logging.warning('Reached end-of-line in %s after reading %s lines.', self.kw['pedfile'],lineCounter)
00852 break
00853 else:
00854
00855
00856
00857
00858
00859 lineCounter = lineCounter + 1
00860 if lineCounter <= self.kw['log_ped_lines']:
00861 logging.info('Pedigree (line %s): %s',lineCounter,string.strip(line))
00862 if line[0] == '#':
00863 logging.info('Pedigree comment (line %s): %s',lineCounter,string.strip(line))
00864 pass
00865 elif line[0] == '%':
00866 self.kw['old_pedformat'] = string.strip(line[1:])
00867 logging.warning('Encountered deprecated pedigree format string (%s) on line %s of the pedigree file.',line,lineCounter)
00868
00869
00870
00871 elif len(string.strip(line)) == 0:
00872 logging.warning('Encountered an empty (blank) record on line %s of the pedigree file.',lineCounter)
00873 else:
00874 animalCounter = animalCounter + 1
00875 if numpy.fmod(animalCounter,self.kw['counter']) == 0:
00876 logging.info('Records read: %s ',animalCounter)
00877
00878
00879
00880 l = string.split(string.strip(line),self.kw['sepchar'])
00881
00882
00883
00884
00885
00886 if len(self.kw['pedformat']) == len(l):
00887 self.namemap = {}
00888 self.namebackmap = {}
00889
00890
00891
00892
00893
00894 for i in range(len(l)):
00895 l[i] = string.strip(l[i])
00896 if len(l) < 3:
00897 errorString = 'The record on line %s of file %s is too short - all records must contain animal, sire, and dam ID numbers (%s fields detected).\n' % (lineCounter,self.kw['pedfile'],len(l))
00898 print '[ERROR]: %s' % (errorString)
00899 print '[ERROR]: %s' % (line)
00900 sys.exit(0)
00901 else:
00902 if l[0] != self.kw['missing_parent']:
00903 if self.kw['animal_type'] == 'light':
00904 an = LightAnimal(pedformat_locations,l,self.kw)
00905 else:
00906 an = NewAnimal(pedformat_locations,l,self.kw)
00907
00908
00909 else:
00910 errorString = 'The record on line %s of file %s has an animal ID that is the same as the missing value code specified for the pedigree. This animal is being skipped and will not have an entry in the pedigree.\n' % ( lineCounter, self.kw['pedfile'] )
00911 print '[ERROR]: %s' % (errorString)
00912 logging.error(errorString)
00913
00914
00915
00916
00917
00918
00919
00920 if 'S' in self.kw['pedformat']:
00921 if an.sireName != self.kw['missing_name']:
00922 _sires[an.sireName] = an.sireName
00923 else:
00924
00925 pass
00926 else:
00927
00928 if str(an.sireID) != str(self.kw['missing_parent']):
00929 _sires[an.sireID] = an.sireID
00930 if 'D' in self.kw['pedformat']:
00931 if an.damName != self.kw['missing_name']:
00932 _dams[an.damName] = an.damName
00933 else:
00934
00935 pass
00936 else:
00937 if str(an.damID) != str(self.kw['missing_parent']):
00938 _dams[an.damID] = an.damID
00939
00940
00941 self.pedigree.append(an)
00942
00943
00944
00945
00946
00947
00948
00949
00950
00951 if 'A' in self.kw['pedformat']:
00952 self.idmap[an.name] = an.name
00953 self.backmap[an.name] = an.name
00954 if self.kw['animal_type'] == 'new':
00955 self.namemap[an.name] = an.name
00956 self.namebackmap[an.name] = an.name
00957 else:
00958 self.idmap[an.animalID] = an.animalID
00959 self.backmap[an.animalID] = an.animalID
00960 if self.kw['animal_type'] == 'new':
00961 self.namemap[an.name] = an.animalID
00962 self.namebackmap[an.animalID] = an.name
00963 else:
00964 errorString = 'The record on line %s of file %s has %s columns, but the pedigree format string (%s) says that it should have %s columns. Please check your pedigree file and the pedigree format string for errors.\n' % ( lineCounter, self.kw['pedfile'], len(l), \
00965 self.kw['pedformat'], len(self.kw['pedformat']) )
00966 print '[ERROR]: %s' % (errorString)
00967 logging.error(errorString)
00968 sys.exit(0)
00969
00970
00971
00972
00973
00974 _null_locations = pedformat_locations
00975 for _n in _null_locations.keys():
00976 _null_locations[_n] = -999
00977 _null_locations['animal'] = 0
00978 _null_locations['sire'] = 1
00979 _null_locations['dam'] = 2
00980
00981
00982
00983
00984 for _s in _sires.keys():
00985 try:
00986 _i = self.idmap[_s]
00987 except KeyError:
00988 if ( 'S' in self.kw['pedformat'] and str(_s) != str(self.kw['missing_name']) ) or ( 's' in self.kw['pedformat'] and str(_s) != str(self.kw['missing_parent']) ):
00989 print '[NOTE]: Adding sire %s to the pedigree' % ( _s )
00990 an = NewAnimal(_null_locations,[_s,self.kw['missing_parent'],self.kw['missing_parent']],self.kw)
00991
00992 self.pedigree.append(an)
00993 self.idmap[an.animalID] = an.animalID
00994 self.backmap[an.animalID] = an.animalID
00995 self.namemap[an.name] = an.animalID
00996 self.namebackmap[an.animalID] = an.name
00997 logging.info('Added pedigree entry for sire %s' % (_s))
00998 print '[NOTE]: Added pedigree entry for sire %s' % (_s)
00999 for _d in _dams.keys():
01000 try:
01001 _i = self.idmap[_d]
01002 except KeyError:
01003 if ( 'D' in self.kw['pedformat'] and str(_d) != str(self.kw['missing_name']) ) or ( 'd' in self.kw['pedformat'] and str(_d) != str(self.kw['missing_parent']) ):
01004 an = NewAnimal(_null_locations,[_d,self.kw['missing_parent'],self.kw['missing_parent']],self.kw)
01005
01006 self.pedigree.append(an)
01007 self.idmap[an.animalID] = an.animalID
01008 self.backmap[an.animalID] = an.animalID
01009 self.namemap[an.name] = an.animalID
01010 self.namebackmap[an.animalID] = an.name
01011 logging.info('Added pedigree entry for dam %s' % (_d))
01012 print '[NOTE]: Added pedigree entry for dam %s' % (_d)
01013
01014
01015
01016 logging.info('Closing pedigree file')
01017 if textstream == '' and dbstream == '':
01018 infile.close()
01019 elif textstream == '':
01020 del dbstream
01021 del infile
01022 else:
01023 del textstream
01024 del infile
01025
01026
01027
01028
01029
01030
01031
01032 def fromgraph(self,pedgraph):
01033 """
01034 fromgraph() loads the animals to populate the pedigree from an
01035 XDiGraph object.
01036 """
01037 missing = ['sex','generation','gencoeff','birthyear', \
01038 'inbreeding','breed','name','birthdate','alive','age', \
01039 'alleles','herd','userfield']
01040 pedformat_locations = {}
01041 pedformat_locations['animal'] = 0
01042 pedformat_locations['sire'] = 1
01043 pedformat_locations['dam'] = 2
01044 for _m in missing:
01045 pedformat_locations[_m] = -999
01046 for _n in pedgraph.nodes():
01047 _s = self.kw['missing_parent']
01048 _d = self.kw['missing_parent']
01049 for _e in pedgraph.in_edges(_n):
01050 if _e[2] == 's': _s = _e[0]
01051 if _e[2] == 'd': _d = _e[0]
01052 an = NewAnimal(pedformat_locations,[_n,_s,_d],self.kw)
01053 self.pedigree.append(an)
01054 self.idmap[an.animalID] = an.animalID
01055 self.backmap[an.animalID] = an.animalID
01056 self.namemap[an.name] = an.animalID
01057 self.namebackmap[an.animalID] = an.name
01058 logging.info('Added pedigree entry for animal %s' % (_n))
01059
01060
01061
01062
01063
01064
01065
01066 def tostream(self):
01067 """
01068 tostream() creates a text stream from a pedigree.
01069 """
01070 streamout = ''
01071 try:
01072 for p in self.pedigree:
01073 an = p.name
01074 si = p.sireName
01075 da = p.damName
01076 if si == self.kw['missing_name']:
01077 si = self.kw['missing_parent']
01078 if da == self.kw['missing_name']:
01079 da = self.kw['missing_parent']
01080 streamout = '%s%s,%s,%s\\n' % ( streamout, an, si, da )
01081 if self.kw['messages'] == 'verbose':
01082 print '[INFO]: Created text stream from pedigree.'
01083 logging.info('Created text stream from pedigree.')
01084 except:
01085 if self.kw['messages'] == 'verbose':
01086 print '[ERROR]: Could not create text stream from pedigree!'
01087 logging.error('Could not create text stream from pedigree!')
01088 return streamout
01089
01090
01091
01092
01093
01094
01095
01096 def renumber(self):
01097 """
01098 renumber() updates the ID map after a pedigree has been renumbered so that all
01099 references are to renumbered rather than original IDs.
01100 """
01101 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
01102 print '\t[INFO]: Renumbering pedigree at %s' % ( pyp_utils.pyp_nice_time() )
01103 print '\t\t[INFO]: Reordering pedigree at %s' % ( pyp_utils.pyp_nice_time() )
01104 logging.info('Reordering pedigree')
01105 if ( 'b' in self.kw['pedformat'] or 'y' in self.kw['pedformat'] ) and not self.kw['slow_reorder']:
01106 self.pedigree = pyp_utils.fast_reorder(self.pedigree)
01107 else:
01108 self.pedigree = pyp_utils.reorder(self.pedigree,missingparent=self.kw['missing_parent'],max_rounds=self.kw['reorder_max_rounds'])
01109
01110 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
01111 print '\t\t[INFO]: Renumbering at %s' % ( pyp_utils.pyp_nice_time() )
01112 logging.info('Renumbering pedigree')
01113 self.pedigree = pyp_utils.renumber(self.pedigree,missingparent=self.kw['missing_parent'],animaltype=self.kw['animal_type'])
01114 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
01115 print '\t\t[INFO]: Updating ID map at %s' % ( pyp_utils.pyp_nice_time() )
01116 logging.info('Updating ID map')
01117 self.updateidmap()
01118
01119 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
01120 print '\t[INFO]: Assigning offspring at %s' % ( pyp_utils.pyp_nice_time() )
01121 logging.info('Assigning offspring')
01122 pyp_utils.assign_offspring(self)
01123 self.kw['pedigree_is_renumbered'] = 1
01124 self.kw['assign_offspring'] = 1
01125
01126
01127
01128
01129
01130
01131 def addanimal(self,animalID,sireID,damID):
01132
01133
01134
01135 _added = 0
01136 if not self.kw['pedigree_is_renumbered']:
01137 logging.warning('Adding an animal to an unrenumbered pedigree using NewPedigree::addanimal() is unsafe!')
01138 try:
01139 missing = ['sex','generation','gencoeff','birthyear', \
01140 'inbreeding','breed','name','birthdate','alive','age', \
01141 'alleles','herd','userfield']
01142 pedformat_locations = {}
01143 pedformat_locations['animal'] = 0
01144 pedformat_locations['sire'] = 1
01145 pedformat_locations['dam'] = 2
01146 for _m in missing:
01147 pedformat_locations[_m] = -999
01148 l = []
01149 l.append(animalID)
01150 if sireID == 0:
01151 l.append(self.kw['missing_parent'])
01152 elif 'S' in self.kw['pedformat']:
01153
01154 l.append(self.namebackmap[self.backmap[sireID]])
01155 else:
01156 l.append(str(sireID))
01157 if damID == 0:
01158 l.append(self.kw['missing_parent'])
01159 elif 'D' in self.kw['pedformat']:
01160
01161 l.append(self.namebackmap[self.backmap[damID]])
01162 else:
01163 l.append(str(damID))
01164
01165
01166
01167
01168
01169
01170 self.kw['newanimal_caller'] = 'addanimal'
01171 an = NewAnimal(pedformat_locations,l,self.kw)
01172 self.kw['newanimal_caller'] = 'loader'
01173 self.pedigree.append(an)
01174
01175
01176
01177
01178
01179 an.animalID = max(self.idmap.values()) + 1
01180 an.renumberedID = an.animalID
01181 if 'n' in self.kw['pedformat']:
01182 an.name = self.kw['missing_name']
01183 if an.name == an.originalID:
01184 an.name = an.renumberedID
01185 an.sireID = self.idmap[an.sireID]
01186 an.damID = self.idmap[an.damID]
01187
01188 self.idmap[an.originalID] = an.animalID
01189 self.backmap[an.animalID] = an.originalID
01190 self.namemap[an.name] = an.originalID
01191 self.namebackmap[an.originalID] = an.name
01192
01193 _added = 1
01194 except:
01195 _added = 0
01196 return _added
01197
01198
01199
01200
01201
01202
01203
01204
01205
01206
01207 def delanimal(self,animalID):
01208 _deleted = 0
01209 if not self.kw['pedigree_is_renumbered']:
01210 logging.warning('Deleting an animal from an unrenumbered pedigree using NewPedigree::delanimal() is unsafe!')
01211 try:
01212 anidx = self.idmap[animalID]-1
01213 del(self.namebackmap[self.pedigree[anidx].originalID])
01214 del(self.namemap[self.pedigree[anidx].name])
01215 del(self.backmap[self.pedigree[anidx].renumberedID])
01216 del(self.idmap[animalID])
01217 del(self.pedigree[anidx])
01218 _deleted = 1
01219 except:
01220 _deleted
01221 return _deleted
01222
01223
01224
01225
01226
01227
01228
01229 def updateidmap(self):
01230 """
01231 updateidmap() updates the ID map after a pedigree has been renumbered so that
01232 all references are to renumbered rather than original IDs.
01233 """
01234
01235 self.idmap = {}
01236 self.backmap = {}
01237 self.namemap = {}
01238 self.namebackmap = {}
01239 for _a in self.pedigree:
01240 try:
01241
01242
01243
01244
01245
01246 self.idmap[_a.originalID] = _a.animalID
01247 self.backmap[_a.renumberedID] = _a.originalID
01248 if self.kw['animal_type'] == 'new':
01249 self.namemap[_a.name] = _a.originalID
01250 self.namebackmap[_a.originalID] = _a.name
01251
01252 except KeyError:
01253 pass
01254
01255
01256
01257
01258
01259
01260
01261
01262 def printoptions(self):
01263 """
01264 printoptions() prints the contents of the options dictionary.
01265 """
01266 print '%s OPTIONS' % (self.kw['pedname'])
01267 for _k, _v in self.kw.iteritems():
01268 if len(_k) <= 14:
01269 print '\t%s:\t\t%s' % ( _k, _v )
01270 else:
01271 print '\t%s:\t%s' % ( _k, _v )
01272
01273
01274
01275
01276
01277
01278
01279
01280
01281
01282 def simulate(self):
01283 """
01284 Simulate simulates an arbitrary pedigree of size n with g generations
01285 starting from n_s base sires and n_d base dams. This method is based
01286 on the concepts and algorithms in the Pedigree::sample method from
01287 Matvec 1.1a (src/classes/pedigree.cpp), although all of the code in
01288 this implementation was written from scratch.
01289 """
01290
01291 if self.kw['messages'] == 'verbose':
01292 print '[SIMULATE]: Preparing to simulate a pedigree'
01293 logging.info('Preparing to simulate a pedigree')
01294
01295
01296 if len(self.pedigree) > 0:
01297 logging.error('The simulate() method did not create a new randomly-generated pedigree because the pedigree %s has already been populated with animals.', self.kw['pedname'])
01298 if self.kw['messages'] == 'verbose':
01299 print '[ERROR]: The simulate() method did not create a new randomly-generated pedigree because the pedigree %s has already been populated with animals.' % ( self.kw['pedname'] )
01300
01301
01302
01303 self.kw['simulate_n'] = int(self.kw['simulate_n'])
01304 self.kw['simulate_g'] = int(self.kw['simulate_g'])
01305 self.kw['simulate_ns'] = int(self.kw['simulate_ns'])
01306 self.kw['simulate_nd'] = int(self.kw['simulate_nd'])
01307 self.kw['simulate_sr'] = float(self.kw['simulate_sr'])
01308 self.kw['simulate_ir'] = float(self.kw['simulate_ir'])
01309 self.kw['simulate_pmd'] = int(self.kw['simulate_pmd'])
01310 if self.kw['simulate_n'] < 1:
01311 logging.warning('You asked that a pedigree containing an invalid number of animals, %s, be simulated. The default number of animals, 15, is being used.', self.kw['simulate_n'])
01312 if self.kw['messages'] == 'verbose':
01313 print '\t[SIMULATE]: You asked that a pedigree containing an invalid number of animals, %s, be simulated. The default number of animals, 15, is being used.' % ( self.kw['simulate_n'] )
01314 self.kw['simulate_n'] = 15
01315 if self.kw['simulate_g'] < 1:
01316 logging.warning('You asked that a pedigree containing an invalid number of generations, %s, be simulated. The default number of generations, 3, is being used.', self.kw['simulate_g'])
01317 if self.kw['messages'] == 'verbose':
01318 print '\t[SIMULATE]: You asked that a pedigree containing an invalid number of generations, %s, be simulated. The default number of generations, 3, is being used.' % ( self.kw['simulate_g'] )
01319 kw['simulate_g'] = 3
01320 if self.kw['simulate_ns'] < 1:
01321 logging.warning('You asked that a pedigree containing an invalid number of sires, %s, be simulated. The default number of sires, 4, is being used.', self.kw['simulate_ns'])
01322 if self.kw['messages'] == 'verbose':
01323 print '\t[SIMULATE]: You asked that a pedigree containing an invalid number of sires, %s, be simulated. The default number of sires, 4, is being used.' % ( self.kw['simulate_ns'] )
01324 self.kw['simulate_ns'] = 4
01325 if self.kw['simulate_nd'] < 1:
01326 logging.warning('You asked that a pedigree containing an invalid number of dams, %s, be simulated. The default number of dams, 4, is being used.', self.kw['simulate_nd'])
01327 if self.kw['messages'] == 'verbose':
01328 print '\t[SIMULATE]: You asked that a pedigree containing an invalid number of dams, %s, be simulated. The default number of dams, 4, is being used.' % ( self.kw['simulate_nd'] )
01329 self.kw['simulate_nd'] = 4
01330 if self.kw['simulate_sr'] < 0. or self.kw['simulate_sr'] > 1.:
01331 logging.warning('You asked that a pedigree with an invalid sex ratio, %s, be simulated. The default sex ratio, 0.5, is being used.', self.kw['simulate_sr'])
01332 if self.kw['messages'] == 'verbose':
01333 print '\t[SIMULATE]: You asked that a pedigree containing an invalid sex ratio, %s, be simulated. The default sex ratio, 0.5, is being used.' % ( self.kw['simulate_sr'] )
01334 self.kw['simulate_sr'] = 0.5
01335 if self.kw['simulate_ir'] < 0. or self.kw['simulate_ir'] > 1.:
01336 logging.warning('You asked that a pedigree with an invalid immigration rate, %s, be simulated. The default rate, 0.5, is being used.', self.kw['simulate_ir'])
01337 if self.kw['messages'] == 'verbose':
01338 print '\t[SIMULATE]: You asked that a pedigree containing an invalid immigration rate, %s, be simulated. The default rate, 0.5, is being used.' % ( self.kw['simulate_ir'] )
01339 self.kw['simulate_ir'] = 0.5
01340 if self.kw['simulate_ns'] >= self.kw['simulate_n']:
01341 logging.error('You asked that a pedigree with more founder sires than total animals be simulated. This is a fatal error!')
01342 if self.kw['messages'] == 'verbose':
01343 print '\t[SIMULATE]: You asked that a pedigree with more founder sires than total animals be simulated. This is a fatal error!'
01344 sys.exit(0)
01345 if self.kw['simulate_nd'] >= self.kw['simulate_n']:
01346 logging.error('You asked that a pedigree with more founder dams than total animals be simulated. This is a fatal error!')
01347 if self.kw['messages'] == 'verbose':
01348 print '\t[SIMULATE]: You asked that a pedigree with more founder dams than total animals be simulated. This is a fatal error!'
01349 sys.exit(0)
01350 if ( self.kw['simulate_ns'] + self.kw['simulate_ns'] ) > self.kw['simulate_n']:
01351 logging.error('You asked that a pedigree with more founder parents than total animals be simulated. This is a fatal error!')
01352 if self.kw['messages'] == 'verbose':
01353 print '\t[SIMULATE]: You asked that a pedigree with more founder parents than total animals be simulated. This is a fatal error!'
01354 sys.exit(0)
01355 if self.kw['simulate_pmd'] < 1:
01356 logging.warning('You specified an invalid number of maximum parent draws, %s. The default number of maximum draws, 100, is being used.', self.kw['simulate_pmd'])
01357 if self.kw['messages'] == 'verbose':
01358 print '\t[SIMULATE]: You specified an invalid number of maximum parent draws, %s. The default number of maximum draws, 100, is being used.' % ( self.kw['simulate_pmd'] )
01359 kw['simulate_pmd'] = 100
01360
01361 seed = 5048665
01362 numpy.random.seed(seed)
01363
01364
01365
01366
01367
01368
01369
01370 _snt = self.kw['simulate_n'] + 2
01371 _sng = self.kw['simulate_g']
01372 _sns = self.kw['simulate_ns']
01373 _snd = self.kw['simulate_nd']
01374 _ssr = self.kw['simulate_sr']
01375 _smp = self.kw['simulate_mp']
01376 _sir = self.kw['simulate_ir']
01377 _spo = self.kw['simulate_po']
01378 _sfs = self.kw['simulate_fs']
01379 _spmd = self.kw['simulate_pmd']
01380 _sff = int( round( _snt * 0.1 ) )
01381
01382
01383
01384
01385 _smd = int( round( ( _snt - _sns - _snd ) * ( 1 - _ssr ) ) )
01386
01387 _sms = int ( round( ( _snt - _sns - _snd ) * ( _ssr ) ) )
01388
01389
01390
01391
01392
01393
01394
01395 _smf = _snd + _smd + _sff
01396 _smm = _sns + _sms + _sff
01397
01398
01399
01400 _tgdam = _snd
01401 _tdam = _snd
01402 _tgsire = _sns
01403 _tsire = _sns
01404
01405 females = []
01406 males = []
01407
01408
01409
01410
01411
01412
01413
01414
01415 for i in range(_snd):
01416
01417 females.append(i)
01418
01419 for i in range(_sns):
01420 if i == 0:
01421
01422 males.append(0)
01423 else:
01424
01425 males.append(_snd + i)
01426
01427
01428
01429 _totalna = _snd + _sns
01430
01431
01432
01433 _npg = ( ( _snt - _snd - _sns ) / _sng ) + 1
01434
01435
01436 _pedholder = []
01437
01438
01439 for i in range(_snt+1):
01440 _pedholder.append(None)
01441 for i in range(_snd):
01442 _pedholder[i] = SimAnimal(females[i],0,0,'f',0)
01443 for i in range(_snd,_totalna):
01444 _pedholder[i] = SimAnimal(males[i-_snd],0,0,'m',0)
01445
01446
01447
01448
01449
01450 for g in range(_sng):
01451 _tgdam = _tdam
01452 _tgsire = _tsire
01453 if _totalna >= _snt:
01454 break
01455 for _j in range(_npg):
01456 if _totalna >= _snt:
01457 break
01458
01459
01460
01461
01462
01463 if numpy.random.ranf() > _sir:
01464
01465
01466
01467
01468 _ntry = 0
01469 while 1:
01470
01471
01472
01473
01474 _selsire = numpy.random.randint(0,len(males)-1)
01475
01476 _seldam = numpy.random.randint(0,len(females)-1)
01477
01478
01479
01480
01481
01482
01483
01484
01485
01486
01487
01488
01489 if _selsire == 0:
01490 _selsire = 1 - _smp
01491
01492 if _seldam == 0:
01493 _seldam = 1 - _smp
01494
01495
01496
01497
01498 _d = females[_seldam]
01499 _s = males[_selsire]
01500 _dd = _pedholder[_d].damID
01501 _ds = _pedholder[_d].sireID
01502 _sd = _pedholder[_s].damID
01503 _ss = _pedholder[_s].sireID
01504 _tryagain = 0
01505
01506
01507
01508
01509
01510
01511
01512
01513
01514
01515
01516
01517
01518
01519 if not _spo:
01520 if _d > 0 and _sd == _d:
01521
01522 _tryagain = 1
01523 if _s > 0 and _ss == _s:
01524
01525 _tryagain = 1
01526
01527
01528 if not _sfs:
01529 if _ss > 0 and _sd == 0:
01530 if _ss == _ds and _sd == _dd:
01531 _tryagain = 1
01532 if _tryagain:
01533 _ntry = _ntry + 1
01534 if _ntry > _spmd:
01535
01536 _tryagain = 0
01537 _seldam = 0
01538 _selsire = 0
01539
01540
01541 if not _tryagain:
01542 break
01543
01544 else:
01545
01546
01547 _selsire = 0
01548 _seldam = 0
01549
01550 _totalna = _totalna + 1
01551
01552
01553 if numpy.random.ranf() < _ssr and _tgdam < _smf:
01554 _tgdam = _tgdam + 1
01555
01556 females.append(_totalna)
01557 _pedholder[_totalna] = SimAnimal(_totalna,males[_selsire],females[_seldam],'f',g+1)
01558 else:
01559 if _tgsire < _smm:
01560 _tgsire = _tgsire + 1
01561
01562 males.append(_totalna)
01563 _pedholder[_totalna] = SimAnimal(_totalna,males[_selsire],females[_seldam],'m',g+1)
01564 else:
01565 _tgdam = _tgdam + 1
01566
01567 females.append(_totalna)
01568 _pedholder[_totalna] = SimAnimal(_totalna,males[_selsire],females[_seldam],'f',g+1)
01569 _tdam = _tgdam
01570 _tsire = _tgsire
01571
01572
01573
01574
01575
01576
01577
01578
01579
01580
01581
01582
01583
01584
01585
01586
01587 self.kw['pedformat'] = 'asdxg'
01588 pedformat_locations = {}
01589 pedformat_locations['animal'] = 0
01590 pedformat_locations['sire'] = 1
01591 pedformat_locations['dam'] = 2
01592 pedformat_locations['sex'] = 3
01593 pedformat_locations['generation'] = 4
01594 pedformat_locations['gencoeff'] = -999
01595 pedformat_locations['birthyear'] = -999
01596 pedformat_locations['inbreeding'] = -999
01597 pedformat_locations['breed'] = -999
01598 pedformat_locations['name'] = -999
01599 pedformat_locations['birthdate'] = -999
01600 pedformat_locations['alive'] = -999
01601 pedformat_locations['age'] = -999
01602 pedformat_locations['alleles'] = -999
01603 pedformat_locations['herd'] = -999
01604 pedformat_locations['userfield'] = -999
01605
01606
01607 for _ip in _pedholder:
01608 if _ip:
01609
01610
01611 if _ip.animalID <> 0:
01612
01613 l = []
01614 l.append(str(_ip.animalID))
01615
01616
01617
01618
01619 if _ip.sireID == 0:
01620 l.append(self.kw['missing_parent'])
01621 else:
01622 l.append(str(_ip.sireID))
01623 if _ip.damID == 0:
01624 l.append(self.kw['missing_parent'])
01625 else:
01626 l.append(str(_ip.damID))
01627 l.append(_ip.sex)
01628 l.append(_ip.gen)
01629 if self.kw['animal_type'] == 'light':
01630 an = LightAnimal(pedformat_locations,l,self.kw)
01631
01632 self.pedigree.append(an)
01633 self.idmap[an.animalID] = an.animalID
01634 self.backmap[an.animalID] = an.animalID
01635 self.namemap[an.name] = an.animalID
01636 self.namebackmap[an.animalID] = an.name
01637 else:
01638 an = NewAnimal(pedformat_locations,l,self.kw)
01639
01640 self.pedigree.append(an)
01641 self.idmap[an.animalID] = an.animalID
01642 self.backmap[an.animalID] = an.animalID
01643 self.namemap[an.name] = an.animalID
01644 self.namebackmap[an.animalID] = an.name
01645
01646 if self.kw['pedigree_save']:
01647 try:
01648 _sfn = '%s.ped' % ( self.kw['filetag'] )
01649 of = file(_sfn,'w')
01650 for _ip in _pedholder:
01651 if _ip and _ip.animalID <> 0:
01652 _ofl = '%s\n' % ( _ip.stringme() )
01653 of.write(_ofl)
01654 of.close()
01655 except:
01656 pass
01657
01658
01659 del(males)
01660 del(females)
01661 del(_pedholder)
01662
01663
01664 class SimAnimal:
01665 """The simple class is a placeholder used for simulating animals."""
01666
01667
01668
01669
01670
01671
01672
01673
01674 def __init__(self,animalID,sireID=0,damID=0,sex='u',gen=0):
01675 self.animalID = animalID
01676 self.sireID = sireID
01677 self.damID = damID
01678 self.sex = sex
01679 self.gen = gen
01680
01681
01682
01683
01684
01685
01686 def printme(self):
01687 try:
01688 print '\t%s\t%s\t%s\t%s\t%s' % (self.animalID, self.sireID, \
01689 self.damID, self.sex, self.gen)
01690 except:
01691 pass
01692
01693
01694
01695
01696
01697
01698 def stringme(self):
01699 try:
01700 mystring = '%s %s %s %s %s' % (self.animalID, self.sireID, \
01701 self.damID, self.sex, self.gen)
01702 return mystring
01703 except:
01704 return 0
01705
01706
01707
01708 class NewAnimal:
01709 """A simple class to hold animals records read from a pedigree file."""
01710
01711
01712
01713
01714
01715
01716 def __init__(self,locations,data,mykw):
01717 """
01718 __init__() initializes a NewAnimal() object.
01719 """
01720
01721
01722 if locations['animal'] != -999:
01723
01724
01725
01726 if 'A' in mykw['pedformat']:
01727 if mykw['newanimal_caller'] == 'addanimal':
01728 self.name = str(data[locations['animal']])
01729 self.animalID = data[locations['animal']]
01730 self.originalID = data[locations['animal']]
01731 else:
01732
01733
01734
01735 self.animalID = self.string_to_int(data[locations['animal']])
01736 self.originalID = self.string_to_int(data[locations['animal']])
01737 self.name = string.strip(str(data[locations['animal']]))
01738 else:
01739 if locations['name'] != -999:
01740 self.name = string.strip(str(data[locations['name']]))
01741 else:
01742 self.name = string.strip(str(data[locations['animal']]))
01743 self.animalID = int(string.strip(str(data[locations['animal']])))
01744 self.originalID = int(string.strip(str(data[locations['animal']])))
01745 if locations['sire'] != -999 and str(data[locations['sire']]) != str(mykw['missing_parent']):
01746 if 'S' in mykw['pedformat']:
01747 self.sireID = self.string_to_int(data[locations['sire']])
01748 else:
01749
01750
01751
01752 if type(data[locations['sire']]) == 'str':
01753 self.sireID = int(string.strip(data[locations['sire']]))
01754 else:
01755 self.sireID = int(data[locations['sire']])
01756
01757
01758 if type(data[locations['sire']]) == 'str':
01759 self.sireName = string.strip(data[locations['sire']])
01760 else:
01761 self.sireName = string.strip(str(data[locations['sire']]))
01762 else:
01763 self.sireID = mykw['missing_parent']
01764 self.sireName = mykw['missing_name']
01765 if locations['dam'] != -999 and str(data[locations['dam']]) != str(mykw['missing_parent']):
01766 if 'D' in mykw['pedformat']:
01767 self.damID = self.string_to_int(str(data[locations['dam']]))
01768 else:
01769 if type(data[locations['dam']]) == 'str':
01770 self.damID = int(string.strip(data[locations['dam']]))
01771 else:
01772 self.damID = int(data[locations['dam']])
01773 if type(data[locations['dam']]) == 'str':
01774 self.damName = string.strip(data[locations['dam']])
01775 else:
01776 self.damName = string.strip(str(data[locations['dam']]))
01777 else:
01778 self.damID = mykw['missing_parent']
01779 self.damName = mykw['missing_name']
01780 if locations['generation'] != -999:
01781 self.gen = data[locations['generation']]
01782 else:
01783 self.gen = -999
01784 if locations['gencoeff'] != -999.:
01785 self.gencoeff = data[locations['gencoeff']]
01786 else:
01787 self.gencoeff = -999.
01788 if locations['sex'] != -999:
01789 self.sex = string.strip(data[locations['sex']]).lower()
01790 else:
01791 self.sex = mykw['missing_sex']
01792 if locations['birthdate'] != -999:
01793 self.bd = string.strip(data[locations['birthdate']])
01794 else:
01795 self.bd = mykw['missing_bdate']
01796 if locations['birthyear'] != -999:
01797 self.by = int(string.strip(data[locations['birthyear']]))
01798 if self.by == 0:
01799 self.by = mykw['missing_byear']
01800 elif locations['birthyear'] == -999 and locations['birthdate'] != -999:
01801 self.by = int(string.strip(data[locations['birthdate']])[:4])
01802 else:
01803 self.by = mykw['missing_byear']
01804 if locations['inbreeding'] != -999:
01805 self.fa = float(string.strip(data[locations['inbreeding']]))
01806 else:
01807 self.fa = 0.
01808 if locations['breed'] != -999:
01809 self.breed = string.strip(data[locations['breed']])
01810 else:
01811 self.breed = mykw['missing_breed']
01812 if locations['age'] != -999:
01813 self.age = int(string.strip(data[locations['age']]))
01814 else:
01815 self.age = -999
01816 if locations['alive'] != -999:
01817 self.alive = int(string.strip(data[locations['alive']]))
01818 else:
01819 self.alive = 0
01820 if locations['herd'] != -999:
01821 if 'H' in mykw['pedformat']:
01822 self.herd = self.string_to_int(data[locations['herd']])
01823 else:
01824 self.herd = int(string.strip(data[locations['herd']]))
01825 self.originalHerd = string.strip(data[locations['herd']])
01826 else:
01827 self.herd = self.string_to_int(mykw['missing_herd'])
01828 self.originalHerd = mykw['missing_herd']
01829 self.renumberedID = -999
01830 self.igen = -999
01831 if str(self.sireID) == str(mykw['missing_parent']) and str(self.damID) == str(mykw['missing_parent']):
01832 self.founder = 'y'
01833 else:
01834 self.founder = 'n'
01835 self.paddedID = self.pad_id()
01836 self.ancestor = 0
01837 self.sons = {}
01838 self.daus = {}
01839 self.unks = {}
01840
01841
01842 if locations['alleles'] != -999:
01843 self.alleles = [string.split(data[locations['alleles']], self.kw['alleles_sepchar'])[0], \
01844 string.split(data[locations['alleles']],self.kw['alleles_sepchar'])[1]]
01845 else:
01846
01847 if self.founder == 'y':
01848 _allele_1 = '%s%s' % (self.paddedID,'__1')
01849 _allele_2 = '%s%s' % (self.paddedID,'__2')
01850 self.alleles = [_allele_1,_allele_2]
01851
01852 elif self.sireID == mykw['missing_parent']:
01853 _allele_1 = '%s%s' % (self.paddedID,'__1')
01854 _allele_2 = ''
01855 self.alleles = [_allele_1,_allele_2]
01856 elif self.damID == mykw['missing_parent']:
01857 _allele_1 = ''
01858 _allele_2 = '%s%s' % (self.paddedID,'__2')
01859 self.alleles = [_allele_1,_allele_2]
01860 else:
01861 self.alleles = ['','']
01862 self.pedcomp = -999.9
01863 if locations['userfield'] != -999:
01864 self.userField = string.strip(data[locations['userfield']])
01865 else:
01866 self.userField = ''
01867
01868
01869
01870
01871
01872 def printme(self):
01873 """
01874 Print the contents of an animal record - used for debugging.
01875 """
01876 print 'ANIMAL %s RECORD' % (self.animalID)
01877 print '\tAnimal ID:\t%s' % (self.animalID)
01878 print '\tAnimal name:\t%s' % (self.name)
01879 print '\tSire ID:\t%s' % (self.sireID)
01880 print '\tSire name:\t%s' % (self.sireName)
01881 print '\tDam ID:\t\t%s' % (self.damID)
01882 print '\tDam name:\t%s' % (self.damName)
01883 print '\tGeneration:\t%s' % (self.gen)
01884 print '\tGen coeff:\t%s' % (self.gencoeff)
01885 print '\tInferred gen.:\t%s' % (self.igen)
01886 print '\tBirth Year:\t%s' % (self.by)
01887 print '\tBirth Date:\t%s' % (self.bd)
01888 print '\tSex:\t\t%s' % (self.sex)
01889 print '\tCoI (f_a):\t%s' % (self.fa)
01890 print '\tFounder:\t%s' % (self.founder)
01891 print '\tSons:\t\t%s' % (self.sons)
01892 print '\tDaughters:\t%s' % (self.daus)
01893 print '\tUnknowns:\t%s' % (self.unks)
01894 print '\tAncestor:\t%s' % (self.ancestor)
01895 print '\tAlleles:\t%s' % (self.alleles)
01896 print '\tOriginal ID:\t%s' % (self.originalID)
01897 print '\tRenumbered ID:\t%s' % (self.renumberedID)
01898 print '\tPedigree Comp.:\t%s' % (self.pedcomp)
01899 print '\tBreed:\t%s' % (self.breed)
01900 print '\tAge:\t%s' % (self.age)
01901 print '\tAlive:\t%s' % (self.alive)
01902 print '\tHerd:\t%s' % (self.herd)
01903 print '\tHerd name:\t%s' % (self.originalHerd)
01904
01905
01906
01907
01908 def stringme(self):
01909 """
01910 Return the contents of an animal record as a string.
01911 """
01912 _me = ''
01913 _me = '%s%s' % ( _me, 'ANIMAL %s RECORD\n' % (int(self.animalID)) )
01914 _me = '%s%s' % ( _me, '\tAnimal ID:\t%s\n' % (int(self.animalID)) )
01915 _me = '%s%s' % ( _me, '\tAnimal name:\t%s\n' % (self.name) )
01916 _me = '%s%s' % ( _me, '\tSire ID:\t%s\n' % (int(self.sireID)) )
01917 _me = '%s%s' % ( _me, '\tSire name:\t%s\n' % (self.sireName) )
01918 _me = '%s%s' % ( _me, '\tDam ID:\t\t%s\n' % (int(self.damID)) )
01919 _me = '%s%s' % ( _me, '\tDam name:\t%s\n' % (self.damName) )
01920 _me = '%s%s' % ( _me, '\tGeneration:\t%s\n' % (self.gen) )
01921 _me = '%s%s' % ( _me, '\tGen coeff:\t%s\n' % (self.gencoeff) )
01922 _me = '%s%s' % ( _me, '\tInferred gen.:\t%s\n' % (self.igen) )
01923 _me = '%s%s' % ( _me, '\tBirth Year:\t%s\n' % (int(self.by)) )
01924 _me = '%s%s' % ( _me, '\tBirth Date:\t%s\n' % (int(self.bd)) )
01925 _me = '%s%s' % ( _me, '\tSex:\t\t%s\n' % (self.sex) )
01926 _me = '%s%s' % ( _me, '\tCoI (f_a):\t%s\n' % (self.fa) )
01927 _me = '%s%s' % ( _me, '\tFounder:\t%s\n' % (self.founder) )
01928 _me = '%s%s' % ( _me, '\tSons:\t\t%s\n' % (self.sons) )
01929 _me = '%s%s' % ( _me, '\tDaughters:\t%s\n' % (self.daus) )
01930 _me = '%s%s' % ( _me, '\tUnknowns:\t%s\n' % (self.unks) )
01931 _me = '%s%s' % ( _me, '\tAncestor:\t%s\n' % (self.ancestor) )
01932 _me = '%s%s' % ( _me, '\tAlleles:\t%s\n' % (self.alleles) )
01933 _me = '%s%s' % ( _me, '\tOriginal ID:\t%s\n' % (self.originalID) )
01934 _me = '%s%s' % ( _me, '\tRenumbered ID:\t%s\n' % (self.renumberedID) )
01935 _me = '%s%s' % ( _me, '\tPedigree Comp.:\t%s\n' % (self.pedcomp) )
01936 _me = '%s%s' % ( _me, '\tBreed:\t%s' % (self.breed) )
01937 _me = '%s%s' % ( _me, '\tAge:\t%s' % (self.age) )
01938 _me = '%s%s' % ( _me, '\tAlive:\t%s' % (self.alive) )
01939 _me = '%s%s' % ( _me, '\tHerd:\t%s\n' % (self.herd) )
01940 _me = '%s%s' % ( _me, '\tHerd name:\t%s\n' % (self.originalHerd) )
01941 return _me
01942
01943
01944
01945
01946
01947 def dictme(self):
01948 """
01949 Return the contents of an animal record in a dictionary.
01950 """
01951 try:
01952 _dict = {}
01953 _dict[animalID] = self.animalID
01954 _dict[name] = self.name
01955 _dict[sireID] = self.sireID
01956 _dict[sireName] = self.sireName
01957 _dict[damID] = self.damID
01958 _dict[damName] = self.damName
01959 _dict[gen] = self.gen
01960 _dict[gencoeff] = self.gencoeff
01961 _dict[igen] = self.igen
01962 _dict[by] = self.by
01963 _dict[bd] = self.bd
01964 _dict[sex] = self.sex
01965 _dict[fa] = self.fa
01966 _dict[founder] = self.founder
01967 _dict[sons] = self.sons
01968 _dict[daus] = self.daus
01969 _dict[unks] = self.unks
01970 _dict[ancestor] = self.ancestor
01971 _dict[alleles] = self.alleles
01972 _dict[originalID] = self.originalID
01973 _dict[renumberedID] = self.renumberedID
01974 _dict[pedcomp] = self.pedcomp
01975 _dict[breed] = self.breed
01976 _dict[age] = self.age
01977 _dict[alive] = self.alive
01978 _dict[herd] = self.herd
01979 _dict[originalHerd] = self.originalHerd
01980 return _dict
01981 except:
01982 return {}
01983
01984
01985
01986
01987 def trap(self):
01988 """
01989 Trap common errors in pedigree file entries.
01990 """
01991 if int(self.animalID) == int(self.sireID):
01992 print '[ERROR]: Animal %s has an ID number equal to its sire\'s ID (sire ID %s).\n' % (self.animalID,self.sireID)
01993 if int(self.animalID) == int(self.damID):
01994 print '[ERROR]: Animal %s has an ID number equal to its dam\'s ID (dam ID %s).\n' % (self.animalID,self.damID)
01995 if int(self.animalID) < int(self.sireID):
01996 print '[ERROR]: Animal %s is older than its sire (sire ID %s).\n' % (self.animalID,self.sireID)
01997 if int(self.animalID) < int(self.damID):
01998 print '[ERROR]: Animal %s is older than its dam (dam ID %s).\n' % (self.animalID,self.damID)
01999
02000
02001
02002
02003
02004
02005
02006 def pad_id(self):
02007 """
02008 Take an Animal ID, pad it to fifteen digits, and prepend the birthyear (or 1900
02009 if the birth year is unknown). The order of elements is: birthyear, animalID,
02010 count of zeros, zeros.
02011 """
02012
02013 l = len(str(self.animalID))
02014 pl = 15 - l - 1
02015 if pl > 0:
02016 zs = '0'*pl
02017 pid = '%s%s%s%s' % (self.by,zs,self.animalID,l)
02018 else:
02019 pid = '%s%s%s' % (self.by,self.animalID,l)
02020 return pid
02021
02022
02023
02024
02025 def string_to_int(self,idstring,mymaxint=9223372036854775807):
02026 """
02027 Convert a string to an integer.
02028 """
02029
02030
02031
02032
02033
02034
02035
02036
02037
02038
02039
02040 import md5
02041 try:
02042
02043 md5hash = md5.md5(idstring)
02044 result = string.atoi(md5hash.hexdigest(),16)
02045 except:
02046
02047
02048
02049
02050
02051
02052
02053
02054
02055
02056
02057 shift = 6
02058 mask = ~0 << ( 31 - shift )
02059 result = 0
02060 for c in idstring:
02061 result = ( ( result & mask ) ^ result << shift ^ ord(c) ) & mymaxint
02062 return result
02063
02064
02065
02066
02067
02068
02069 class LightAnimal:
02070 """
02071 The LightAnimal() class holds animals records read from a pedigree file. It
02072 is a much simpler object than the NewAnimal() object and is intended for use
02073 with the graph theoretic routines in pyp_network. The only attributes of these
02074 objects are: animal ID, sire ID, dam ID, original ID, birth year, and sex.A simple class to
02075 hold animals records read from a pedigree file.
02076 """
02077
02078
02079
02080
02081
02082
02083 def __init__(self,locations,data,mykw):
02084 """
02085 __init__() initializes a LightAnimal() object.
02086 """
02087 if locations['animal'] != -999:
02088
02089
02090
02091 if 'A' in mykw['pedformat']:
02092 self.animalID = self.string_to_int(data[locations['animal']])
02093 self.originalID = self.string_to_int(data[locations['animal']])
02094 else:
02095 self.animalID = string.strip(data[locations['animal']])
02096 self.originalID = string.strip(data[locations['animal']])
02097 if locations['sire'] != -999 and string.strip(data[locations['sire']]) != mykw['missing_parent']:
02098 if 'S' in mykw['pedformat']:
02099 if data[locations['sire']] == mykw['missing_parent']:
02100 self.sireID = data[locations['sire']]
02101 else:
02102 self.sireID = self.string_to_int(data[locations['sire']])
02103 else:
02104 self.sireID = string.strip(data[locations['sire']])
02105 else:
02106
02107 self.sireID = mykw['missing_parent']
02108 if locations['dam'] != -999 and string.strip(data[locations['dam']]) != mykw['missing_parent']:
02109 if 'D' in mykw['pedformat']:
02110 if data[locations['dam']] == mykw['missing_parent']:
02111 self.damID = data[locations['dam']]
02112 else:
02113 self.damID = self.string_to_int(data[locations['dam']])
02114 else:
02115 self.damID = string.strip(data[locations['dam']])
02116 else:
02117
02118 self.damID = mykw['missing_parent']
02119 if locations['sex'] != -999:
02120 self.sex = string.strip(data[locations['sex']])
02121 else:
02122 self.sex = 'u'
02123 if locations['birthyear'] != -999:
02124 self.by = int(string.strip(data[locations['birthyear']]))
02125 if self.by == 0:
02126 self.by = mykw['missing_byear']
02127 elif locations['birthyear'] == -999 and locations['birthdate'] != -999:
02128 self.by = int(string.strip(data[locations['birthdate']]))
02129 self.by = self.by[:4]
02130 else:
02131 self.by = mykw['missing_byear']
02132 self.paddedID = self.pad_id()
02133
02134
02135
02136
02137 def printme(self):
02138 """
02139 Print the contents of an animal record - used for debugging.
02140 """
02141 print 'ANIMAL %s RECORD' % (int(self.animalID))
02142 print '\tAnimal ID:\t%s' % (int(self.animalID))
02143 print '\tSire ID:\t%s' % (int(self.sireID))
02144 print '\tDam ID:\t\t%s' % (int(self.damID))
02145 print '\tBirth Year:\t%s' % (int(self.by))
02146 print '\tSex:\t\t%s' % (self.sex)
02147 print '\tOriginal ID:\t%s' % (self.originalID)
02148 print '\tRenumbered ID:\t%s' % (self.renumberedID)
02149
02150
02151
02152
02153 def stringme(self):
02154 """
02155 Return the contents of an animal record as a string.
02156 """
02157 _me = ''
02158 _me = '%s%s' % ( _me, 'ANIMAL %s RECORD\n' % (int(self.animalID)) )
02159 _me = '%s%s' % ( _me, '\tAnimal ID:\t%s\n' % (int(self.animalID)) )
02160 _me = '%s%s' % ( _me, '\tSire ID:\t%s\n' % (int(self.sireID)) )
02161 _me = '%s%s' % ( _me, '\tDam ID:\t\t%s\n' % (int(self.damID)) )
02162 _me = '%s%s' % ( _me, '\tBirth Year:\t%s\n' % (int(self.by)) )
02163 _me = '%s%s' % ( _me, '\tSex:\t\t%s\n' % (self.sex) )
02164 _me = '%s%s' % ( _me, '\tOriginal ID:\t%s\n' % (self.originalID) )
02165 _me = '%s%s' % ( _me, '\tRenumbered ID:\t%s\n' % (self.renumberedID) )
02166 return _me
02167
02168
02169
02170
02171
02172 def dictme(self):
02173 """
02174 Return the contents of an animal record in a dictionary.
02175 """
02176 try:
02177 _dict = {}
02178 _dict[animalID] = self.animalID
02179 _dict[sireID] = self.sireID
02180 _dict[damID] = self.damID
02181 _dict[by] = self.by
02182 _dict[sex] = self.sex
02183 _dict[originalID] = self.originalID
02184 _dict[renumberedID] = self.renumberedID
02185 return _dict
02186 except:
02187 return {}
02188
02189
02190
02191
02192 def trap(self):
02193 """
02194 Trap common errors in pedigree file entries.
02195 """
02196 if int(self.animalID) == int(self.sireID):
02197 print '[ERROR]: Animal %s has an ID number equal to its sire\'s ID (sire ID %s).\n' % (self.animalID,self.sireID)
02198 if int(self.animalID) == int(self.damID):
02199 print '[ERROR]: Animal %s has an ID number equal to its dam\'s ID (dam ID %s).\n' % (self.animalID,self.damID)
02200 if int(self.animalID) < int(self.sireID):
02201 print '[ERROR]: Animal %s is older than its sire (sire ID %s).\n' % (self.animalID,self.sireID)
02202 if int(self.animalID) < int(self.damID):
02203 print '[ERROR]: Animal %s is older than its dam (dam ID %s).\n' % (self.animalID,self.damID)
02204
02205
02206
02207
02208
02209
02210
02211 def pad_id(self):
02212 """
02213 Take an Animal ID, pad it to fifteen digits, and prepend the birthyear (or 1900
02214 if the birth year is unknown). The order of elements is: birthyear, animalID,
02215 count of zeros, zeros.
02216 """
02217
02218 l = len(self.animalID)
02219 pl = 15 - l - 1
02220 if pl > 0:
02221 zs = '0'*pl
02222 pid = '%s%s%s%s' % (self.by,zs,self.animalID,l)
02223 else:
02224 pid = '%s%s%s' % (self.by,self.animalID,l)
02225 return pid
02226
02227
02228
02229
02230 def string_to_int(self,idstring,mymaxint=9223372036854775807):
02231 """
02232 Convert a string to an integer.
02233 """
02234
02235
02236
02237
02238
02239
02240
02241
02242
02243
02244
02245 import md5
02246 try:
02247
02248 md5hash = md5.md5(idstring)
02249 result = string.atoi(md5hash.hexdigest(),16)
02250 except:
02251
02252
02253
02254
02255
02256
02257
02258
02259
02260
02261
02262 shift = 6
02263 mask = ~0 << ( 31 - shift )
02264 result = 0
02265 for c in idstring:
02266 result = ( ( result & mask ) ^ result << shift ^ ord(c) ) & mymaxint
02267 return result
02268
02269
02270
02271
02272 class PedigreeMetadata:
02273 """A class to hold metadata about pedigrees. Hopefully this will help improve performance in some procedures, as well as
02274 provide some useful summary data."""
02275
02276
02277
02278
02279
02280
02281
02282 def __init__(self,myped,kw):
02283 """
02284 Initialize a pedigree record.
02285 """
02286 self.kw = kw
02287 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
02288 print '\t[INFO]: Instantiating a new PedigreeMetadata() object...'
02289 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
02290 print '\t[INFO]: Naming the Pedigree()...'
02291 self.name = kw['pedname']
02292 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
02293 print '\t[INFO]: Assigning a filename...'
02294 self.filename = kw['pedfile']
02295 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
02296 print '\t[INFO]: Attaching a pedigree...'
02297 self.myped = myped
02298 if kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
02299 print '\t[INFO]: Setting the pedcode...'
02300 self.pedcode = kw['pedformat']
02301 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
02302 print '\t[INFO]: Counting the number of animals in the pedigree...'
02303 self.num_records = len(self.myped)
02304 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
02305 print '\t[INFO]: Counting and finding unique sires...'
02306 self.num_unique_sires, self.unique_sire_list = self.nus()
02307 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
02308 print '\t[INFO]: Counting and finding unique dams...'
02309 self.num_unique_dams, self.unique_dam_list = self.nud()
02310 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
02311 print '\t[INFO]: Setting renumbered flag...'
02312 self.renumbered = kw['renumber']
02313 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
02314 print '\t[INFO]: Counting and finding unique generations...'
02315 self.num_unique_gens, self.unique_gen_list = self.nug()
02316 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
02317 print '\t[INFO]: Counting and finding unique birthyears...'
02318 self.num_unique_years, self.unique_year_list = self.nuy()
02319 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
02320 print '\t[INFO]: Counting and finding unique founders...'
02321 self.num_unique_founders, self.unique_founder_list = self.nuf()
02322 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
02323 print '\t[INFO]: Counting and finding unique herds...'
02324 self.num_unique_herds, self.unique_herd_list = self.nuherds()
02325 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary']:
02326 print '\t[INFO]: Detaching pedigree...'
02327 self.myped = []
02328
02329
02330
02331 def printme(self):
02332 """
02333 Print the pedigree metadata.
02334 """
02335 print 'Metadata for %s (%s)' % (self.name,self.filename)
02336 print '\tRecords:\t\t%s' % (self.num_records)
02337 print '\tUnique Sires:\t\t%s' % (self.num_unique_sires)
02338 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary'] > 1:
02339 print '\tSires:\t\t%s' % (self.unique_sire_list)
02340 print '\tUnique Dams:\t\t%s' % (self.num_unique_dams)
02341 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary'] > 1:
02342 print '\tDams:\t\t%s' % (self.unique_dam_list)
02343 print '\tUnique Gens:\t\t%s' % (self.num_unique_gens)
02344 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary'] > 1:
02345 print '\tGenerations:\t\t%s' % (self.unique_gen_list)
02346 print '\tUnique Years:\t\t%s' % (self.num_unique_years)
02347 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary'] > 1:
02348 print '\tYear:\t\t%s' % (self.unique_year_list)
02349 print '\tUnique Founders:\t%s' % (self.num_unique_founders)
02350 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary'] > 1:
02351 print '\tFounders:\t\t%s' % (self.unique_founder_list)
02352 print '\tUnique Herds:\t\t%s' % (self.num_unique_herds)
02353 if self.kw['messages'] == 'verbose' and self.kw['pedigree_summary'] > 1:
02354 print '\tHerds:\t\t%s' % (self.unique_herd_list)
02355 print '\tPedigree Code:\t\t%s' % (self.pedcode)
02356
02357
02358
02359
02360
02361 def stringme(self):
02362 """
02363 Build a string from the pedigree metadata.
02364 """
02365 _me = ''
02366 _me = '%s%s' % ( _me, 'PEDIGREE %s (%s)\n' % (self.name,self.filename) )
02367 _me = '%s%s' % ( _me, '\tRecords:\t\t\t%s\n' % (self.num_records) )
02368 _me = '%s%s' % ( _me, '\tUnique Sires:\t\t%s\n' % (self.num_unique_sires) )
02369 _me = '%s%s' % ( _me, '\tUnique Dams:\t\t%s\n' % (self.num_unique_dams) )
02370 _me = '%s%s' % ( _me, '\tUnique Gens:\t\t%s\n' % (self.num_unique_gens) )
02371 _me = '%s%s' % ( _me, '\tUnique Years:\t\t%s\n' % (self.num_unique_years) )
02372 _me = '%s%s' % ( _me, '\tUnique Founders:\t%s\n' % (self.num_unique_founders) )
02373 _me = '%s%s' % ( _me, '\tUnique Herds:\t%s\n' % (self.num_unique_herds) )
02374 _me = '%s%s' % ( _me, '\tPedigree Code:\t\t%s\n' % (self.pedcode) )
02375 return _me
02376
02377
02378
02379 def fileme(self):
02380 """
02381 Save the pedigree metadata to a file.
02382 """
02383 outputfile = '%s%s%s' % (self.name,'_ped_metadata_','.dat')
02384 aout = open(outputfile,'w')
02385 line = '='*60+'\n'
02386 aout.write('%s\n' % line)
02387 aout.write('PEDIGREE %s (%s)\n' % self.name,self.filename)
02388 aout.write('\tPedigree Code:\t%s\n' % self.pedcode)
02389 aout.write('\tRecords:\t%s\n' % self.num_records)
02390 aout.write('\tUnique Sires:\t%s\n' % self.num_unique_sires)
02391 aout.write('\tUnique Dams:\t%s\n' % self.num_unique_dams)
02392 aout.write('\tUnique Founders:\t%s\n' % self.num_unique_founders)
02393 aout.write('\tUnique Gens:\t%s\n' % self.num_unique_gens)
02394 aout.write('\tUnique Years:\t%s\n' % self.num_unique_years)
02395 aout.write('\tUnique Herds:\t%s\n' % self.num_unique_herds)
02396 aout.write('%s\n' % line)
02397 aout.write('\tUnique Sire List:\t%s\n' % self.unique_sire_list)
02398 aout.write('\tUnique Dam List:\t%s\n' % self.unique_dam_list)
02399 aout.write('\tUnique Founder List:\t%s\n' % self.unique_founder_list)
02400 aout.write('\tUnique Gen List:\t%s\n' % self.unique_gen_list)
02401 aout.write('\tUnique Year List:\t%s\n' % self.unique_year_list)
02402 aout.write('\tUnique Herd List:\t%s\n' % self.unique_herd_list)
02403 aout.write('%s\n' % line)
02404 aout.close()
02405
02406
02407
02408
02409
02410
02411 def nus(self):
02412 """
02413 Count the number of unique sire IDs in the pedigree. Returns an integer count
02414 and a Python list of the unique sire IDs.
02415 """
02416 sirelist = set([x.sireID for x in self.myped if x.sireID != self.kw['missing_parent']])
02417 return len(sirelist), sirelist
02418
02419
02420
02421
02422
02423 def nud(self):
02424 """
02425 Count the number of unique dam IDs in the pedigree. Returns an integer count
02426 and a Python list of the unique dam IDs.
02427 """
02428 damlist = set([x.damID for x in self.myped if x.damID != self.kw['missing_parent']])
02429 return len(damlist), damlist
02430
02431
02432
02433
02434
02435
02436 def nug(self):
02437 """
02438 Count the number of unique generations in the pedigree. Returns an integer
02439 count and a Python list of the unique generations.
02440 """
02441 if self.kw['animal_type'] == 'light':
02442 genlist = []
02443 else:
02444 genlist = set([x.gen for x in self.myped])
02445 return len(genlist), genlist
02446
02447
02448
02449
02450
02451 def nuy(self):
02452 """
02453 Count the number of unique birth years in the pedigree. Returns an integer
02454 count and a Python list of the unique birth years.
02455 """
02456 bylist = set([x.by for x in self.myped])
02457 return len(bylist), bylist
02458
02459
02460
02461
02462
02463 def nuf(self):
02464 """
02465 Count the number of unique founders in the pedigree.
02466 """
02467 if self.kw['animal_type'] == 'light':
02468
02469
02470 flist = set([x.animalID for x in self.myped if x.sireID == self.kw['missing_parent'] and x.damID == self.kw['missing_parent']])
02471 else:
02472 flist = set([x.animalID for x in self.myped if x.founder == 'y'])
02473 return len(flist), flist
02474
02475
02476
02477
02478
02479
02480 def nuherds(self):
02481 """
02482 Count the number of unique herds in the pedigree.
02483 """
02484 if self.kw['animal_type'] == 'light':
02485 herdlist = []
02486 else:
02487 herdlist = set([x.originalHerd for x in self.myped])
02488 return len(herdlist), herdlist
02489
02490
02491
02492
02493
02494
02495
02496 class NewAMatrix:
02497
02498
02499
02500
02501
02502
02503 def __init__(self,kw):
02504 """
02505 Initialize a new numerator relationship matrix.
02506 """
02507 if not kw.has_key('messages'): kw['messages'] = 'verbose'
02508 if not kw.has_key('nrm_method'): kw['nrm_method'] = 'nrm'
02509
02510 if not kw.has_key('nrm_format'): kw['nrm_format'] = 'text'
02511 self.kw = kw
02512
02513
02514
02515
02516
02517
02518
02519 def form_a_matrix(self,pedigree):
02520 """
02521 form_a_matrix() calls pyp_nrm/fast_a_matrix() or pyp_nrm/fast_a_matrix_r()
02522 to form a NRM from a pedigree.
02523 """
02524 if self.kw['nrm_method'] not in ['nrm','frm']:
02525 self.kw['nrm_method'] = 'nrm'
02526 if self.kw['messages'] == 'verbose':
02527 print '[INFO]: Forming A-matrix from pedigree at %s.' % ( pyp_utils.pyp_nice_time() )
02528 logging.info('Forming A-matrix from pedigree')
02529
02530
02531 if self.kw['nrm_method'] == 'nrm':
02532 try:
02533 self.nrm = pyp_nrm.fast_a_matrix(pedigree,self.kw)
02534 if self.kw['messages'] == 'verbose':
02535 print '[INFO]: Formed A-matrix from pedigree using pyp_nrm.fast_a_matrix() at %s.' % ( pyp_utils.pyp_nice_time() )
02536 logging.info('Formed A-matrix from pedigree using pyp_nrm.fast_a_matrix()')
02537 except:
02538 if self.kw['messages'] == 'verbose':
02539 print '[ERROR]: Unable to form A-matrix from pedigree using pyp_nrm.fast_a_matrix() at %s.' % ( pyp_utils.pyp_nice_time() )
02540 logging.error('Unable to form A-matrix from pedigree using pyp_nrm.fast_a_matrix()')
02541 return 0
02542
02543
02544 else:
02545 try:
02546 self.nrm = pyp_nrm.fast_a_matrix_r(pedigree,self.kw)
02547 if self.kw['messages'] == 'verbose':
02548 print '[INFO]: Formed A-matrix from pedigree using pyp_nrm.fast_a_matrix_r() at %s.' % ( pyp_utils.pyp_nice_time() )
02549 logging.info('Formed A-matrix from pedigree using pyp_nrm.fast_a_matrix_r()')
02550 except:
02551 if self.kw['messages'] == 'verbose':
02552 print '[ERROR]: Unable to form A-matrix from pedigree using pyp_nrm.fast_a_matrix_r() at %s.' % ( pyp_utils.pyp_nice_time() )
02553 logging.error('Unable to form A-matrix from pedigree using pyp_nrm.fast_a_matrix_r()')
02554 return 0
02555
02556
02557
02558
02559
02560
02561
02562 def load(self,nrm_filename):
02563 """
02564 load() uses the Numarray Array Function "fromfile()" to load an array from a
02565 binary file. If the load is successful, self.nrm contains the matrix.
02566 """
02567 import math
02568 if self.kw['messages'] == 'verbose':
02569 print '[INFO]: Loading A-matrix from file %s at %s.' % ( nrm_filename, pyp_utils.pyp_nice_time() )
02570 logging.info('Loading A-matrix from file %s', nrm_filename)
02571 try:
02572 self.nrm = numpy.fromfile(nrm_filename,dtype='Float64',sep=self.kw['sepchar'])
02573 self.nrm = numpy.reshape( self.nrm, ( int(math.sqrt(self.nrm.shape[0])), int(math.sqrt(self.nrm.shape[0])) ) )
02574 if self.kw['messages'] == 'verbose':
02575 print '[INFO]: A-matrix successfully loaded from file %s at %s.' % ( nrm_filename, pyp_utils.pyp_nice_time() )
02576 logging.info('A-matrix successfully loaded from file %s', nrm_filename)
02577 return 1
02578 except:
02579 if self.kw['messages'] == 'verbose':
02580 print '[ERROR]: Unable to load A-matrix from file %s at %s.' % ( nrm_filename, pyp_utils.pyp_nice_time() )
02581 logging.error('Unable to load A-matrix from file %s', nrm_filename)
02582 return 0
02583
02584
02585
02586
02587
02588
02589 def save(self,nrm_filename,nrm_format=''):
02590 """
02591 save() uses the NumPy method "tofile()" to save an array to a binary file.
02592 """
02593 if self.kw['messages'] == 'verbose':
02594 print '[INFO]: Saving A-matrix to file %s at %s.' % ( nrm_filename, pyp_utils.pyp_nice_time() )
02595 logging.info('Saving A-matrix to file %s', nrm_filename)
02596 try:
02597
02598
02599 if nrm_format == '':
02600 nrm_format = self.kw['nrm_format']
02601
02602
02603 if nrm_format == 'binary':
02604 self.nrm.tofile(nrm_filename)
02605 else:
02606 self.nrm.tofile(nrm_filename, sep=self.kw['sepchar'])
02607 if self.kw['messages'] == 'verbose':
02608 print '[INFO]: A-matrix successfully saved to file %s at %s.' % ( nrm_filename, pyp_utils.pyp_nice_time() )
02609 logging.info('A-matrix successfully saved to file %s', nrm_filename)
02610 return 1
02611 except:
02612 if self.kw['messages'] == 'verbose':
02613 print '[ERROR]: Unable to save A-matrix to file %s at %s.' % ( nrm_filename, pyp_utils.pyp_nice_time() )
02614 logging.error('Unable to save A-matrix to file %s', nrm_filename)
02615 return 0
02616
02617
02618
02619
02620
02621
02622 def printme(self):
02623 """
02624 printme() prints the NRM to the screen.
02625 """
02626 try:
02627 print self.nrm
02628 except:
02629 pass
02630
02631
02632
02633
02634
02635
02636
02637
02638
02639
02640
02641
02642
02643 def loadPedigree(options='',optionsfile='pypedal.ini',pedsource='file',pedgraph=0,pedstream='',debugLoad=False):
02644 """
02645 loadPedigree() wraps pedigree creation and loading into a one-step
02646 process. If the user passes both a dictionary and a filename, the
02647 dictionary will be used instead of the filename unless the dictionary
02648 is empty.
02649 """
02650 if debugLoad == True:
02651 print '[DEBUG]: Debugging pyp_newclasses/loadPedigree()...'
02652 _pedigree = NewPedigree(kw=options,kwfile=optionsfile)
02653 _pedigree.load(pedsource=pedsource,pedgraph=pedgraph,pedstream=pedstream)
02654 return _pedigree
02655 else:
02656 try:
02657 _pedigree = NewPedigree(kw=options,kwfile=optionsfile)
02658 _pedigree.load(pedsource=pedsource,pedgraph=pedgraph,pedstream=pedstream)
02659 return _pedigree
02660 except:
02661 print '[ERROR]: pyp_newclasses.loadPedigree() was unable to instantiate and load the pedigree.'
02662 return 0
02663
02664
02665
02666
02667
02668
02669 class PyPedalError(Exception):
02670 """PyPedalError is the base class for exceptions in PyPedal."""
02671 pass
02672
02673
02674
02675
02676 class PyPedalPedigreeInputFileNameError(PyPedalError):
02677 """PyPedalPedigreeInputFileNameError is raised when a simulated pedigree
02678 is not requested and a pedigree file name is not provided.
02679 """
02680 def __init__(self):
02681 self.message = 'You did not request that a pedigree be simulated, and you did not provide the name of a pedigree file to be read.'
02682 def __str__(self):
02683 return repr(self.message)
02684
02685
02686
02687
02688
02689
02690
02691 class tail_recursive(object):
02692 CONTINUE = object()
02693 def __init__(self, func):
02694 self.func = func
02695 self.firstcall = True
02696 def __call__(self, *args, **kwd):
02697 try:
02698 if self.firstcall:
02699 self.firstcall = False
02700 while True:
02701 result = self.func(*args, **kwd)
02702 if result is self.CONTINUE:
02703 args, kwd = self.argskwd
02704 else:
02705 break
02706 else:
02707 self.argskwd = args, kwd
02708 return self.CONTINUE
02709 except:
02710 self.firstcall = True
02711 raise
02712 else:
02713 self.firstcall = True
02714 return result