00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045 import copy, logging, numpy, os, string, sys, time
00046 import pyp_demog
00047 import pyp_metrics
00048 import pyp_newclasses
00049
00050
00051
00052
00053
00054
00055
00056
def set_ancestor_flag(pedobj):
    """
    set_ancestor_flag() loops through a pedigree to build a dictionary of all of the
    parents in the pedigree.  It then sets the ancestor flag of every animal that
    appears as a sire or a dam to 1.  set_ancestor_flag() expects a reordered and
    renumbered pedigree as input, because parents are looked up by position: the
    animal with ID n is read from pedobj.pedigree[n-1].

    If pedobj.kw['file_io'] is true the parent IDs are also written, one per line,
    to the file '<filetag>_ancestors.dat'.  A failure to write that file is logged
    but does not change the return value.

    Returns 1 on success, 0 if an error occurred, and None if the pedigree holds
    fewer than two records (there is nothing to do in that case).
    """
    try:
        pedigree = pedobj.pedigree
        if len(pedigree) < 2:
            print('[ERROR]: pedobj.pedigree only contains one record -- nothing to do in set_ancestor_flag()!')
            return

        missing_parent = pedobj.kw['missing_parent']
        debug = pedobj.kw['messages'] == 'debug'
        parents = {}
        # Visit the animals from youngest to oldest WITHOUT reversing the list in
        # place.  Reversing the list and then indexing it with ID-1 flags the wrong
        # records, because position ID-1 no longer holds animal ID.
        for animal in reversed(pedigree):
            if debug:
                print('[DEBUG]:\t\tanimal: %s\tsire: %s\tdam: %s' % (animal.animalID, animal.sireID, animal.damID))
            for parent_id in (animal.sireID, animal.damID):
                if parent_id == missing_parent:
                    continue
                parent_id = int(parent_id)
                if parent_id not in parents:
                    # First time this parent is seen: remember it and flag it.
                    parents[parent_id] = parent_id
                    pedigree[parent_id - 1].ancestor = 1

        if pedobj.kw['file_io']:
            a_outputfile = '%s%s%s' % (pedobj.kw['filetag'], '_ancestors', '.dat')
            try:
                aout = open(a_outputfile, 'w')
                try:
                    aout.write('# FILE: %s\n' % a_outputfile)
                    aout.write('# ANCESTOR list produced by PyPedal.\n')
                    for parent_id in parents:
                        aout.write('%s\n' % parent_id)
                finally:
                    # Always release the file handle, even if a write fails.
                    aout.close()
                logging.info('pyp_utils/set_ancestor_flag() wrote file %s.' % (a_outputfile))
            except Exception:
                logging.error('pyp_utils/set_ancestor_flag() could not write file %s.' % (a_outputfile))

        return 1

    except Exception:
        # This handler covers the flagging logic, not file output, so report that.
        logging.error('pyp_utils/set_ancestor_flag() was unable to set the ancestor flags.')
        return 0
00112
00113
00114
00115
00116
00117
00118
00119
00120
def set_generation(pedobj):
    """
    set_generation() works through a pedigree to infer the generation to which each
    animal belongs, with founders belonging to generation 1.  This routine assumes
    that the pedigree is reordered and renumbered: parents are read from
    pedobj.pedigree[parentID-1] and must already have been processed.

    If pedobj.kw['gen_coeff'] is true a generation coefficient is computed as
        gencoeff = (sire.gencoeff + dam.gencoeff) / 2 + 1
    where an unknown parent contributes 0, and igen is gencoeff rounded to the
    nearest integer.  Otherwise igen is the larger of sire.igen+1 and dam.igen+1
    taken over the known parents (1 for founders).

    Returns 1 on success and 0 (after logging an error) on failure.
    """
    try:
        if pedobj.kw['messages'] == 'debug':
            print('[NOTE]: pyp_utils/set_generation() assigning inferred generations in pedigree %s.' % (pedobj.kw['pedname']))
        pedigree = pedobj.pedigree
        missing_parent = pedobj.kw['missing_parent']
        use_coefficients = pedobj.kw['gen_coeff']
        for i in range(pedobj.metadata.num_records):
            animal = pedigree[i]
            known_parents = []
            for parent_id in (animal.sireID, animal.damID):
                if parent_id != missing_parent:
                    known_parents.append(pedigree[int(parent_id) - 1])

            if use_coefficients:
                # Average of the two parental coefficients (unknown parent = 0) plus
                # one.  The sum must be formed BEFORE dividing by two.
                parent_sum = 0.
                for parent in known_parents:
                    parent_sum = parent_sum + parent.gencoeff
                animal.gencoeff = (parent_sum / 2.) + 1.
                animal.igen = int(round(animal.gencoeff))
            elif known_parents:
                animal.igen = max([parent.igen for parent in known_parents]) + 1
            else:
                animal.igen = 1
        logging.info('pyp_utils/set_generation() assigned inferred generations in pedigree %s' % (pedobj.kw['pedname']))
        return 1
    except Exception:
        logging.error('pyp_utils/set_generation() was unable to assign inferred generations in pedigree %s' % (pedobj.kw['pedname']))
        return 0
00164
00165
00166
00167
00168
00169
00170
00171
00172
def set_age(pedobj):
    """
    set_age() computes an age for every animal in a pedigree.  When the birth year
    (by) is known, the age is by - pyp_demog.BASE_DEMOGRAPHIC_YEAR.  If the by is
    unknown (-999), the inferred generation (igen) is used instead.  If the inferred
    generation is also unknown (-999), the age is set to -999.

    Returns 1 on success and 0 (after logging an error) on failure.
    """
    try:
        if pedobj.kw['messages'] == 'debug':
            print('[NOTE]: pyp_utils/set_age() assigning inferred ages in pedigree %s.' % (pedobj.kw['pedname']))
        for animal in pedobj.pedigree:
            if animal.by != -999:
                animal.age = animal.by - pyp_demog.BASE_DEMOGRAPHIC_YEAR
            elif animal.igen != -999:
                # No birth year: fall back on the inferred generation.
                animal.age = animal.igen
            else:
                animal.age = -999
        logging.info('pyp_utils/set_age() assigned ages in pedigree %s' % (pedobj.kw['pedname']))
        return 1
    except Exception:
        logging.error('pyp_utils/set_age() was unable to assign ages in pedigree %s' % (pedobj.kw['pedname']))
        return 0
00196
00197
00198
00199
00200
00201
00202
def set_species(pedobj,species='u'):
    """
    set_species() assigns a specie to every animal in the pedigree.  The same value
    is written to the species attribute of each animal; an empty species falls back
    on 'u'.

    Returns 1 on success and 0 (after logging an error) on failure.
    """
    try:
        if pedobj.kw['messages'] == 'debug':
            print('[NOTE]: pyp_utils/set_species() assigning specie %s to all animals in pedigree %s.' % (species, pedobj.kw['pedname']))
        # The fallback does not depend on the animal, so decide it once.
        label = species or 'u'
        for animal in pedobj.pedigree:
            animal.species = label
        logging.info('pyp_utils/set_species() assigned a specie in pedigree %s' % (pedobj.kw['pedname']))
        return 1
    except Exception:
        logging.error('pyp_utils/set_species() was unable to assign a specie in pedigree %s' % (pedobj.kw['pedname']))
        return 0
00221
00222
00223
00224
00225
00226
def assign_sexes(pedobj):
    """
    assign_sexes() corrects the sex of every parent in the pedigree: each animal that
    appears as a dam is given sex 'f' and each animal that appears as a sire is given
    sex 'm'.  Animals that are never used as a parent are left unchanged.  Parents
    are read from pedobj.pedigree[parentID-1], so the pedigree must be renumbered.

    When pedobj.kw['debug_messages'] is true a line is printed for every sex that is
    changed.

    Returns 1 on success and 0 (after logging an error) on failure.
    """
    try:
        if pedobj.kw['messages'] == 'verbose':
            print('[NOTE]: pyp_utils/assign_sexes() assigning a sex to all animals in pedigree %s.' % (pedobj.kw['pedname']))
        pedigree = pedobj.pedigree
        missing_parent = pedobj.kw['missing_parent']
        for animal in pedigree:
            # The dam is handled before the sire, so an ID used in both roles ends
            # up as 'm'.
            for parent_id, parent_sex in ((animal.damID, 'f'), (animal.sireID, 'm')):
                if parent_id == missing_parent:
                    continue
                parent = pedigree[int(parent_id) - 1]
                if parent.sex != parent_sex:
                    if pedobj.kw['debug_messages']:
                        print('\t\tAnimal %s sex changed from\t%s\tto\t%s' % (parent_id, parent.sex, parent_sex))
                    parent.sex = parent_sex
        logging.info('pyp_utils/assign_sexes() assigned sexes in pedigree %s' % (pedobj.kw['pedname']))
        return 1
    except Exception:
        logging.error('pyp_utils/assign_sexes() was unable to assign sexes in pedigree %s' % (pedobj.kw['pedname']))
        return 0
00262
00263
00264
00265
00266
00267
def assign_offspring(pedobj):
    """
    assign_offspring() records each animal in the offspring dictionaries of its known
    parent(s).  Every animal first gets empty sons, daus and unks dictionaries.  If
    the pedigree format code (pedobj.kw['pedformat']) contains 'x' (sex), males go in
    their parents' sons dictionary, females in the daus dictionary, and animals of
    any other sex in the unks dictionary.  Without 'x' in the format every offspring
    goes in the unks (unknown sex) dictionary.  Entries are stored as
    {animalID: animalID}.

    Parents are read from pedobj.pedigree[parentID-1], so the pedigree must be
    renumbered.

    Returns 1 on success and 0 (after logging an error) on failure.
    """
    try:
        if pedobj.kw['messages'] == 'debug':
            print('[NOTE]: pyp_utils/assign_offspring() assigning offspring to all parents in pedigree %s.' % (pedobj.kw['pedname']))
        pedigree = pedobj.pedigree
        missing_parent = pedobj.kw['missing_parent']

        # Start every animal with empty offspring dictionaries.
        for animal in pedigree:
            record = pedigree[int(animal.animalID) - 1]
            record.sons = {}
            record.daus = {}
            record.unks = {}

        sex_in_format = 'x' in pedobj.kw['pedformat']
        for animal in pedigree:
            # The sex attribute is only consulted when the format says it exists.
            if sex_in_format and animal.sex in ('m', 'M'):
                bucket = 'sons'
            elif sex_in_format and animal.sex in ('f', 'F'):
                bucket = 'daus'
            else:
                bucket = 'unks'
            for parent_id in (animal.sireID, animal.damID):
                if parent_id != missing_parent:
                    parent = pedigree[int(parent_id) - 1]
                    getattr(parent, bucket)[animal.animalID] = animal.animalID
        logging.info('pyp_utils/assign_offspring() assigned offspring in pedigree %s' % (pedobj.kw['pedname']))
        return 1
    except Exception:
        logging.error('pyp_utils/assign_offspring() was unable to assign offspring in pedigree %s' % (pedobj.kw['pedname']))
        return 0
00321
00322
00323
00324
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
def reorder(myped,filetag='_reordered_',io='no',missingparent=0, debug=0 , max_rounds=100):
    """
    Reorder a pedigree such that parents precede their offspring in the pedigree.
    myped is reordered in place and is also returned.

    The procedure works in two steps.  First, every founder (sire and dam both equal
    to missingparent) is moved to the front of the list.  Then the list is scanned
    repeatedly: any animal that is positioned before one of its parents is moved to
    the position just after its later parent.  Two dictionaries (animal ID -> index
    and index -> animal ID) are kept up to date so that parent positions can be
    looked up without searching the list.  Scanning stops after a pass in which
    nothing moved, or after max_rounds passes (in which case an error is logged and
    printed, and the pedigree may not be correctly ordered).

    Records that are moved are replaced in the list by shallow copies of themselves.
    A parent ID that does not occur as an animal ID in myped raises a KeyError.

    If io is 'yes' the reordered pedigree is written in 'asd' format to the file
    '<filetag>_reordered.ped'.

    reorder() can be slow on large pedigrees because every move is a list insert and
    a list delete.
    """
    l = len(myped)
    _passnum = 1
    # Only reported in the debugging output; it is never updated.
    _last_founder = 0

    # Step 1: move the founders to the front.  Deleting position f and inserting at
    # position 0 leaves every position after f untouched, so the remaining
    # (ascending) founder indices stay valid.
    founderlist = []
    for i in range(l):
        if myped[i].sireID == missingparent and myped[i].damID == missingparent:
            founderlist.append(i)
    for f in founderlist:
        _founder = copy.copy(myped[f])
        del myped[f]
        myped.insert(0, _founder)

    # Step 2: index the current order in both directions.
    orderdict, orderbackdict = {}, {}
    for i in range(len(myped)):
        orderdict[myped[i].animalID] = i
        orderbackdict[i] = myped[i].animalID

    missing = str(missingparent)
    while True:
        _moved_counter = 0
        if debug:
            print('='*70)
            print('[DEBUG]: Pass %s' % ( _passnum ))
            print('_last_founder:  %s' % ( _last_founder ))
        for i in range(l):
            animal = myped[i]
            animalid = animal.animalID
            sireid = animal.sireID
            damid = animal.damID

            _anidx = orderdict[animalid]
            # Position of the later of the known parents; -1 if there is none.
            _maxidx = -1
            if str(sireid) != missing:
                _maxidx = orderdict[sireid]
            if str(damid) != missing:
                _maxidx = max(_maxidx, orderdict[damid])

            if _anidx < _maxidx:
                # Put a copy just after the later parent and drop the old record;
                # the animal ends up at index _maxidx.
                myped.insert(_maxidx+1, copy.copy(animal))
                del myped[i]
                # Everything that sat between the old and new position moved up
                # one place.
                for idx in range(_anidx, _maxidx):
                    anmov = orderbackdict[idx+1]
                    orderdict[anmov] = idx
                    orderbackdict[idx] = anmov
                orderdict[animalid] = _maxidx
                orderbackdict[_maxidx] = animalid
                if debug:
                    print('Moved animal %s (%s) ahead of its parents (sire %s (%s), dam %s (%s).' % ( animalid, animal.name, sireid, animal.sireName, damid, animal.damName ))
                    print('\tNew animal index:  %s' % ( orderdict[animalid] ))
                    if str(sireid) != missing:
                        print('\tNew sire index:  %s' % ( orderdict[sireid] ))
                    if str(damid) != missing:
                        print('\tNew dam index:  %s' % ( orderdict[damid] ))
                _moved_counter = _moved_counter + 1

        if debug:
            for m in myped:
                print('%s %s %s %s %s %s' % ( m.animalID, m.name, m.sireID, m.sireName, m.damID, m.damName ))

        if _moved_counter == 0:
            break
        if _passnum == max_rounds:
            logging.error('pyp_utils/reorder() was unable to reorder the pedigree in %s rounds. Subsequent calculations that depend on the pedigree being correctly reordered may produce incorrect answers.' % ( _passnum ))
            print('[ERROR]: pyp_utils/reorder() was unable to reorder the pedigree in %s rounds. Subsequent calculations that depend on the pedigree being correctly reordered may produce incorrect answers.' % ( _passnum ))
            break
        _passnum = _passnum + 1

    if io == 'yes':
        # Write the reordered pedigree to a file.
        a_outputfile = '%s%s%s' % (filetag,'_reordered','.ped')
        aout = open(a_outputfile,'w')
        try:
            aout.write('# FILE: %s\n' % (a_outputfile))
            aout.write('# REORDERED pedigree produced by PyPedal.\n')
            aout.write('% asd\n')
            for animal in myped:
                # The three values must be passed to % as ONE tuple.
                aout.write('%s,%s,%s\n' % (animal.animalID, animal.sireID, animal.damID))
        finally:
            aout.close()

    return myped
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
def fast_reorder(myped,filetag='_new_reordered_',io='no',debug=0):
    """
    Reorder a pedigree by sorting the animals on their padded IDs.

    Each animal's paddedID is converted to an integer, the integers are sorted in
    ascending order, and a NEW list holding the animals in that order is built and
    returned.  Animals that share a padded ID collapse onto a single record.
    NOTE(review): this presumably relies on padded IDs being constructed so that
    parents always sort before their offspring -- confirm against the code that
    builds paddedID.

    Side effect: the list passed in as myped is reversed in place; it is NOT the
    list that is returned.

    If io is 'yes' the reordered pedigree is written in 'asd' format to the file
    '<filetag>_reord.ped'.

    reorder() is slow but moves animals relative to their parents explicitly.
    fast_reorder() is fast, but it depends entirely on the padded IDs.  Use this
    procedure at your own risk!
    """
    l = len(myped)
    idlist = []
    animalmap = {}

    myped.reverse()
    # First, build a dictionary of animal objects and a list of padded IDs.
    if debug == 1:
        print('\tPedigree contains %s animals.' % (l))
        print('\tMaking a dictionary of animal objects')
        print('\tMaking a list of padded animal IDs')
    for i in range(l):
        if debug == 1:
            print('\tDEBUG\tID %s: %s = %s %s %s' % (i,myped[i].animalID,myped[i].paddedID,myped[i].sireID,myped[i].damID))
        # Key the map on the integer value so that the lookup below cannot fail for
        # padded IDs with leading zeros ('0003' -> 3 -> '3').
        _padded = int(myped[i].paddedID)
        animalmap[_padded] = myped[i]
        idlist.append(_padded)

    # Without this sort the procedure would only reverse the pedigree.
    idlist.sort()

    myped = []
    if debug == 1:
        print('[DEBUG]: %s elements in idlist' % (len(idlist)))
        print('[DEBUG]: Printing reordered pedigree...')
    for i in range(len(idlist)):
        myped.append(animalmap[idlist[i]])
        if debug == 1:
            print('\t[DEBUG]:\tID %s: %s = %s' % (i,myped[i].animalID,myped[i].paddedID))

    if io == 'yes':
        # Write the reordered pedigree to a file.
        a_outputfile = '%s%s%s' % (filetag,'_reord','.ped')
        aout = open(a_outputfile,'w')
        try:
            aout.write('# FILE: %s\n' % (a_outputfile))
            aout.write('# REORDERED pedigree produced by PyPedal using fast_reorder().\n')
            aout.write('% asd\n')
            for animal in myped:
                # The three values must be passed to % as ONE tuple.
                aout.write('%s,%s,%s\n' % (animal.animalID, animal.sireID, animal.damID))
        finally:
            aout.close()
    return myped
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
00610
00611
00612
def renumber(myped, filetag='_renumbered_', io='no', outformat='0', debug=0,
             returnmap=0, missingparent=0, animaltype='new', cleanmap=True):
    """
    renumber() takes a pedigree as input and renumbers it such that the first animal
    in the pedigree has an ID of 1 and the n-th animal has an ID of n.  The animals
    are renumbered in place: animalID and renumberedID are set to the new ID, and
    sire and dam IDs are translated to the new IDs.

    renumber() does NOT reorder the pedigree.  The pedigree must already be ordered
    such that all parents precede their offspring, because a sire or dam whose ID has
    not been seen yet (or which is not in the pedigree at all) is replaced by 0.

    Parameters:
        myped         list of animal objects, renumbered in place.
        filetag       prefix for the names of the files that are written.
        io            'yes' to write the renumbered pedigree to '<filetag>_renum.ped'.
        outformat     '0' or 0 writes animal,sire,dam; anything else also writes
                      by, sex, fa and gen.
        debug         1 to print debugging messages.
        returnmap     if true the ID map is returned along with the pedigree; if
                      false the map is written to '<filetag>_id_map.map' instead.
        missingparent code used for an unknown parent (compared as strings).
        animaltype    'new' for animal objects that have name, originalID, sons, daus
                      and unks attributes, which are then updated as well.
        cleanmap      if True, delete_id_map(filetag) is called before returning.

    Returns myped if returnmap is false, otherwise the tuple (myped, id_map), where
    id_map maps each original ID to its renumbered ID.
    """
    if debug == 1:
        print('[DEBUG]: Pedigree of size %s passed to renumber()' % (len(myped)))

    isnewanimal = (animaltype == 'new')
    missing = str(missingparent)

    def _renumbered_parent(old_id, label):
        # Translate a parent ID; parents that have not been renumbered yet become 0.
        # NOTE(review): 0 is used here rather than missingparent -- confirm that this
        # is intended when missingparent is not 0.
        try:
            new_id = id_map[old_id]
        except (KeyError, TypeError):
            return 0
        if debug == 1:
            print('\t\t[DEBUG]: Renumbering %s from %s to %s' % (label, old_id, new_id))
        return new_id

    def _renumbered_offspring(offspring):
        # Build a fresh {renumbered ID: old ID} dictionary for ONE animal; offspring
        # that are not in the ID map are dropped.
        remapped = {}
        for old_id in offspring.keys():
            if old_id in id_map:
                remapped[id_map[old_id]] = old_id
        return remapped

    # Renumber the animals, building the old ID -> new ID map as we go.
    id_map = {}
    for l in range(len(myped)):
        animal = myped[l]
        idnum = l + 1
        if debug == 1:
            if l == 0:
                print('[DEBUG]: Renumbering the pedigree...')
            if l % 10000 == 0:
                print('\t%s ' % (l))
            print('[DEBUG]: An:%s (%s)\tSire: %s\tDam: %s' % (animal.animalID,animal.paddedID,animal.sireID,animal.damID))
        id_map[animal.animalID] = idnum
        if debug == 1:
            print('\t[DEBUG]: Renumbering animal from %s to %s (iter %s)' % (animal.animalID,idnum,l))
        animal.renumberedID = idnum
        animal.animalID = idnum

        # An animal that was named after its original ID is renamed to its new ID.
        if isnewanimal:
            if animal.name == animal.originalID:
                animal.name = animal.renumberedID

        if str(animal.sireID) != missing:
            animal.sireID = _renumbered_parent(animal.sireID, 'sire')
        if str(animal.damID) != missing:
            animal.damID = _renumbered_parent(animal.damID, 'dam')

        if debug == 1:
            print('[DEBUG]: animal ID = %s (%s)' % (animal.animalID, animal.originalID))
            print('[DEBUG]: An:%s\tSire: %s\tDam: %s' % (animal.animalID,animal.sireID,animal.damID))

    # Renumber the offspring dictionaries.  Every animal gets its OWN new
    # dictionaries; sharing one set of dictionaries between all animals would give
    # every animal the offspring of the whole pedigree.
    if isnewanimal:
        for m in myped:
            m.sons = _renumbered_offspring(m.sons)
            m.daus = _renumbered_offspring(m.daus)
            m.unks = _renumbered_offspring(m.unks)

    if io == 'yes':
        # Write the renumbered pedigree to a file.
        ped_outputfile = '%s%s%s' % (filetag,'_renum','.ped')
        pout = open(ped_outputfile,'w')
        try:
            pout.write('# FILE: %s\n' % (ped_outputfile))
            pout.write('# RENUMBERED pedigree produced by PyPedal.\n')
            pout.write('% asd\n')
            for animal in myped:
                # The values must be passed to % as ONE tuple.
                if outformat == '0' or outformat == 0:
                    pout.write('%s,%s,%s\n' % (animal.animalID,animal.sireID,animal.damID))
                else:
                    pout.write('%s,%s,%s,%s,%s,%s,%s\n' % (animal.animalID,animal.sireID,animal.damID,animal.by,animal.sex,animal.fa,animal.gen))
        finally:
            pout.close()

    if not returnmap:
        # Write the old ID -> renumbered ID mapping to a file.
        map_outputfile = '%s%s%s' % (filetag,'_id_map','.map')
        mout = open(map_outputfile,'w')
        try:
            mout.write('# FILE: %s\n' % (map_outputfile))
            mout.write('# Renumbered ID to Old ID mapping produced by PyPedal.\n')
            mout.write('# The lefthand column contains the original IDs.\n')
            mout.write('# The righthand column contains the renumbered IDs.\n')
            mout.write('# Old ID\tRenum ID\n')
            for old_id, new_id in id_map.items():
                mout.write('%s,%s\n' % (old_id, new_id))
        finally:
            mout.close()

    if cleanmap == True:
        delete_id_map(filetag)

    if not returnmap:
        return myped
    else:
        return myped, id_map
00738
00739
00740
00741
00742
00743
00744
00745
def load_id_map(filetag='_renumbered_'):
    """
    load_id_map() reads an ID map from the file '<filetag>_id_map.map', as generated
    by pyp_utils/renumber(), into a dictionary.  Each data line has the form
    'original ID,renumbered ID'; lines that start with '#' and blank lines are
    skipped.  Both IDs are converted to integers.

    Returns a dictionary that maps renumbered IDs to original IDs.  Reading stops at
    the first line that does not contain exactly two comma-separated fields (an error
    is printed and the entries read so far are returned).  An empty dictionary is
    returned if the file cannot be opened or an ID is not an integer.
    """
    try:
        _infile = '%s%s%s' % (filetag,'_id_map','.map')
        mapin = open(_infile,'r')
        try:
            idmap = {}
            for line in mapin:
                # strip() removes the line ending if there is one; slicing off the
                # last character would eat a digit from an unterminated last line.
                line = line.strip()
                if not line or line[0] == '#':
                    continue
                _line = line.split(',')
                if len(_line) != 2:
                    print('[ERROR]: Invalid number of elements in line read from ID map file (%s)' % (_line,))
                    break
                idmap[int(_line[1])] = int(_line[0])
        finally:
            mapin.close()
        return idmap
    except Exception:
        return {}
00777
00778
00779
00780
00781
00782
00783
def delete_id_map(filetag='_renumbered_'):
    """
    delete_id_map() checks to see if the ID map file '<filetag>_id_map.map' exists.
    If the file exists, it is deleted.  The filetag may include a directory.

    Returns 1 if the file was deleted or did not exist, and 0 if it could not be
    deleted.
    """
    try:
        _infile = '%s%s%s' % (filetag,'_id_map','.map')
        # Test the path itself rather than searching a listing of the current
        # directory, which can never match a filetag that contains a directory.
        if os.path.lexists(_infile):
            os.remove(_infile)
        return 1
    except Exception:
        return 0
00796
00797
00798
00799
00800
00801
00802
00803
def trim_pedigree_to_year(pedobj,year):
    """
    trim_pedigree_to_year() takes a pedigree and returns a new list that contains
    only the animals who were born in birthyear 'year'.  Birth years and 'year' are
    compared after conversion to integers.  pedobj.pedigree itself is not modified,
    and the order of the animals is preserved.

    Returns an empty list if an error occurs (for example, a birth year that cannot
    be converted to an integer).
    """
    try:
        return [animal for animal in pedobj.pedigree if int(animal.by) == int(year)]
    except Exception:
        return []
00826
00827
00828
00829
00830
00831
00832
00833
00834
def pedigree_range(pedobj,n):
    """
    pedigree_range() takes a renumbered pedigree and returns a new list that holds
    its first n animals, i.e. it drops all individuals with a renumbered ID > n.
    Assumes that the input pedigree is sorted on animal key in ascending order.
    pedobj.pedigree itself is not modified.

    Returns an empty list if n is not positive, if n is larger than the number of
    animals in the pedigree, or if any other error occurs.
    """
    try:
        # Index explicitly (rather than slicing) so that an n beyond the end of the
        # pedigree is an error that yields [].
        return [pedobj.pedigree[i] for i in range(n)]
    except Exception:
        return []
00851
00852
00853
00854
00855
00856
00857
00858
def sort_dict_by_keys(mydict):
    """
    sort_dict_by_keys() returns a list that contains the values of the dictionary in
    the order obtained by sorting the keys.  Based on the routine sortedDictValues3
    in the "Python Cookbook", p. 39.

    An empty dictionary is returned unchanged (as the same dictionary object), and an
    empty dictionary is returned if an error occurs.
    """
    try:
        if len(mydict) == 0:
            return mydict
        return [mydict[key] for key in sorted(mydict.keys())]
    except Exception:
        return {}
00874
00875
00876
00877
00878
00879
00880
00881
00882
def sort_dict_by_values( first, second ):
    """
    sort_dict_by_values() is a comparison function for (key, value) pairs, such as
    the items of a dictionary.  It does not sort anything itself; it orders two pairs
    first on value and then on key within value.  The implementation follows John
    Hunter's contribution to a newsgroup thread:
    http://groups-beta.google.com/group/comp.lang.python/browse_thread/thread
    /bbc259f8454e4d3f/cc686f4cd795feb4?q=python+%22sorted+dictionary%22&
    rnum=1&hl=en#cc686f4cd795feb4

    first and second are sequences whose item 0 is the key and whose item 1 is the
    value.  Returns -1 if first sorts before second, 1 if it sorts after second, and
    0 if the two pairs are equal.
    """
    # (a > b) - (a < b) is the three-way comparison that the cmp() builtin provides,
    # written so that it does not depend on that builtin.
    c1 = (first[1] > second[1]) - (first[1] < second[1])
    if c1 != 0:
        return c1
    return (first[0] > second[0]) - (first[0] < second[0])
00896
00897
00898
00899
00900
00901
00902
00903
00904
def simple_histogram_dictionary(mydict,histchar='*',histstep=5):
    """
    simple_histogram_dictionary() returns a dictionary containing a simple, text
    histogram.  The input dictionary is assumed to contain keys which are distinct
    levels and values that are counts.

    Each count is converted to a percentage of the sum of all counts and rounded to
    a whole percent.  A level gets one histchar for every histstep percent (rounded
    to the nearest whole number of characters), and bars shorter than 20 characters
    are padded with blanks to a width of 20.  A histstep that is not in the range
    0 < histstep <= 100 is replaced by 5.

    Returns a dictionary that maps each level to its bar, or an empty dictionary if
    an error occurs (for example, when the counts sum to zero).
    """
    try:
        hist_dict = {}
        hist_sum = 0.
        # A step of zero would lead to a division by zero below.
        if histstep <= 0 or histstep > 100:
            histstep = 5
        for k in mydict.keys():
            hist_sum = hist_sum + mydict[k]

        for k in mydict.keys():
            _freq = ( float(mydict[k]) / float(hist_sum) ) * 100.
            _v = numpy.around(_freq,0)
            _n_stars = int( numpy.around( (_v / float(histstep)),0 ) )
            if _n_stars > 0:
                hist_dict[k] = '%s%s' % (histchar*_n_stars,' '*(20-_n_stars))
            else:
                hist_dict[k] = '%s' % (' '*20)
        return hist_dict
    except Exception:
        return {}
00930
00931
00932
00933
00934
00935
def reverse_string(mystring):
    """
    reverse_string() reverses the input string and returns the reversed version.
    Strings of fewer than two characters are returned unchanged.  Returns 0 if the
    input cannot be reversed (for example, if it has no length).
    """
    try:
        if len(mystring) < 2:
            return mystring
        # A slice with a step of -1 walks the string from its last character to its
        # first.
        return mystring[::-1]
    except Exception:
        return 0
00951
00952
00953
00954
00955
00956
00957
def pyp_nice_time():
    """
    pyp_nice_time() returns the current local date and time as a string formatted
    as, e.g., Wed Mar 30 10:26:31 2005.  Returns 0 if the time cannot be formatted.
    """
    try:
        _seconds = time.time()
        _now = time.localtime(_seconds)
        _nice = time.asctime(_now)
    except:
        return 0
    return _nice
00967
00968
00969
00970
00971
00972
00973
def string_to_table_name(instring):
    """
    string_to_table_name() takes an arbitrary string and returns a copy from which
    every character other than the ASCII letters, the digits and the underscore has
    been removed, for use as an SQLite table name.  If the input cannot be processed
    it is returned unchanged.
    """
    try:
        # Same set of characters as
        # 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'.
        allowed_chars = string.ascii_lowercase + string.ascii_uppercase + string.digits + '_'
        kept = []
        for c in instring:
            if c in allowed_chars:
                kept.append(c)
        return ''.join(kept)
    except:
        return instring
00987
00988
00989
00990
00991
00992
00993
def pyp_datestamp():
    """
    pyp_datestamp() returns a datestamp for the current local time, as a string, of
    the format YYYYMMDDHHMMSS.  Returns '00000000000000' if the datestamp cannot be
    produced.
    """
    try:
        _stamp_format = '%Y%m%d%H%M%S'
        _now = time.localtime(time.time())
        _stamp = time.strftime(_stamp_format, _now)
    except:
        return '00000000000000'
    return _stamp
01004
01005
01006
01007
01008
01009
01010
01011
01012
def subpedigree(pedobj,anlist):
    """
    subpedigree() takes a NewPedigree object and a list of animal IDs and returns a
    new pedigree object that contains only the animals whose animalID is in the
    list.

    The returned object is a shallow copy of pedobj with its own pedigree list, its
    own copies of the namemap, namebackmap, idmap and backmap dictionaries (from
    which the entries of the removed animals have been deleted), and new metadata
    built with pyp_newclasses.PedigreeMetadata.  If kw['renumber'] is true the new
    pedigree is renumbered.  pedobj's own list and dictionaries are left untouched.

    NOTE(review): the animal objects themselves are shared between pedobj and the
    returned pedigree, so NewPed.renumber() presumably also changes the IDs seen
    through pedobj -- confirm whether the animals should be copied as well.

    Returns the new pedigree object, or 0 if an error occurs.
    """
    try:
        NewPed = copy.copy(pedobj)
        # copy.copy() is shallow: give the new pedigree its own dictionaries before
        # deleting from them, otherwise the entries would disappear from pedobj too.
        NewPed.namemap = copy.copy(pedobj.namemap)
        NewPed.namebackmap = copy.copy(pedobj.namebackmap)
        NewPed.idmap = copy.copy(pedobj.idmap)
        NewPed.backmap = copy.copy(pedobj.backmap)

        wanted = set(anlist)
        _tempped = []
        for _p in pedobj.pedigree:
            if _p.animalID in wanted:
                _tempped.append(_p)
            else:
                # backmap is keyed on animalID; the other three maps are reached
                # through the ID that backmap stores for this animal.
                _mapped_id = NewPed.backmap[_p.animalID]
                del NewPed.namemap[NewPed.namebackmap[_mapped_id]]
                del NewPed.namebackmap[_mapped_id]
                del NewPed.idmap[_mapped_id]
                del NewPed.backmap[_p.animalID]

        NewPed.pedigree = _tempped
        NewPed.metadata = pyp_newclasses.PedigreeMetadata(NewPed.pedigree, NewPed.kw)

        if NewPed.kw['renumber']:
            NewPed.renumber()
        return NewPed
    except Exception:
        return 0
01049
01050
01051
01052
01053
01054
01055
01056
01057
def founders_from_list(anlist,unkID):
    """
    founders_from_list() takes a list of NewAnimal objects and returns a list of the
    animalIDs of the founders in that list, i.e. of the animals whose sire and dam
    are both equal to unkID (the code for an unknown parent).  Returns an empty list
    if an error occurs.
    """
    try:
        flist = []
        for _animal in anlist:
            if _animal.sireID != unkID:
                continue
            if _animal.damID != unkID:
                continue
            flist.append(_animal.animalID)
    except:
        return []
    return flist
01069
01070
01071
01072
01073
01074
01075
def founder_allele_dict(pedobj):
    """
    founder_allele_dict() takes a pedigree and returns a dictionary containing an
    entry for each unique founder allele.  The founders are taken from
    pedobj.metadata.unique_founder_list and are read from
    pedobj.pedigree[founderID-1]; the first two items of each founder's alleles
    attribute become keys, and every value is initialized to 0.
    """
    fadict = {}
    for _founder_id in pedobj.metadata.unique_founder_list:
        _alleles = pedobj.pedigree[_founder_id - 1].alleles
        for _allele in (_alleles[0], _alleles[1]):
            fadict[_allele] = 0.
    return fadict
01088
01089