00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037 import copy, logging
00038
00039 try:
00040 import networkx
00041 except ImportError:
00042 print '[WARNING]: The networkx module could not be imported in pyp_network. Routines using networkx functionality are not available.'
00043
00044 try:
00045 import psyco
00046 psyco.full()
00047 except ImportError:
00048 print '[WARNING]: The psyco module could not be imported in pyp_network. Psyco speed optiimizations are not available.'
00049
00050
00051
00052
00053
00054
00055
00056
00057 def ped_to_graph(pedobj,oid=0):
00058 """
00059 ped_to_graph() Takes a PyPedal pedigree object and returns a networkx XDiGraph
00060 object.
00061 """
00062 l = len(pedobj.pedigree)
00063 G = networkx.XDiGraph(name=pedobj.kw['pedname'], selfloops=False, multiedges=True)
00064 for i in range(l):
00065
00066
00067
00068 if str(pedobj.pedigree[i].sireID) != str(pedobj.kw['missing_parent']):
00069 if oid:
00070 G.add_edge(pedobj.pedigree[int(pedobj.pedigree[i].sireID)].originalID, int(pedobj.pedigree[i].originalID),'s')
00071 else:
00072 G.add_edge(int(pedobj.pedigree[i].sireID), int(pedobj.pedigree[i].animalID),'s')
00073 if str(pedobj.pedigree[i].damID) != str(pedobj.kw['missing_parent']):
00074 if oid:
00075 G.add_edge(pedobj.pedigree[int(pedobj.pedigree[i].damID)].originalID, int(pedobj.pedigree[i].originalID),'d')
00076 else:
00077 G.add_edge(int(pedobj.pedigree[i].damID), int(pedobj.pedigree[i].animalID),'d')
00078 if str(pedobj.pedigree[i].sireID) == str(pedobj.kw['missing_parent']) and str(pedobj.pedigree[i].damID) == str(pedobj.kw['missing_parent']):
00079 if oid:
00080 G.add_node(int(pedobj.pedigree[i].originalID))
00081 else:
00082 G.add_node(int(pedobj.pedigree[i].animalID))
00083 return G
00084
00085
00086
00087
00088
00089
00090
00091
00092 def find_ancestors(pedgraph,anid,_ancestors=[]):
00093 """
00094 find_ancestors() identifies the ancestors of an animal and returns them in a list.
00095 """
00096 anid = int(anid)
00097
00098 try:
00099 _pred = pedgraph.predecessors(anid)
00100 for _p in _pred:
00101 if _p not in _ancestors:
00102 _ancestors.append(int(_p))
00103
00104 find_ancestors(pedgraph,_p,_ancestors)
00105 except:
00106 pass
00107 return _ancestors
00108
00109
00110
00111
00112
00113
00114
00115
00116 def find_ancestors_g(pedgraph, anid, _ancestors={}, gens=3):
00117 """
00118 find_ancestors_g() identifies the ancestors of an animal and returns them in a list.
00119 """
00120
00121
00122 if gens == 0:
00123 return _ancestors
00124 else:
00125 try:
00126 _pred = pedgraph.predecessors(anid)
00127
00128 for _p in _pred:
00129 if _p not in _ancestors:
00130 _ancestors[_p] = gens
00131 find_ancestors_g(pedgraph, _p, _ancestors, gens-1)
00132 except:
00133 pass
00134
00135 return _ancestors
00136
00137
00138
00139
00140
00141
00142
00143
00144 def find_descendants(pedgraph,anid,_descendants=[]):
00145 """
00146 find_descendants() identifies the descendants of an animal and returns them in a list.
00147 """
00148 try:
00149
00150 _desc = pedgraph.successors(anid)
00151 for _d in _desc:
00152 if _d not in _descendants:
00153 _descendants.append(_d)
00154 find_descendants(pedgraph,_d,_descendants)
00155 except:
00156 pass
00157 return _descendants
00158
00159
00160
00161
00162
00163
00164
00165 def immediate_family(pedgraph,anid):
00166 """
00167 immediate_family() returns parents and offspring of an animal.
00168 """
00169 try:
00170 _family = networkx.neighbors(pedgraph,anid)
00171 except:
00172 pass
00173 return _family
00174
00175
00176
00177
00178
00179
00180
00181 def count_offspring(pedgraph,anid):
00182 """
00183 count_offspring() returns the number of offspring of an animal.
00184 """
00185 try:
00186 _count = len(networkx.neighbors(pedgraph,anid)) - len(pedgraph.predecessors(anid))
00187 except:
00188 _count = 0
00189 return _count
00190
00191
00192
00193
00194
00195
00196
00197 def offspring_influence(pedgraph,anid):
00198 """
00199 offspring_influence() returns the number of grand-children by each child of a given
00200 animal.
00201 """
00202 try:
00203 _offspring = pedgraph.successors(anid)
00204 _degrees = networkx.degree(pedgraph, nbunch=_offspring, with_labels=True)
00205
00206 for _d in _degrees:
00207 _degrees[_d] = _degrees[_d] - len(pedgraph.predecessors(_d))
00208 except:
00209 pass
00210 return _degrees
00211
00212
00213
00214
00215
00216
00217
00218
00219 def most_influential_offspring(pedgraph,anid,resolve='all'):
00220 """
00221 most_influential_offspring() returns the most influential offspring of an animal as
00222 measured by their number of offspring.
00223 """
00224 try:
00225 _max_off = -999
00226 _offid = -999
00227 _offdict = {}
00228 if resolve == 'all':
00229 _tempoffdict = {}
00230 _offspring = offspring_influence(pedgraph,anid)
00231
00232 for _o in _offspring:
00233
00234
00235 if resolve == 'first':
00236 if _offspring[_o] > _max_off:
00237 _max_off = _offspring[_o]
00238 _offid = _o
00239
00240
00241 elif resolve == 'last':
00242 if _offspring[_o] >= _max_off:
00243 _max_off = _offspring[_o]
00244 _offid = _o
00245
00246
00247 else:
00248 if _offspring[_o] > _max_off:
00249 _tempoffdict = {}
00250 _tempoffdict[_o] = _offspring[_o]
00251 _max_off = _offspring[_o]
00252 _offid = _o
00253 elif _offspring[_o] == _max_off:
00254 _tempoffdict[_o] = _offspring[_o]
00255 _offid = _o
00256 if resolve == 'all':
00257 _offdict = _tempoffdict
00258 else:
00259 _offdict[_offid] = _max_off
00260 except:
00261 pass
00262 return _offdict
00263
00264
00265
00266
00267
00268
00269
00270 def get_founder_descendants(pedgraph):
00271 """
00272 get_founder_descendants() returns a dictionary containing a list of descendants of
00273 each founder in the pedigree.
00274 """
00275 try: logging.info('Entered get_founder_descendants()')
00276 except: pass
00277 founder_desc = {}
00278 for _n in pedgraph.nodes_iter():
00279 if pedgraph.in_degree(_n) < 2:
00280 _desc = {}
00281 _desc = find_descendants(pedgraph,_n,[])
00282 founder_desc[_n] = _desc
00283 print '\t%s\t%s' % ( _n, founder_desc[_n] )
00284 try: logging.info('Exited get_founder_descendants()')
00285 except: pass
00286 return founder_desc
00287
00288
00289
00290
00291
00292
00293
00294
00295 def get_node_degrees(pg):
00296 """
00297 get_node_degrees() returns a dictionary containing the number of
00298 vertices (nodes) in pg with a given number of incoming, outgoing,
00299 or total edges.
00300 """
00301 try:
00302 node_degrees = {}
00303 node_degrees['in'] = {}
00304 node_degrees['out'] = {}
00305 node_degrees['all'] = {}
00306 for n in pg.nodes_iter():
00307 node_degrees['in'][n] = pg.in_degree(n)
00308 node_degrees['out'][n] = pg.out_degree(n)
00309 node_degrees['all'][n] = pg.degree(n)
00310 return node_degrees
00311 except:
00312 return 0
00313
00314
00315
00316
00317
00318
00319
00320
00321 def get_node_degree_histograms(node_degrees):
00322 """
00323 get_node_degree_histograms() returns a dictionary containing histograms of
00324 the number of vertices (nodes) in pg with a given number of incoming,
00325 outgoing, or total edges.
00326 """
00327 try:
00328 histogram = {}
00329 histogram['in'] = {}
00330 histogram['out'] = {}
00331 histogram['all'] = {}
00332 for i in range(0,max(node_degrees['in'].values())+1): histogram['in'][i] = 0
00333 for i in range(0,max(node_degrees['out'].values())+1): histogram['out'][i] = 0
00334 for i in range(0,max(node_degrees['all'].values())+1): histogram['all'][i] = 0
00335
00336 for k in node_degrees['in'].keys():
00337 histogram['in'][node_degrees['in'][k]] = histogram['in'][node_degrees['in'][k]] + 1
00338
00339 for k in node_degrees['out'].keys():
00340 histogram['out'][node_degrees['out'][k]] = histogram['out'][node_degrees['out'][k]] + 1
00341
00342 for k in node_degrees['all'].keys():
00343 histogram['all'][node_degrees['all'][k]] = histogram['all'][node_degrees['all'][k]] + 1
00344 return histogram
00345 except:
00346 return 0
00347
00348
00349
00350
00351
00352
00353
00354
00355 def mean_geodesic(pg, debug=0):
00356 """
00357 mean_geodesic() calculates the mean geodesic (shortest) distance
00358 between two vertices in a network.
00359 """
00360 length_sum = 0
00361 if networkx.is_directed_acyclic_graph(pg):
00362 n_pairs_with_paths = 0
00363 else:
00364 n_pairs_with_paths = ( pg.order() * ( pg.order() + 1 ) ) / 2
00365 tg = networkx.subgraph(pg, pg.nodes())
00366 for u in pg.nodes_iter():
00367 tg.delete_node(u)
00368 for v in tg.nodes_iter():
00369 try:
00370 length = networkx.shortest_path_length(pg,u,v)
00371 if length > 0:
00372 length_sum = length_sum + length
00373 if networkx.is_directed_acyclic_graph(pg):
00374 n_pairs_with_paths = n_pairs_with_paths + 1
00375 except networkx.exception.NetworkXError:
00376 pass
00377 try:
00378 geodesic = float(length_sum) / float(n_pairs_with_paths)
00379 except:
00380 geodesic = -999.
00381 if debug:
00382 print 'length_sum:\t', length_sum
00383 print 'n_pairs_with_paths:\t', n_pairs_with_paths
00384 return geodesic
00385
00386
00387
00388
00389
00390
00391
00392 def graph_density(pg):
00393 """
00394 graph_density() calculates the density of a digraph, which is the
00395 ratio of edges in the gaph to the maximum possible number of edges.
00396 """
00397 pgn = pg.order()
00398 pgne = pg.size()
00399 denom = pgn * ( pgn - 1 )
00400
00401 try:
00402 density = float(pgne) / float(denom)
00403 except:
00404 return -999.
00405 return density
00406
00407
00408
00409
00410
00411
00412
00413
00414
00415 def dyad_census(pg, debug=0, debuglog=0):
00416 """
00417 dyad_census() calculates the number of null, asymmetric, and
00418 mutual edges between all pairs of nodes in a directed graph.
00419 """
00420 if not networkx.is_directed_acyclic_graph(pg):
00421 logging.error('pyp_network.dyad_census() requires a directed graph as input!')
00422 return 0
00423 else:
00424 census = {}
00425 census['null'] = 0
00426 census['asymmetric'] = 0
00427 census['mutual'] = 0
00428 tg = networkx.subgraph(pg, pg.nodes())
00429 for u in pg.nodes_iter():
00430 tg.delete_node(u)
00431 for v in tg.nodes_iter():
00432 if not pg.has_neighbor(u,v):
00433 census['null'] = census['null'] + 1
00434 elif u in pg.predecessors(v) and v in pg.successors(u):
00435 census['mutual'] = census['mutual'] + 1
00436 if debug:
00437 print 'Nodes %s and %s link to one another!' % ( u, v )
00438 if debuglog:
00439 logging.error('Nodes %s and %s link to one another!',u, v)
00440 elif u in pg.predecessors(v) and v not in pg.successors(u):
00441 census['asymmetric'] = census['asymmetric'] + 1
00442 elif u not in pg.predecessors(v) and v in pg.successors(u):
00443 census['asymmetric'] = census['asymmetric'] + 1
00444 else:
00445 pass
00446 del(tg)
00447 return census
00448
00449
00450
00451
00452
00453
00454
00455
00456
00457
00458 def mean_degree_centrality(pg, normalize=0):
00459 """
00460 mean_degree_centrality(pg) calculates mean in- and out-degree
00461 centralities for directed graphs and simple degree-centralities
00462 for undirected graphs. If the normalize flag is set, each node's
00463 centralities are weighted by the number of edges in the (di)graph.
00464 """
00465 centrality = {}
00466 try:
00467 if networkx.is_directed_acyclic_graph(pg):
00468 cent_sum_in, cent_sum_out = 0, 0
00469 for n in pg.nodes():
00470 n_cent_in = pg.in_degree(n)
00471 n_cent_out = pg.out_degree(n)
00472 if normalize:
00473 n_cent_in = float(n_cent_in) / float(pg.size()-1)
00474 n_cent_out = float(n_cent_out) / float(pg.size()-1)
00475 cent_sum_in = cent_sum_in + n_cent_in
00476 cent_sum_out = cent_sum_out + n_cent_out
00477 centrality['in'] = cent_sum_in / float(pg.order())
00478 centrality['out'] = cent_sum_out / float(pg.order())
00479 else:
00480 cent_sum = 0
00481 for n in pg.nodes():
00482 if not normalize:
00483 n_cent = pg.degree(n)
00484 else:
00485 n_cent = networkx.degree_centrality(pg,n)
00486 cent_sum = cent_sum + n_cent
00487 centrality['all'] = cent_sum / float(pg.order())
00488 except:
00489 logging.error('pyp_network.mean_degree_centrality() failed!')
00490 return centrality
00491
00492
00493
00494
00495
00496
00497 def mean_value(mydict):
00498 """
00499 mean_value() calculates the mean from all values in a
00500 dictionary.
00501 """
00502 try:
00503 mymean = float(sum(mydict.values())) / float(len(mydict.values()))
00504 except:
00505 logging.error('pyp_network.mean_values() failed!')
00506 mymean = -999.
00507 return mymean
00508
00509
00510
00511
00512
00513
00514
00515
00516 def get_closeness_centrality(pg):
00517 """
00518 get_closeness_centrality(pg) returns a dictionary of the close-
00519 ness centrality (1/(average distance to all nodes from n)) for
00520 each node in the graph.
00521 """
00522 centrality = {}
00523 try:
00524 for n in pg.nodes():
00525 centrality[n] = networkx.closeness_centrality(pg,n)
00526 except:
00527 logging.error('pyp_network.get_closeness_centrality() failed!')
00528 return centrality
00529
00530
00531
00532
00533
00534
00535
00536
00537 def get_clustering_coefficient(pg):
00538 """
00539 get_clustering_coefficient(pg) returns a dictionary of the clustering
00540 coefficient of each node in the graph based on the definition of Watts
00541 and Strogatz (1998).
00542 """
00543 clustering = {}
00544 try:
00545 for n in pg.nodes():
00546 clustering[n] = networkx.clustering(pg,n)
00547 except:
00548 logging.error('pyp_network.get_clustering() failed!')
00549 return clustering
00550
00551
00552
00553
00554
00555
00556
00557 def get_betweenness_centrality(pg):
00558 """
00559 get_betweenness_centrality(pg) returns a dictionary of the
00560 betweenness centrality of each node in the graph.
00561 """
00562 between = {}
00563
00564 between = networkx.betweenness_centrality(pg)
00565
00566
00567 return between
00568
00569
00570
00571
00572
00573
00574
00575
00576 def get_node_betweenness(pg):
00577 """
00578 get_node_betweenness(pg) returns a dictionary of the
00579 number of shortest paths that go through each node in
00580 the graph.
00581 """
00582 between = {}
00583
00584 for n in pg.nodes():
00585 between[n] = networkx.node_betweenness(pg,n)
00586
00587
00588 return between
00589
00590
00591