#!/bin/env python import os, socket, sys, re, popen2 import newick.tree from newick.tree import Tree, Leaf from CoaSim import * class Labelling(object): def __init__(self): self.d = dict() self.count = 0 def insert(self,label): try: return self.d[label] except KeyError: self.d[label] = self.count self.count += 1 return self.count - 1 def get(self,label): return self.d[label] class BitVector(object): def __init__(self, n): self.vector = [0 for i in xrange(n)] def get(self,i): return self.vector[i] def set(self,i): self.vector[i] = 1 def union(self,other): for i in xrange(len(self.vector)): self.vector[i] = self.vector[i] | other.vector[i] def __str__(self): s = '' for i in xrange(len(self.vector)): s += str(self.vector[i]) return s class Labeller(newick.tree.TreeVisitor): def __init__(self): newick.tree.TreeVisitor.__init__(self) self.labelling = Labelling() def visit_leaf(self,l): self.labelling.insert(l.id) class Folder(newick.tree.TreeVisitor): def __init__(self, labelling): newick.tree.TreeVisitor.__init__(self) self.labelling = labelling def visit_leaf(self,leaf): leaf.bit_vector = BitVector(len(self.labelling.d)) leaf.bit_vector.set(self.labelling.get(leaf.id)) def post_visit_tree(self,t): t.bit_vector = BitVector(len(self.labelling.d)) for (child,_,__) in t.edges: t.bit_vector.union(child.bit_vector) self.update(t.bit_vector,t) class Annotator(Folder): def __init__(self, labelling): Folder.__init__(self, labelling) self.vector_set = dict() def update(self,bv,t): self.vector_set[str(bv)] = 1 class NotFound: pass class Checker(Folder): def __init__(self, root, labelling, vectors): Folder.__init__(self, labelling) self.vector_set = vectors self.root_children = [child for (child,_,_) in root.edges] def update(self,bv,t): if str(bv) not in self.vector_set: if t not in self.root_children: raise NotFound def tree_cmp(t1, t2): l = Labeller() t1.dfs_traverse(l) a = Annotator(l.labelling) t1.dfs_traverse(a) vectors = a.vector_set try: c = Checker(t2, l.labelling, vectors) t2.dfs_traverse(c) return True except NotFound: return False def branchLength(t): class LV(newick.tree.TreeVisitor): def __init__(self): self.sum = 0 def pre_visit_edge(self,src,b,l,dst): self.sum += l lv = LV() t.dfs_traverse(lv) return lv.sum def recombType(t1, t2): epsilon = 1e-5 l1 = branchLength(t1) l2 = branchLength(t2) equal_topology = tree_cmp(t1,t2) if abs(l1-l2) < epsilon and equal_topology: return 1 elif equal_topology: return 2 else: return 3 def _hasIncomp(col1,col2): x = zip(col1,col2) if (0,0) in x and (0,1) in x and (1,0) in x and (1,1) in x: return True else: return False def _getCol(dataMatrix,i): return [row[i] for row in dataMatrix] def incompatible(dataMatrix): noRows = len(dataMatrix) noCols = len(dataMatrix[0]) for i in xrange(noCols): for j in xrange(i): col1 = _getCol(dataMatrix,i) col2 = _getCol(dataMatrix,j) if _hasIncomp(col1,col2): return True return False pairwise = 'LDhat2/SC/pairwise' match1 = re.compile('Is likelihood file an exact match to data?') match2 = re.compile('Do you wish to change grid over which to estimate likelihoods') match3 = re.compile('Maximum at 4Ner\(region\) = (.*) :') match4 = re.compile('Do you wish to carry out a sliding windows analysis?') match5 = re.compile('Print out table of Rmin values?') match6 = re.compile('Lower bound on Rmin = (.*)') match7 = re.compile('Estimate 4Ner by moment method?') match8 = re.compile(': 4Ner = (.*)') match9 = re.compile('Do you wish to test for recombination?') match10= re.compile('Proportion Lkmax <= estimated = (.*)') match11= re.compile('Proportion corr\(r2,d\) <= estimated = (.*)') matchLi= re.compile(' rhobar = (.*)') ##FIXME matches to lower bound, 4Ner and two different significance testings def hudson(): cmd = "./exhap rho rhoMap = {(20,0): 0.28, (50,0): 0.22, (20,100): 3.88, (50,100): 2.15} import exceptions class reject(exceptions.Exception): pass class rejectionSampler(object): def __init__(self): self.k = None self.time = None def recombinationEvent(self, n1, n2, k): if self.k is not None: raise reject() self.k = k self.time = n1.eventTime sampleNo = 0 while True: try: cb = rejectionSampler() rho = rhoMap[(noSeqs,beta)] arg = simulate(markers, noSeqs, rho=rho, beta=beta, callbacks=cb) if cb.k is None: continue # no recombinations trees = [i.tree for i in arg.intervals] t1Str, t2Str = [str(t) for t in trees] t1 = newick.tree.parse_tree(t1Str) t2 = newick.tree.parse_tree(t2Str) seqs = arg.sequences lambda_1 = 1 lambda_2 = trees[1].branchLength/trees[0].branchLength # NB: this wont work in the pop version from random import expovariate as Exp p = 0 ; hudson_pos = [] for i in xrange(noSNPs/2): hudson_pos.append(p) p += Exp(lambda_1) for i in xrange(noSNPs/2): p += Exp(lambda_2) hudson_pos.append(p) mcvean_pos = [int(p*100) for p in hudson_pos] for i in xrange(1,len(mcvean_pos)): if mcvean_pos[i-1] == mcvean_pos[i]: mcvean_pos[i] += 1 seqH = open('/tmp/'+host+'-seqH.txt','w') seqM = open('/tmp/'+host+'-seqM.txt','w') print >> seqH, noSeqs, noSNPs print >> seqM, noSeqs, noSNPs, 1 li_pos = [p/hudson_pos[noSNPs-1] for p in hudson_pos] for i in xrange(noSNPs): print >> seqH, li_pos[i], print >> seqH print >> seqH, 'label', for q in xrange(noSNPs): print >> seqH, '?', i = 0 for seq in seqs: print >> seqM, '>', i print >> seqM, ''.join([str(a) for a in seq]) print >> seqH, i, seq i += 1 seqM.close() seqH.close() locs = open('/tmp/'+host+'-locs','w') print >> locs, noSNPs, mcvean_pos[noSNPs-1]+100, 'L' print >> locs, ' '.join([str(mcvean_pos[i]+1) for i in xrange(noSNPs)]) locs.close() mcveanRho, fourGamete, heyRho, mcveanSign, r2Sign = mcvean() hudson_est1= hudson() print cb.k, cb.time, recombType(t1,t2), incompatible(seqs), print hudson_est1, mcveanSign, r2Sign sampleNo += 1 if sampleNo >= noSamples: break except reject: continue