Skip to content

Commit

Permalink
first commit
Browse files Browse the repository at this point in the history
  • Loading branch information
albertwcheng committed Nov 18, 2010
0 parents commit 72a030b
Show file tree
Hide file tree
Showing 180 changed files with 29,858 additions and 0 deletions.
95 changes: 95 additions & 0 deletions AudicClaverieStatInterface.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/usr/bin/python

'''
Copyright 2010 Wu Albert Cheng <[email protected]>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
'''

import popen2
from sys import *

def AudicClaverieStatInPlace(XYN1N2):
    '''
    Run the external "AudicClaverieStat -interactive" program over XYN1N2 and
    append its two result columns to every row in place.

    XYN1N2 -- list of mutable rows [x, y, n1, n2]; one line "x,y,n1,n2" is
              sent to the program for every row.

    The program's output is read as one tab-separated line per input row. The
    first four fields are compared (as integers) with x, y, n1 and n2; the
    fifth and sixth fields are converted to float and appended to the row.

    Returns None. When the number of output lines differs from the number of
    rows, or an echoed value does not match its input, an error message is
    written to stderr and the function returns early. Rows handled before a
    mismatch keep the values already appended to them.
    '''
    # Imported locally: the module header only imports popen2, which has been
    # deprecated since Python 2.6 and no longer exists in Python 3.
    from subprocess import Popen, PIPE

    linput=len(XYN1N2)
    payload="".join([",".join([str(x),str(y),str(n1),str(n2)])+"\n" for x,y,n1,n2 in XYN1N2])

    child=Popen('AudicClaverieStat -interactive',shell=True,
                stdin=PIPE,stdout=PIPE,stderr=PIPE,universal_newlines=True)
    # communicate() feeds stdin while draining stdout and stderr at the same
    # time, so a full pipe buffer on either side cannot stall the exchange
    # (writing everything first and reading afterwards can deadlock).
    out,_=child.communicate(payload)
    lines=out.splitlines()

    loutput=len(lines)
    if loutput!=linput:
        stderr.write("error:inconsistent input line number with output line number\n")
        return

    for row,lin in zip(XYN1N2,lines):
        fields=lin.split("\t")
        x,y,n1,n2=row
        # The program echoes its input; verify the echo before trusting the results.
        for position,expected in enumerate((x,y,n1,n2)):
            if int(fields[position])!=expected:
                stderr.write("error inconsistent input value with check value\n")
                return

        row.extend([float(fields[4]),float(fields[5])])



if __name__=='__main__':
    # Manual smoke test: feed a handful of hand-picked rows plus a large batch
    # of identical rows through AudicClaverieStatInPlace and dump the result.
    print >> stderr,"testing AudicClaverieStatInterface from python"
    XYN1N2=[
        [1,14,30,27],
        [1,4,30,27],
        [100,100,30,27],
        [100,110,30,27],
        [100,120,30,27],
        [100,130,30,27],
        [100,140,30,27],
        [100,12440,30,27],
        [0,0,28,25],
        [14,34,25,28],
    ]
    # 7000 further rows; each one is a separate list object because
    # AudicClaverieStatInPlace extends the rows in place.
    XYN1N2.extend([368,843,25,28] for _ in range(0,7000))
    AudicClaverieStatInPlace(XYN1N2)
    print >> stderr, XYN1N2
77 changes: 77 additions & 0 deletions BedSeqUtil.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
#!/usr/bin/python
'''
For piping between bedSeq program from Python scripts
'''

def toStrArray(L):
    '''
    Return a new list holding str() of every element of L, in the same order.
    '''
    return [str(element) for element in L]


from subprocess import *

from sys import *

# Argument list for a bedSeq call. The positions appear to line up with the
# list built in BedSeqClient.__init__ (program name, sequence directory,
# input file, bed type); the sequence directory slot is an empty string here.
# NOTE(review): not referenced anywhere in the visible code - confirm it is still needed.
defaultBedSeqCommand=["bedSeq","","/dev/stdin","bed"]

class BedSeqClient:
    '''
    Keeps one bedSeq child process running and exchanges one line per request
    with it over pipes.

    For every request the entry is written to the child's stdin, one status
    line is read from the child's stderr, and, when that line is "OK", one
    result line is read from the child's stdout.
    '''
    # Pipe ends of the child process; None until __init__ has run.
    child_stdin=None
    child_stderr=None
    child_stdout=None
    # Handle of the spawned process, kept so that close() can reap it.
    _process=None

    def __init__(self,seqDir,bedType,extraParams=None,bedSeqProgramName='bedSeq',inputFileName='/dev/stdin'):
        '''
        Start the bedSeq process.

        seqDir            -- second word of the command line (sequence directory)
        bedType           -- fourth word of the command line
        extraParams       -- optional list of further command-line words
        bedSeqProgramName -- program to run
        inputFileName     -- file the program is told to read entries from
        '''
        param=[bedSeqProgramName,seqDir,inputFileName,bedType,"--print-OK"]
        if extraParams:
            param.extend(extraParams)

        # NOTE(review): the words are joined unquoted and run through the shell,
        # so values containing spaces or shell metacharacters are re-interpreted
        # by the shell. Kept as is because callers may rely on it.
        # universal_newlines=True gives text-mode pipes, so the "OK" comparison
        # in getBedSeq also works where pipes are byte streams by default.
        self._process=Popen(" ".join(param), shell=True, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=True, universal_newlines=True)
        self.child_stdin=self._process.stdin
        self.child_stdout=self._process.stdout
        self.child_stderr=self._process.stderr

    def getBedSeq(self,bedentry):
        '''
        Send one bed entry and return the stripped result line.

        bedentry -- a ready-made line, or a list/tuple of fields that is joined
                    with tabs.

        Raises ValueError when the status line read from the child's stderr is
        anything other than "OK" (including end of file).
        '''
        if isinstance(bedentry,(list,tuple)):
            bedentry="\t".join(toStrArray(bedentry))

        self.child_stdin.write("%s\n" % (bedentry,))
        # Push the request out immediately; with a buffered pipe the child
        # would otherwise never see it and the readline() below would block.
        self.child_stdin.flush()

        error=self.child_stderr.readline().strip()
        if error!="OK":
            raise ValueError("bedSeq did not accept entry %r: %s" % (bedentry,error))

        return self.child_stdout.readline().strip()

    def getSeq(self,bedentry):
        '''
        Return the last tab-separated field of the line returned by getBedSeq.
        '''
        result=self.getBedSeq(bedentry)
        return result.split("\t")[-1]

    def close(self):
        '''
        Close the three pipes and wait for the child process to terminate.
        '''
        for stream in (self.child_stdin,self.child_stdout,self.child_stderr):
            if stream is not None:
                stream.close()

        # Reap the child so that it does not linger as a zombie process.
        if self._process is not None:
            self._process.wait()
            self._process=None



if __name__=='__main__':
    # Command-line self test: look up a few bed entries in the given sequence
    # directory and print each result line on stdout.
    programName=argv[0]
    args=argv[1:]
    try:
        seqDir,=args
    except ValueError:
        # Wrong number of command-line arguments: show the usage line and stop.
        stderr.write("%s seqDir > ofile\n" % (programName,))
        exit()

    tries=["chr10\t100225000\t100225200","chr","chr12\t11125235\t11125256"]
    bedSeqClient=BedSeqClient(seqDir,"bed")
    try:
        for tri in tries:
            try:
                result=bedSeqClient.getBedSeq(tri)
            except ValueError:
                # getBedSeq raises ValueError for an entry bedSeq does not
                # accept (presumably the incomplete "chr" entry); skip it.
                # Any other error is no longer swallowed.
                continue
            stdout.write(result+"\n")
    finally:
        # Release the pipes even when an unexpected error ends the loop.
        bedSeqClient.close()

107 changes: 107 additions & 0 deletions ConvertBioBaseToTab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
#!/usr/bin/python

'''
Copyright 2010 Wu Albert Cheng <[email protected]>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
'''


from sys import *
from math import log
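# Takes in a Biobase matrix file and prints it transposed and tab-delimited:
# one output line per nucleotide (A, C, G, T), one column per matrix row.
# NOTE(review): this comment used to mention background frequencies and log-odds conversion for the MEME suite; the code in this file does neither.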


# Column position of every nucleotide letter in a matrix row, and the inverse.
alphMap={"A":0,"C":1,"G":2,"T":3}
indxMap={0:"A",1:"C",2:"G",3:"T"}

def getRawBiobaseMatrix(filename):
    '''
    Read a whitespace-delimited Biobase matrix file and return its counts.

    The first non-blank line is the header: its first four words are
    nucleotide letters (keys of alphMap) giving the column order used in the
    file. Every following non-blank line contributes one row; its first four
    words are converted with int() and stored in A, C, G, T order.

    Returns a list of [A, C, G, T] integer rows. The values are returned as
    read; no normalization is applied.

    Progress information is written to stderr while parsing.

    Raises IndexError for a non-blank line with fewer than four words,
    KeyError for a header letter that is not in alphMap, and ValueError for a
    value that is not an integer. Blank lines are skipped.
    '''
    headerMap=[]
    mat=[]

    fil=open(filename)
    try:
        for lineIndex,lin in enumerate(fil):
            # split() without arguments already discards surrounding whitespace.
            fields=lin.split()
            if not fields:
                # Blank line, e.g. a trailing empty line at the end of the file.
                continue
            if len(fields)<4:
                raise IndexError("%s line %d: expected at least 4 columns, found %d" % (filename,lineIndex+1,len(fields)))

            if not headerMap:
                # Header: remember for every file column which row slot it fills.
                for symbol in fields[0:4]:
                    stderr.write("dst= %s\n" % (symbol,))
                    headerMap.append(alphMap[symbol])

                stderr.write("headerMap: %s\n" % (headerMap,))
            else:
                row=[0,0,0,0]
                fields=fields[0:len(headerMap)]
                stderr.write("%s\n" % (fields,))
                for field,dst in zip(fields,headerMap):
                    row[dst]=int(field)
                    stderr.write(indxMap[dst]+":"+field+"\n")
                mat.append(row)
    finally:
        # Close the file even when a line cannot be parsed.
        fil.close()

    return mat



def printUsageAndExit(programName):
    '''
    Write the one-line usage message for programName to stderr and end the
    program by calling exit().
    '''
    # stderr.write produces the same text as the "print >> stderr" statement
    # and, unlike it, does not fail on Python 3 before exit() is reached.
    stderr.write("%s biobase > output\n" % (programName,))
    exit()

if __name__=="__main__":
    # Command-line entry point: read the Biobase matrix named by the single
    # argument and print it transposed on stdout.
    programName=argv[0]
    args=argv[1:]

    try:
        biobasefile,=args
    except ValueError:
        # Wrong number of command-line arguments.
        printUsageAndExit(programName)

    mat=getRawBiobaseMatrix(biobasefile)

    # One output line per nucleotide: the letter followed by that nucleotide's
    # value from every matrix row. Iterating over indxMap rather than over
    # mat[0] keeps a file without data rows from raising IndexError.
    for i in sorted(indxMap):
        output=[indxMap[i]]+[str(row[i]) for row in mat]
        stdout.write("\t".join(output)+"\n")





Loading

0 comments on commit 72a030b

Please sign in to comment.