#! /usr/bin/env python2.7
desc = """
Split an ACE format assembly file into multiple single ACE files each with a
single contig. The resulting ACE contig files are formatted so that they can
be read by CodonCode (http://www.codoncode.com).
$ ACEAssemblySplitter.py test.ace -s 2 -e 3
- prints the second and third contigs in the test.ace file
Version 1.0
Cymon J. Cox
Nov 2011
"""
import sys
try:
import argparse
except ImportError:
print "Requires Python >= 2.7."
sys.exit()
import os
import argparse
import textwrap
import subprocess
def count(filename, verbose=True):
cmd = "grep -c '^CO ' %s" % filename
child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
stdout, stderr = child.communicate()
if not child.returncode == 0:
print "grep returned error %s" % child.returncode
print "stderr: %s\n" %stderr
sys.exit(1)
else:
if verbose:
print "Number of contigs in %s: %s" % (filename, stdout)
return int(stdout)
def main(filename, count_only, start, end, verbose):
if not os.path.exists(filename):
print "Cannot find file %s" % filename
sys.exit(1)
if count_only:
count(filename)
sys.exit(0)
else:
if start or end:
n_contigs = count(filename, verbose=False)
if start and end:
if start > end:
msg = "Cannot start at %i and end at %i"
print msg (start, end)
sys.exit(1)
if start > n_contigs:
msg = "Only %i contigs in %s - cannot start at %i"
print msg % (n_contigs, filename, start)
sys.exit(1)
if end > n_contigs:
msg = "Only %i contigs in %s - cannot end at %i"
print msg % (n_contigs, filename, end)
sys.exit(1)
if not end:
end = n_contigs
isPreviousAF = False
inRT = False
inCT = False
skipNextLine = False
inHeader = True
foundStart = False
lines = []
counter = 1
fileLines = open(filename).readlines()
for line in fileLines:
if line.startswith("CO "):
if lines != []:
if inHeader:
#Skip over header:
lines = []
inHeader = False
lines.append(line)
continue
#Are we there yet?
if not foundStart:
if counter == start:
foundStart = True
else:
lines = []
counter +=1
lines.append(line)
continue
if foundStart:
if counter < end:
#CO FucusALL_rep_c1 1819 1428 94 U
name = lines[0].split()[1]
outfilename = "%s.ace" % name
head = "AS 1 1\n\n"
lines.insert(0, head)
fh = open(outfilename, "w")
fh.writelines(lines)
fh.close()
counter +=1
lines = []
else:
break
lines.append(line)
continue
if skipNextLine:
skipNextLine = False
continue
#Delete CT and RT stanzas
elif line.startswith("RT{"):
inRT = True
continue
elif line.startswith("CT{"):
inCT = True
elif line.startswith("}"):
#Both CT and RT finish with a C}
inCT = inRT = False
#Need to miss the next "\n" line
skipNextLine = True
continue
elif inCT or inRT:
continue
#Delete the blank line after AF line
elif line.startswith("AF"):
isPreviousAF = True
lines.append(line)
elif line.startswith("\n") and isPreviousAF:
isPreviousAF = False
continue
elif line.startswith("RD "):
#delete previous "\n" line
lines = lines[:-1]
lines.append(line)
elif line.startswith("DS "):
#delete previous "\n" line
lines = lines[:-1]
lines.append(line)
else:
lines.append(line)
#print the last one:
name = lines[0].split()[1]
outfilename = "%s.ace" % name
head = "AS 1 1\n\n"
lines.insert(0, head)
fh = open(outfilename, "w")
fh.writelines(lines)
fh.close()
counter +=1
if verbose:
print "Done. Split %i Contigs." % counter
if __name__ == "__main__":
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description=textwrap.dedent(desc),
)
parser.add_argument("filename",
help="ACE assembly filename",
)
parser.add_argument("-c", "--count", default=False, action='store_true',
help="Count number of contigs in assembly only. Default=False")
parser.add_argument("-s", "--start", default=1, type=int, dest="start",
help="Start at record number N (integer). Default=1")
parser.add_argument("-e", "--end", default=None, type=int, dest="end",
help="End at record number N (integer). Default=last in file")
parser.add_argument("-v", "--verbose", default=False, action='store_true',
help="Verbose. Default=False")
args = parser.parse_args()
main(args.filename, args.count, args.start, args.end, args.verbose)
Recent Comments