Using Python and this GFF parser, which mimics Biopython’s SeqIO parsers:
from BCBio import GFF
# Read the GFF file and iterate over the records the parser yields.
for seq in GFF.parse('my_file.gff'):
    # Only focus on the CDS features attached directly to the record.
    # NOTE(review): CDS entries nested under gene/mRNA parents may live in
    # sub-features instead of seq.features -- confirm for your file.
    for feat in filter(lambda x: x.type == 'CDS',
                       seq.features):
        # Extract the locus tag; qualifier values are lists, so take the
        # first entry and fall back to 'unspecified' when the tag is absent.
        locus_tag = feat.qualifiers.get('locus_tag',
                                        ['unspecified'])[0]
        # Extract the feature's sequence from the parent record; extract()
        # follows the feature location (including strand).
        # NOTE(review): assumes the record actually carries sequence data
        # (e.g. an embedded FASTA section) -- confirm for your file.
        dna_seq = feat.extract(seq.seq)
Recent Comments