KEGG annotation pipeline

KEGG Pathway Pipeline:

blastall -p blastp -d KEGG -i Haiyan.Pep.fasta -m 7 -a 10 -o Haiyan.Pep.fasta.blastp.m7 &
./tBLASTnParser.pl Haiyan.Pep.fasta.blastp.m7 Haiyan.Pep.fasta.blastp.m8
sed '1,1d' Haiyan.Pep.fasta.blastp.m8 > Haiyan.Pep.fasta.blastp.m8.delhead

/home/zhouzh/lib/454-2.5/bin/runAssembly -m -cpu 16 -cdna -nobig -o Test sff/GV1NGBM02.sff

./draw_png.py -i ACYPIprot.KO.file -p /home/shenzy/KEGG/ko_org -o map_result5

step 1:
/home/soft/blast-2.2.23/bin/blastall -p blastp -d KEGG -i MBL_relation.fa -a 15 -b 30 -v 30 -m 7 -FF -o MBL_relation.fa.blastp2.m7 &

step 2:
/home/shenzy/work_python_script_bak/tBLASTnParser.pl MBL_relation.fa.blastp2.m7 MBL_relation.fa.blastp2.m8

sed -e '1d' G_seq_fkegg_Mix4.blastp.m8.result > G_seq_fkegg_Mix4.blastp.m8.result.nohead

./handle_KEGG_blast.py -i MBL_relation.fa.blastp2.m8 -j ../ko_gene -g anno_file2 -s anno_file_status2 > KO_list_file2

step 3:
Process anno_file_status here, NOT KO_list_file! (The BR:ko04091: … and PATH: … entries must be deleted from it first.)

./draw_png2.py -i MBL2.KOFILE -p /home/shenzy/KEGG/ko_org/ -o MBLkeggMAP.result2

/home/soft/velvet_1.0.19/shuffleSequences_fastq.pl lane3_1209.read2.fq.t10l40.bowtie.file lane3_1209.read1.fq.t10l40.bowtie.file lane3_1209.t10l40.bowtie.pe12.fq
/home/soft/fastx_toolkit-0.0.13/src/fastq_to_fasta/fastq_to_fasta -n -i lane3_1209.t10l40.bowtie.pe12.fq -o lane3_1209.t10l40.bowtie.pe12.fa
cat lane3_1209.t10l40.bowtie.pe12.fa lane3_read12.fa lane4_read12.fa s_2_pe12.fasta > s_2343_pe12.fasta

blastall -p blastp -d ../KEGG -i AphisVelvet.pep -a 15 -b 30 -v 30 -m 7 -FF -o AphisVelvet.pep.blastp.result2 &
QueryName HSP QueryLength SubjctLength QueryAlignment SubjctAlignment Annotation Score BitScore EValue IdentityRate QueryFrame QueryStart QueryEnd SubjectFrame SubjectStart SubjectEnd

cdhit-cluster-consensus 1.GAC.454Reads.fna.cluster.clstr 1.GAC.454Reads.fna cdhit.result &
[2] 17965
…read 379519 clusters from file “1.GAC.454Reads.fna.cluster.clstr”000 lines
read 737073 sequences from file “1.GAC.454Reads.fna”
write 5000 singleton clusters
write 293650 singleton clustersCDNA$ write 6000 singleton clusters
finish 85869 clusters out of 85869 non-singleton clusters

ACYPI000002-PA RefSeq peptide NP_001119607 gi|187097094|ref|NP_001119607.1 sucrase [Acyrthosiphon pisum] 1 590 588 95.76 97.45 dme:Dmel_CG8690 CG8690 gene product from transcript CG8690-RA (EC:3.2.1.20); K01187 alpha-glucosidase [EC:3.2.1.20] 1255 488.034 7.63337e-136 46.34 1 19 583 1 15 587

######################################
KEGG results contain protein names such as deb:DehaBAV1_0078; the related information can be looked up in COG.mappings.v8.3.txt
######################################

shenzy@shenzy-ubuntu:/winxp_disk2/shenzy/BGI/lla/gene_annotation$ more 11aRayScalf_all.fna.cds.faa.blastp_kegg.m8.top1
QueryName HSP QueryLength SubjctLength QueryAlignment SubjctAlignment Annotation Score BitScore EValue IdentityRate QueryFrame QueryStart QueryEnd SubjectFrame SubjectStart SubjectEnd
11aRayScalf10001 87 87 734 648 648 0.891375 0.891375 3 D 3 (translation) 1 215 319 100.00 67.40 deb:DehaBAV1_0078 phage integrase family protein 1085 422.55 5.04278e-117 94.42 1 1 215 1 66 280

shenzy@shenzy-ubuntu:/winxp_disk2/shenzy/BGI/lla/gene_annotation$ grep "DehaBAV1_0078" COG.mappings.v8.3.txt
216389.DehaBAV1_0078 32 296 COG4974 Phage integrase family protein

shenzy@shenzy-ubuntu:/winxp_disk2/shenzy/BGI/lla/gene_annotation/kegg$ ./draw_png_good.py -i anno_file_status -p /winxp_disk2/shenzy/KEGG/img/ -o test.out

#################################################################################################################

shenzy@shenzy-ubuntu:/winxp_disk2/shenzy/BGI/MB/gene_annotation/kegg$ blastall -p blastp -d kegg-Prokaryotes -i 47_acc_num_xiaoying.txt.fasta -a 15 -b 30 -v 30 -m 7 -FF -o 47_acc_num_xiaoying.txt.fasta.blastp.m7 &

shenzy@shenzy-ubuntu:/winxp_disk2/shenzy/BGI/MB/gene_annotation$ handle_KEGG_blast.py -i MBrayScalfALL.fna.cds.faa.blasp.kegg-P.m8.nohead -j ko -g anno_file2 -s anno_file_status2 > KO_list_file2

shenzy@shenzy-ubuntu:/winxp_disk2/shenzy/BGI/MB/gene_annotation/kegg$ handle_KEGG_blast_filterzero.py -i 2548N_stat.siggenes_102030min.filter.protein.fasta.blastp.m8.nohead -j ko.pep.fasta -g 162_anno_file -s 162_anno_file_status > 162_KO_list_file &
#################################################################################################################

blastall -p blastp -d kegg-Prokaryotes -i 176_protein.fasta -a 15 -b 30 -v 30 -m 7 -FF -o 176_acc_relation.fa.blastp.m7 &

handle_KEGG_blast.py -i 176_acc_relation.fa.blastp.m8.nohead -j ko -g 176_anno_file -s 176_anno_file_status > 176_KO_list_file &

draw_png_good.py -i 176_anno_file_status -p img -o 176_kegg_results &

Leave a Reply

  

  

  

You can use these HTML tags

<a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>