tutorials:perl:parse_blast_hsp.4.pl
- parse_blast_hsp.4.pl
#!/usr/bin/perl my $script_name = 'parse_blast_hsp.4.pl'; # parse blast results # output = parsed format for sequence similarity, 1 hsp per line # v4 2010/09/20 # bug fix: hsp length (should be 'total', not 'query') # output format change # v3 2010/08/05 # add strand info # v2 2010/07/15 # style change # change output format # v1 2007/04/16 use strict; use warnings; use Getopt::Long; use Bio::SearchIO; my $in_file; my $out_file; my $format; my $print_seq; my $debug; GetOptions( "in_file=s" => \$in_file, "out_file=s" => \$out_file, "format=s" => \$format, "print_seq=i" => \$print_seq, "debug=i" => \$debug, ); $format = $format ? $format : 'blastxml'; my $searchio = new Bio::SearchIO( -format => "$format", -file => "$in_file", ); # open output file open OUT, ">$out_file" or die "Can't open output file $out_file\n"; while ( my $result = $searchio->next_result ) { my $query_id = $result->query_description; my $query_length = $result->query_length; while ( my $hit = $result->next_hit ) { my $subject_id = $hit->name; my $subject_length = $hit->length; while ( my $hsp = $hit->next_hsp ) { my $evalue = sprintf "%.2e", ( $hsp->evalue ); my $query_start = $hsp->start('query'); my $query_end = $hsp->end('query'); my $query_strand = $hsp->strand('query'); my $subject_start = $hsp->start('hit'); my $subject_end = $hsp->end('hit'); my $subject_strand = $hsp->strand('hit'); my $num_identical_HSP = $hsp->num_identical; my $num_conserved_HSP = $hsp->num_conserved; my $num_gap_HSP = $hsp->gaps; my $length_HSP_query = $hsp->length('query'); my $length_HSP_subject = $hsp->length('hit'); my $length_HSP_total = $hsp->length('total'); my $frac_HSP_query = sprintf "%.3f", ( $length_HSP_query / $query_length ); my $frac_HSP_subject = sprintf "%.3f", ( $length_HSP_subject / $subject_length ); my $frac_identical = sprintf "%.3f", ( $num_identical_HSP / $length_HSP_total ); my $frac_conserved = sprintf "%.3f", ( $num_conserved_HSP / $length_HSP_total ); my $frac_gap = sprintf "%.3f", ( $num_gap_HSP / $length_HSP_total ); print OUT "$query_id", # 0 "\t$query_length", # 1 "\t$subject_id", # 2 "\t$subject_length", # 3 "\t$evalue", # 4 "\t$query_start", # 5 "\t$query_end", # 6 "\t$query_strand", # 7 "\t$frac_HSP_query", # 8 "\t$subject_start", # 9 "\t$subject_end", # 10 "\t$subject_strand", # 11 "\t$frac_HSP_subject", # 12 "\t$length_HSP_total", # 13 "\t$num_identical_HSP", # 14 "\t$frac_identical", # 15 "\t$num_conserved_HSP", # 16 "\t$frac_conserved", # 17 "\t$num_gap_HSP", # 18 "\t$frac_gap"; # 19 if ($print_seq) { print OUT "\t", $hsp->query_string; # 20 print OUT "\t", $hsp->hit_string; # 21 } print OUT "\n"; } # while hsp } # while subject } # while query close(OUT); exit(0);
tutorials/perl/parse_blast_hsp.4.pl.txt · Last modified: 2012/06/15 00:31 by chkuo