User Tools

Site Tools


tutorials:perl:correct_fasta_orientation.1.pl
correct_fasta_orientation.1.pl
#!/usr/bin/perl -w
 
# Name of this script, kept as metadata; it is not referenced anywhere else
# in the visible code.
my $script_name = 'correct_fasta_orientation.1.pl';
 
# Chih-Horng Kuo <chkuo@lifedev.org>
# correct sequence orientation in a .fasta file
# v1 2010/12/31
 
use strict;
use warnings;
 
use File::Basename;
use File::Path qw(make_path);
use Getopt::Long;
 
# Command-line options.
my $in_file;     # --in_file:  tab-delimited table, "seq_name<TAB>orientation" per line
my $in_fasta;    # --in_fasta: FASTA file whose sequences are to be re-oriented
my $out_file;    # --out_file: FASTA file to write
my $verbose;     # --verbose:  integer level (accepted, currently unused)
my $debug;       # --debug:    integer level (accepted, currently unused)

my $usage =
    "Usage: $0 --in_file=<orientation table> --in_fasta=<input fasta> "
  . "--out_file=<output fasta> [--verbose=<int>] [--debug=<int>]\n";

# GetOptions returns false when it meets an unknown or malformed option;
# stop instead of running with a half-parsed command line.
GetOptions(
    "in_file=s"  => \$in_file,
    "in_fasta=s" => \$in_fasta,
    "out_file=s" => \$out_file,
    "verbose=i"  => \$verbose,
    "debug=i"    => \$debug,
) or die $usage;

# All three file arguments are required by the code below.
foreach my $required ( $in_file, $in_fasta, $out_file ) {
    die $usage unless defined $required && length $required;
}

# Create the output directory (and any missing parents) without going through
# a shell, so paths containing spaces or shell metacharacters are safe.
# make_path raises a fatal error by default if the directory cannot be created.
my $out_dir = dirname($out_file);
make_path($out_dir) unless -d $out_dir;
 
# Load the orientation table from $in_file.
# Each line is expected to be "seq_name<TAB>orientation"; the orientation is
# later compared numerically against 1 and -1.
my %orientation_hash;    # key = seq_name, value = orientation (1 or -1)
open my $table_fh, '<', $in_file
    or die "Can't open input file $in_file: $!\n";
while ( my $in_line = <$table_fh> ) {
    chomp $in_line;
    $in_line =~ s/\r\z//;        # tolerate CRLF line endings
    next if $in_line !~ /\S/;    # skip blank lines

    my ( $seq_name, $orientation ) = split /\t/, $in_line;

    # A line without a tab has no orientation column; storing undef here would
    # only produce "uninitialized value" noise later, so report it and move on.
    if ( !defined $seq_name || !defined $orientation ) {
        warn "Skipping malformed line $. in $in_file: $in_line\n";
        next;
    }
    $orientation_hash{$seq_name} = $orientation;
}
close $table_fh;
 
 
# Rewrite $in_fasta to $out_file, reverse-complementing every sequence whose
# orientation in %orientation_hash is -1. Sequences with orientation 1 are
# copied as-is; anything else is copied as-is with a warning. Each output
# record is written as a header line followed by the sequence on one line.
{
    open my $fasta_fh, '<', $in_fasta
        or die "Can't open input file $in_fasta: $!\n";
    open my $out_fh, '>', $out_file
        or die "Can't open output file $out_file: $!\n";

    # Read one FASTA record per iteration by splitting the input on ">".
    # (A literal ">" inside a header line would therefore split the record.)
    local $/ = ">";
    my $leading = <$fasta_fh>;    # toss the first record, which only consists of ">"
    while ( my $record = <$fasta_fh> ) {
        chomp $record;            # removes the trailing ">" separator
        my ( $seq_name, $seq ) = split( /\n/, $record, 2 );

        # An empty or header-only record leaves these undefined.
        $seq_name = '' unless defined $seq_name;
        $seq      = '' unless defined $seq;

        $seq_name =~ s/\r\z//;        # CRLF input: keep "\r" out of the lookup key
        $seq      =~ tr/ \t\n\r//d;   # Remove whitespace

        # Look the orientation up once and test definedness first, so a
        # sequence missing from the table yields only the intended warning.
        my $orientation = $orientation_hash{$seq_name};
        if ( defined $orientation && $orientation == -1 ) {
            # reverse complement, including IUPAC ambiguity codes
            # (R<->Y, K<->M, B<->V, D<->H; S, W and N are their own complement)
            $seq = scalar reverse $seq;
            $seq =~ tr/ACGTRYKMBVDHacgtrykmbvdh/TGCAYRMKVBHDtgcayrmkvbhd/;
        }
        elsif ( !defined $orientation || $orientation != 1 ) {
            warn "No orientation info for $seq_name!\n";
        }
        # orientation == 1: sequence is already in the right direction

        print {$out_fh} ">$seq_name\n$seq\n";
    }

    # Buffered write errors (e.g. disk full) only surface at close time.
    close $out_fh or die "Can't close output file $out_file: $!\n";
    close $fasta_fh;
}
 
# Reserved hooks: --verbose and --debug are parsed, but no reporting is
# implemented for either of them yet.
if ($verbose) { }
if ($debug)   { }

# Normal termination.
exit 0;
tutorials/perl/correct_fasta_orientation.1.pl.txt · Last modified: 2012/06/15 00:32 by chkuo