User Tools

Site Tools


tutorials:perl:unwrap_seq_fasta.1.pl
unwrap_seq_fasta.1.pl
#!/usr/bin/perl -w
 
my $script_name = 'unwrap_seq_fasta.1.pl';
 
# Chih-Horng Kuo <chkuo@lifedev.org>
# remove extra line-breaks (in the sequences) in fasta files
# v1 2010/03/04
 
use strict;
use warnings;

use File::Basename;
use File::Path qw(make_path);
use File::Spec;
use Getopt::Long;
 
# Command-line options, populated by GetOptions below.
my $in_dir;          # --in_dir: directory scanned for input files (required)
my $in_file_ext;     # --in_file_ext: input extension, without the dot (default 'fasta')
my $out_dir;         # --out_dir: directory that receives the output files (required)
my $out_file_ext;    # --out_file_ext: output extension, without the dot (default 'fasta')
my $verbose;         # --verbose: non-zero prints a per-file summary line
my $debug;           # --debug: parsed, but not referenced in the visible script

my $usage =
    'Usage: '
  . basename($0)
  . ' --in_dir=DIR --out_dir=DIR'
  . ' [--in_file_ext=EXT] [--out_file_ext=EXT] [--verbose=N] [--debug=N]' . "\n";

# GetOptions returns false (after printing its own diagnostics) when the
# command line contains unknown or malformed options; stop instead of
# running with a half-parsed configuration.
GetOptions(
    "in_dir=s"       => \$in_dir,
    "in_file_ext=s"  => \$in_file_ext,
    "out_dir=s"      => \$out_dir,
    "out_file_ext=s" => \$out_file_ext,
    "verbose=i"      => \$verbose,
    "debug=i"        => \$debug,
) or die $usage;

# Both directories are needed further down; fail early with a clear message
# rather than with uninitialized-value warnings.
die "--in_dir is required\n$usage"
  unless defined $in_dir && length $in_dir;
die "--out_dir is required\n$usage"
  unless defined $out_dir && length $out_dir;

# Missing (or empty) extensions fall back to 'fasta'.
$in_file_ext  ||= 'fasta';
$out_file_ext ||= 'fasta';

# Create the output directory, including missing parents, without passing
# the user-supplied path through a shell.
if ( -e $out_dir && !-d $out_dir ) {
    die "output path $out_dir exists but is not a directory\n";
}
unless ( -d $out_dir ) {
    make_path($out_dir);
    -d $out_dir or die "can't create output directory $out_dir\n";
}
 
# Unwrap a single FASTA file.
#
# Reads $in_path record by record, joins the wrapped sequence lines of each
# record into a single line and writes ">name\nsequence\n" per record to
# $out_path.
#
# Returns the number of records written.  Dies if either file cannot be
# opened or if closing the output file reports an error.
sub unwrap_fasta_file {
    my ( $in_path, $out_path ) = @_;

    # Open the input first so an unreadable input does not leave behind a
    # freshly truncated output file.
    open my $in_fh, '<', $in_path
      or die "Can't open input file $in_path: $!\n";
    open my $out_fh, '>', $out_path
      or die "Can't open output file $out_path: $!\n";

    my $count_seq = 0;
    {
        # A record ends where the next header line starts.  Using "\n>"
        # instead of a bare ">" keeps headers that contain '>' (for
        # example "A->B") in one piece.
        local $/ = "\n>";

        my $is_first = 1;
        while ( defined( my $record = <$in_fh> ) ) {
            chomp $record;    # removes the trailing "\n>" separator, if present

            if ($is_first) {
                $is_first = 0;

                # Drop anything that precedes the first '>' together with
                # the '>' itself; a chunk without any '>' holds no header.
                next unless $record =~ s/\A[^>]*>//;
            }

            my ( $seq_name, $seq ) = split /\n/, $record, 2;

            # A header with no following newline yields no sequence part.
            $seq_name = '' unless defined $seq_name;
            $seq      = '' unless defined $seq;

            $seq_name =~ s/\r\z//;      # CRLF input: keep "\r" out of the header
            $seq      =~ tr/ \t\n\r//d; # remove whitespace, i.e. unwrap
            $count_seq++;

            print {$out_fh} ">$seq_name\n$seq\n";
        }
    }
    close $in_fh;

    # Buffered write errors (e.g. a full disk) only surface at close.
    close $out_fh or die "Can't close output file $out_path: $!\n";

    return $count_seq;
}

my $count_file = 0;

# The extension is matched literally, so a '.' in it (e.g. "fa.gz") does not
# act as a regex wildcard.
my $in_file_re = qr/\A(\S+)\.\Q$in_file_ext\E\z/;

# With no usable $out_dir, fall back to the current directory.
my $out_base =
  ( defined $out_dir && length $out_dir ) ? $out_dir : File::Spec->curdir;

opendir( my $dir_fh, $in_dir ) or die "can't open $in_dir: $!\n";
while ( defined( my $in_name = readdir($dir_fh) ) ) {
    next unless $in_name =~ $in_file_re;
    my $file_id = $1;

    # catfile inserts the directory separator when the directory was given
    # without a trailing slash.
    my $in_file = File::Spec->catfile( $in_dir, $in_name );
    next unless -f $in_file;    # ignore directories that happen to match
    $count_file++;

    my $out_file = File::Spec->catfile( $out_base, "$file_id.$out_file_ext" );

    # Writing onto the file being read would truncate it before it is read.
    if ( File::Spec->rel2abs($in_file) eq File::Spec->rel2abs($out_file) ) {
        die "Refusing to overwrite input file $in_file; "
          . "use a different --out_dir or --out_file_ext\n";
    }

    my $count_seq = unwrap_fasta_file( $in_file, $out_file );

    if ($verbose) {
        print "file_id = $file_id, count_seq = $count_seq\n";
    }
}
closedir($dir_fh);

exit(0);
tutorials/perl/unwrap_seq_fasta.1.pl.txt · Last modified: 2012/06/15 00:31 by chkuo