tutorials:perl:unwrap_seq_fasta.1.pl
- unwrap_seq_fasta.1.pl
#!/usr/bin/perl -w

# Chih-Horng Kuo <chkuo@lifedev.org>
# remove extra line-breaks (in the sequences) in fasta files
# v1 2010/03/04
#
# For every file in --in_dir whose name is "<file_id>.<in_file_ext>", write
# "<file_id>.<out_file_ext>" into --out_dir with each sequence joined onto a
# single line (all spaces, tabs, CRs and LFs removed from the sequence).
#
# Options:
#   --in_dir=DIR         directory containing the input fasta files (required)
#   --out_dir=DIR        directory for the unwrapped files (required; created
#                        if it does not exist)
#   --in_file_ext=EXT    input file extension  (default: fasta)
#   --out_file_ext=EXT   output file extension (default: fasta)
#   --verbose=N          when true, print one summary line per file
#   --debug=N            accepted for command-line compatibility; unused

use strict;
use warnings;

use File::Basename;
use File::Path qw(make_path);
use File::Spec;
use Getopt::Long;

my $script_name = 'unwrap_seq_fasta.1.pl';

my $in_dir;
my $in_file_ext;
my $out_dir;
my $out_file_ext;
my $verbose;
my $debug;    # parsed so that existing command lines keep working; not used

# The return value is deliberately not checked: unknown options only produce
# Getopt::Long's own warning and the run continues.
GetOptions(
    "in_dir=s"       => \$in_dir,
    "in_file_ext=s"  => \$in_file_ext,
    "out_dir=s"      => \$out_dir,
    "out_file_ext=s" => \$out_file_ext,
    "verbose=i"      => \$verbose,
    "debug=i"        => \$debug,
);

# Both directories are mandatory.  Without --out_dir the output path would
# fall back to the current directory and could overwrite the input files.
my $usage =
    "Usage: $script_name --in_dir=DIR --out_dir=DIR"
  . " [--in_file_ext=EXT] [--out_file_ext=EXT] [--verbose=N] [--debug=N]\n";
die $usage
  unless defined $in_dir
  && length $in_dir
  && defined $out_dir
  && length $out_dir;

$in_file_ext  ||= 'fasta';
$out_file_ext ||= 'fasta';

# Create the output directory (and any missing parents) without a shell.
make_path($out_dir) unless -d $out_dir;

opendir( my $dir_handle, $in_dir ) or die "can't open $in_dir: $!\n";
while ( defined( my $entry = readdir($dir_handle) ) ) {

    # \Q...\E makes the extension match literally (e.g. the "." in "fa.gz").
    next unless $entry =~ /\A(\S+)\.\Q$in_file_ext\E\z/;
    my $file_id = $1;

    # catfile() inserts the path separator, so the directories may be given
    # with or without a trailing slash.
    my $in_file  = File::Spec->catfile( $in_dir,  $entry );
    my $out_file = File::Spec->catfile( $out_dir, "$file_id.$out_file_ext" );

    next unless -f $in_file;    # ignore directories that happen to match

    # Opening the output truncates it; never do that to the file being read.
    die "Input and output would be the same file ($in_file);"
      . " use a different --out_dir or --out_file_ext\n"
      if File::Spec->rel2abs($in_file) eq File::Spec->rel2abs($out_file);

    my $count_seq = unwrap_fasta_file( $in_file, $out_file );

    if ($verbose) {
        print "file_id = $file_id, count_seq = $count_seq\n";
    }
}
closedir($dir_handle);

exit(0);

# Copy one fasta file, writing every record as exactly two lines:
# ">header" followed by the sequence with all whitespace removed.
#
# Parameters:
#   $in_file  - path of the fasta file to read
#   $out_file - path of the file to (over)write
# Returns the number of records written.
# Dies if either file cannot be opened or if writing fails.
sub unwrap_fasta_file {
    my ( $in_file, $out_file ) = @_;

    # Open the input first so a missing input never leaves an empty output.
    open my $in_fh, '<', $in_file
      or die "Can't open input file $in_file: $!\n";
    open my $out_fh, '>', $out_file
      or die "Can't open output file $out_file: $!\n";

    my $count_seq = 0;
    my $seq_name;    # header of the record being collected; undef before
                     # the first header line has been seen
    my $seq = '';

    while ( my $line = <$in_fh> ) {
        $line =~ s/\r?\n\z//;    # strip LF or CRLF line ending

        # Only a ">" in the first column starts a record, so a ">" inside a
        # description (e.g. "A->B") does not split it.
        if ( substr( $line, 0, 1 ) eq '>' ) {
            if ( defined $seq_name ) {
                _write_record( $out_fh, $out_file, $seq_name, $seq );
                $count_seq++;
            }
            $seq_name = substr( $line, 1 );
            $seq      = '';
            next;
        }

        # Anything before the first header is discarded.
        next unless defined $seq_name;

        $line =~ tr/ \t\n\r//d;    # Remove whitespace
        $seq .= $line;
    }

    # Flush the last record, which has no following header to trigger it.
    if ( defined $seq_name ) {
        _write_record( $out_fh, $out_file, $seq_name, $seq );
        $count_seq++;
    }

    close $in_fh;

    # Buffered write errors (e.g. disk full) surface at close.
    close $out_fh or die "Can't close output file $out_file: $!\n";

    return $count_seq;
}

# Write one record as ">name\nsequence\n"; dies if the write fails.
sub _write_record {
    my ( $out_fh, $out_file, $seq_name, $seq ) = @_;

    print {$out_fh} ">$seq_name\n$seq\n"
      or die "Can't write to output file $out_file: $!\n";

    return;
}
tutorials/perl/unwrap_seq_fasta.1.pl.txt · Last modified: 2012/06/15 00:31 by chkuo