User Tools

Site Tools


tutorials:perl:display_codon_2.pl
display_codon_2.pl
# display_codon_2.pl
# Read a FASTA file that contains protein-coding sequences.
# Re-format the sequences to show codons (10 codons per line)
# in the output file.
# This version uses a regular expression to obtain the codons.
use strict;
use warnings;

# Command-line arguments: the FASTA file to read and the file to write.
my $in_file  = shift;
my $out_file = shift;

# Refuse to continue without both file names. A three-argument open() that
# is handed an undefined file name opens an anonymous temporary file instead
# of failing, so a missing argument has to be caught here.
die "Usage: $0 <in_fasta_file> <out_file>\n"
	unless defined $in_file && defined $out_file;

# Sequences read from the input file.
# key = seq_name (header line without the leading ">"),
# value = seq (upper case, all whitespace removed)
my %seq_hash;
{
	# Redefine the record separator so that each read returns one FASTA
	# record, i.e. the text between two ">" characters. "local" restores
	# the normal line-by-line behaviour when this block ends.
	# NOTE: a ">" inside a header line is also treated as a record boundary.
	local $/ = ">";

	open my $in_fh, '<', $in_file
		or die "Cannot open '$in_file' for reading: $!\n";

	# Toss the first record: it is whatever precedes the first ">".
	my $in_line = <$in_fh>;

	while ( defined( $in_line = <$in_fh> ) ) {
		chomp $in_line;    # remove the ">" character at the end

		# A record with no content at all (e.g. two consecutive ">")
		# carries neither a name nor a sequence; skip it.
		next if $in_line eq '';

		# First line is the sequence name, the rest is the sequence.
		my ( $seq_name, $seq ) = split( /\n/, $in_line, 2 );

		# Files with CRLF line endings leave a "\r" at the end of the name.
		$seq_name =~ s/\r\z//;

		# A header without any sequence lines yields no second field.
		$seq = '' unless defined $seq;

		$seq =~ tr/ \t\n\r//d;    # remove whitespace, including line breaks
		$seq_hash{$seq_name} = uc $seq;
	}
	close $in_fh;
}
 
# Write every sequence whose length is a multiple of 3 to the output file,
# sorted by sequence name, as a ">" header line followed by the codons,
# 10 codons per line separated by single spaces. Sequences whose length is
# not a multiple of 3 are reported on STDERR and left out of the output.

# A three-argument open() that is handed an undefined file name opens an
# anonymous temporary file instead of failing, so reject a missing output
# file name explicitly.
die "No output file name given\n"
	unless defined $out_file && length $out_file;

open my $out_fh, '>', $out_file
	or die "Cannot open '$out_file' for writing: $!\n";

foreach my $seq_name ( sort keys %seq_hash ) {
	my $seq_length = length $seq_hash{$seq_name};

	if ( $seq_length % 3 == 0 ) {
		print {$out_fh} ">$seq_name\n";

		# In list context a /g match returns every captured group,
		# i.e. the sequence cut into consecutive 3-character codons.
		my @codons = $seq_hash{$seq_name} =~ /(.{3})/g;

		my $count_codon = 0;
		foreach my $codon (@codons) {
			$count_codon++;

			# Every 10th codon ends the line; all others are
			# followed by a separating space.
			print {$out_fh} $codon, ( $count_codon % 10 == 0 ? "\n" : " " );
		}

		# Terminate a final line that holds fewer than 10 codons.
		if ( $count_codon % 10 != 0 ) {
			print {$out_fh} "\n";
		}
	}
	else {
		# Report the actual length; "length ..." is not evaluated
		# inside a double-quoted string.
		warn "$seq_name length = $seq_length!\n";
	}
}

# Buffered write errors (e.g. a full disk) only surface at close.
close $out_fh
	or die "Cannot close '$out_file': $!\n";
tutorials/perl/display_codon_2.pl.txt · Last modified: 2012/06/15 00:38 by chkuo