tutorials:perl:display_codon_1.pl
- display_codon_1.pl
# display_codon_1.pl
# Read a fasta file that contains protein-coding sequences.
# Re-format the sequences to show codons (10 codons per line)
# in the output file.
# This version uses the substr function to get the codons.
#
# Usage: perl display_codon_1.pl <in_file> <out_file>
#
# Sequences whose length is not a multiple of 3 are not written to the
# output file; a warning with the sequence name and length is printed
# to STDERR instead.

use strict;
use warnings;

use constant {
    CODON_LENGTH    => 3,     # nucleotides per codon
    CODONS_PER_LINE => 10,    # codons printed on each output line
};

# Read a fasta file and return a hash (key = seq_name, value = seq).
# Sequences are upper-cased and stripped of all whitespace.
# Dies if the file cannot be opened or closed.
sub read_fasta {
    my ($in_file) = @_;

    my %seq_hash;    # key = seq_name, value = seq

    open my $in_fh, '<', $in_file
        or die "Cannot open '$in_file' for reading: $!\n";

    # Redefine the record separator so that each read returns one fasta
    # record; "local" restores the default when this sub returns.
    local $/ = '>';

    my $in_line = <$in_fh>;    # toss the first record (text before the first ">")
    while ( defined( $in_line = <$in_fh> ) ) {
        chomp $in_line;        # remove the ">" character in the end

        # First line is the sequence name, the rest is the sequence.
        my ( $seq_name, $seq ) = split /\n/, $in_line, 2;

        # A completely empty record (e.g. ">>") has no name at all.
        next if !defined $seq_name;

        # A header without any sequence line leaves $seq undefined.
        $seq = '' if !defined $seq;

        $seq_name =~ s/\r\z//;    # drop the CR left by Windows line endings
        $seq =~ tr/ \t\n\r//d;    # remove whitespace
        $seq_hash{$seq_name} = uc $seq;
    }

    close $in_fh or die "Cannot close '$in_file': $!\n";

    return %seq_hash;
}

# Return the sequence formatted as space-separated codons, with
# CODONS_PER_LINE codons per line.  The caller must make sure that the
# sequence length is a multiple of CODON_LENGTH.
# Codons that do not end a full line are followed by a single space, so
# a partial last line ends with " \n" (same as the original output).
sub format_codons {
    my ($seq) = @_;

    my $num_codon = length($seq) / CODON_LENGTH;
    my $formatted = '';

    foreach my $count_codon ( 1 .. $num_codon ) {
        my $codon =
            substr( $seq, ( $count_codon - 1 ) * CODON_LENGTH, CODON_LENGTH );
        if ( $count_codon % CODONS_PER_LINE == 0 ) {
            $formatted .= "$codon\n";
        }
        else {
            $formatted .= "$codon ";
        }
    }

    # Terminate the last line if it is not already a full line.
    if ( $num_codon % CODONS_PER_LINE != 0 ) {
        $formatted .= "\n";
    }

    return $formatted;
}

# Entry point: read $in_file, write the codon view of every sequence
# (sorted by name) to $out_file.
# Dies on missing arguments or on any file error.
sub main {
    my ( $in_file, $out_file ) = @_;

    if ( !defined $in_file or !defined $out_file ) {
        die "Usage: $0 <in_file> <out_file>\n";
    }

    my %seq_hash = read_fasta($in_file);

    open my $out_fh, '>', $out_file
        or die "Cannot open '$out_file' for writing: $!\n";

    foreach my $seq_name ( sort keys %seq_hash ) {
        my $seq        = $seq_hash{$seq_name};
        my $seq_length = length $seq;

        if ( $seq_length % CODON_LENGTH == 0 ) {
            print {$out_fh} ">$seq_name\n", format_codons($seq);
        }
        else {
            # Report the actual length (not the sequence itself).
            warn "$seq_name length = $seq_length!\n";
        }
    }

    # Buffered write errors only show up when the handle is closed.
    close $out_fh or die "Cannot close '$out_file': $!\n";

    return;
}

# Run only when executed as a script, not when loaded by another file.
main(@ARGV) unless caller;

1;
tutorials/perl/display_codon_1.pl.txt · Last modified: 2012/06/15 00:37 by chkuo