User Tools

Site Tools


tutorials:perl_examples

Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision
Previous revision
Next revision
Previous revision
tutorials:perl_examples [2010/08/03 02:21] chkuo
tutorials:perl_examples [2017/04/12 17:27] (current) chkuo
Line 1: Line 1:
-====== Perl Examples ======
+====== Perl examples ======
  
-  * Find empty files in a directory: [[tutorial:perl:find_empty_file.2.pl]]
+  * Find empty files in a directory: [[tutorials:perl:find_empty_file.2.pl]]
-  * Unwrap sequences in fasta files: [[tutorial:perl:unwrap_seq_fasta.1.pl]]
+  * Rename files based on regex: [[tutorials:perl:rename_file_by_regex.3.pl]]
-  * Find homopolymeric regions: [[tutorial:perl:find_homopolymer.3.pl]]
+
  
 +  * Generate command scripts to run blast+: [[tutorials:perl:cmd_blast+.1.pl]]
 +  * Execute command scripts: [[tutorials:perl:execute.3.pl]]
 +  * Parse blast+ results, 1 hit per line: [[tutorials:perl:parse_blast_sim.6.pl]]
 +  * Parse blast+ results, 1 HSP per line: [[tutorials:perl:parse_blast_hsp.4.pl]]
  
-===== Generate Command Scripts for Running Blast+ =====
+  * Unwrap sequences in fasta files: [[tutorials:perl:unwrap_seq_fasta.1.pl]]
-<code perl>
+  * Find homopolymeric regions: [[tutorials:perl:find_homopolymer.3.pl]]
-#!/usr/bin/perl -w
+  * Correct sequence orientation in a fasta file: [[tutorials:perl:correct_fasta_orientation.1.pl]]
 +  * Trim sequence based on lucy: [[tutorials:perl:trim_lucy.2.pl]] 
 +  * Trim sequence based on regex: [[tutorials:perl:trim_seq_by_regex.1.pl]]
  
-my $script_name = 'cmd_blast+.1.pl';
-
-# Chih-Horng Kuo
-# generate commands for running NCBI blast+
-# v1 2010/07/13
-#
-# Scans --in_dir for files named <file_id>.<in_file_ext>, deals them out
-# round-robin to at most --n_job shell scripts in --sh_dir, and makes each
-# script executable.  Every script starts with a bash shebang followed by
-# one line per input file:
-#     <exe> -query <in_dir><file_id>.<in_file_ext> -out <out_dir><file_id>.<out_file_ext> [<opt>]
-#
-# Options (default in parentheses):
-#   --in_dir        directory holding the query files (required)
-#   --out_dir       directory used for the -out paths (required, created if missing)
-#   --sh_dir        directory receiving the job scripts (required, created if missing)
-#   --exe           executable to call (/usr/local/blast+/bin/blastn)
-#   --in_file_ext   extension of the query files (fasta)
-#   --out_file_ext  extension of the result files (blast)
-#   --sh_prefix     file name prefix of the job scripts (job)
-#   --opt           extra arguments appended verbatim to every command
-#   --n_job         number of job scripts to spread the files over (1)
-#   --debug         accepted for compatibility; currently has no effect
-
-use strict;
-use warnings;
-
-use Getopt::Long;
-use File::Basename;
-use File::Path qw(make_path);
-
-my $usage = "usage: $script_name --in_dir=DIR --out_dir=DIR --sh_dir=DIR"
-  . " [--exe=PATH] [--in_file_ext=EXT] [--out_file_ext=EXT]"
-  . " [--sh_prefix=STR] [--opt=STR] [--n_job=INT] [--debug=INT]\n";
-
-my $exe;
-my $in_dir;
-my $out_dir;
-my $sh_dir;
-my $in_file_ext;
-my $out_file_ext;
-my $sh_prefix;
-my $opt;
-my $n_job;
-my $debug;
-
-GetOptions(
-    "exe=s"          => \$exe,
-    "in_dir=s"       => \$in_dir,
-    "out_dir=s"      => \$out_dir,
-    "sh_dir=s"       => \$sh_dir,
-    "in_file_ext=s"  => \$in_file_ext,
-    "out_file_ext=s" => \$out_file_ext,
-    "sh_prefix=s"    => \$sh_prefix,
-    "opt=s"          => \$opt,
-    "n_job=i"        => \$n_job,
-    "debug=i"        => \$debug,
-) or die $usage;
-
-# Same truthiness-based defaults as before: an empty or zero value falls
-# back to the default.
-$exe          ||= '/usr/local/blast+/bin/blastn';
-$in_file_ext  ||= 'fasta';
-$out_file_ext ||= 'blast';
-$sh_prefix    ||= 'job';
-$n_job        ||= 1;
-
-# The three directories have no sensible default; fail early with a clear
-# message instead of running mkdir/opendir on an undefined value.
-my %required = ( in_dir => $in_dir, out_dir => $out_dir, sh_dir => $sh_dir );
-foreach my $name ( sort keys %required ) {
-    die "missing required option --$name\n$usage"
-      unless defined $required{$name} && length $required{$name};
-}
-
-# A negative job count would make the modulus below produce job ids <= 0.
-die "--n_job must be a positive integer\n$usage" if $n_job < 1;
-
-# Paths are built by plain concatenation, so make sure every directory ends
-# in a slash.  Directories already given with a trailing slash are unchanged.
-foreach my $dir ( $in_dir, $out_dir, $sh_dir ) {
-    $dir .= '/' unless $dir =~ m{/\z};
-}
-
-# make_path croaks on failure and does not pass the path through a shell.
-make_path($out_dir) unless -e $out_dir;
-make_path($sh_dir)  unless -e $sh_dir;
-
-# Collect the file ids (file name minus extension).  The extension is quoted
-# so regex metacharacters in it are literal, and the match is anchored so a
-# name containing whitespace is skipped rather than truncated; such a name
-# could not be used in the unquoted shell command anyway.  Sorting makes the
-# file-to-job assignment reproducible (readdir order is unspecified).
-opendir( my $dh, $in_dir ) or die "can't open $in_dir: $!\n";
-my @file_ids =
-  sort map { /\A(\S+)\.\Q$in_file_ext\E\z/ ? $1 : () } readdir $dh;
-closedir $dh;
-
-my $count = 0;
-my %job_id_HoA;    # key = job_id, value = array of file_id
-foreach my $file_id (@file_ids) {
-    my $job_id = ( $count % $n_job ) + 1;
-    push @{ $job_id_HoA{$job_id} }, $file_id;
-    $count++;
-}
-
-# generate job .sh (job ids are numbers, so sort them numerically)
-foreach my $job_id ( sort { $a <=> $b } keys %job_id_HoA ) {
-    my $sh_file = $sh_dir . $sh_prefix . $job_id . '.sh';
-    open( my $out, '>', $sh_file )
-      or die "Can't open output file $sh_file: $!\n";
-
-    # shell
-    print {$out} '#!/bin/bash', "\n";
-
-    foreach my $file_id ( @{ $job_id_HoA{$job_id} } ) {
-        my $in_file  = $in_dir . $file_id . '.' . $in_file_ext;
-        my $out_file = $out_dir . $file_id . '.' . $out_file_ext;
-
-        my $command = "$exe -query $in_file -out $out_file";
-        $command .= " $opt" if $opt;
-        print {$out} "$command\n";
-    }
-
-    # Buffered write errors only surface at close.
-    close $out or die "Can't close output file $sh_file: $!\n";
-
-    # Equivalent of "chmod +x": add the execute bits the umask allows.
-    my $mode = ( stat $sh_file )[2] & 07777;
-    chmod( $mode | ( 0111 & ~umask() ), $sh_file )
-      or die "Can't make $sh_file executable: $!\n";
-}
-
-exit(0);
-</code> 
- 
-===== Execute Command Scripts ===== 
-<code perl> 
-#!/usr/bin/perl -w
-
-my $script_name = 'execute.3.pl';
-
-# Chih-Horng Kuo <chkuo@lifedev.org>
-# execute all .sh in the in_dir
-# v3 2010/02/04
-#   style change
-# v2 2009/06/18
-# v1 2006/05/03
-#
-# Scans --in_dir for files named <file_id>.<in_file_ext>, deals them out
-# round-robin to at most --n_job batch files in --exe_dir, then starts every
-# batch file in the background with stdout and stderr redirected to a log
-# file next to it.  Each batch file starts with a bash shebang followed by
-# one line per input script, which is that script's path.
-#
-# Options (default in parentheses):
-#   --in_dir          directory holding the scripts to run (required)
-#   --exe_dir         directory for batch and log files (required, created if missing)
-#   --in_file_ext     extension of the scripts to run (sh)
-#   --batch_file_ext  extension of the batch files (sh)
-#   --log_file_ext    extension of the log files (log)
-#   --prefix          file name prefix of batch and log files (job)
-#   --n_job           number of batch files, i.e. parallel jobs (1)
-#   --debug           accepted for compatibility; currently has no effect
-
-use strict;
-use warnings;
-
-use Getopt::Long;
-use File::Path qw(make_path);
-
-my $usage = "usage: $script_name --in_dir=DIR --exe_dir=DIR"
-  . " [--in_file_ext=EXT] [--batch_file_ext=EXT] [--log_file_ext=EXT]"
-  . " [--prefix=STR] [--n_job=INT] [--debug=INT]\n";
-
-my $in_dir;
-my $exe_dir;
-my $in_file_ext;
-my $batch_file_ext;
-my $log_file_ext;
-my $prefix;    # prefix of batch files
-my $n_job;     # split into n batch files
-my $debug;
-
-GetOptions(
-    "in_dir=s"         => \$in_dir,
-    "exe_dir=s"        => \$exe_dir,
-    "in_file_ext=s"    => \$in_file_ext,
-    "batch_file_ext=s" => \$batch_file_ext,
-    "log_file_ext=s"   => \$log_file_ext,
-    "prefix=s"         => \$prefix,
-    "n_job=i"          => \$n_job,
-    "debug=i"          => \$debug,
-) or die $usage;
-
-# Same truthiness-based defaults as before: an empty or zero value falls
-# back to the default.
-$prefix         ||= 'job';
-$in_file_ext    ||= 'sh';
-$batch_file_ext ||= 'sh';
-$log_file_ext   ||= 'log';
-
-# Without a default, omitting --n_job left $n_job undefined and the modulus
-# below died with "Illegal modulus zero" on the first matching file.
-$n_job ||= 1;
-die "--n_job must be a positive integer\n$usage" if $n_job < 1;
-
-# Neither directory has a sensible default; fail early with a clear message.
-my %required = ( in_dir => $in_dir, exe_dir => $exe_dir );
-foreach my $name ( sort keys %required ) {
-    die "missing required option --$name\n$usage"
-      unless defined $required{$name} && length $required{$name};
-}
-
-# Paths are built by plain concatenation, so make sure both directories end
-# in a slash.  Directories already given with a trailing slash are unchanged.
-foreach my $dir ( $in_dir, $exe_dir ) {
-    $dir .= '/' unless $dir =~ m{/\z};
-}
-
-# make_path croaks on failure and does not pass the path through a shell.
-make_path($exe_dir) unless -e $exe_dir;
-
-# Collect the file ids (file name minus extension).  The extension is quoted
-# so regex metacharacters in it are literal, and the match is anchored so a
-# name containing whitespace is skipped rather than truncated.  Sorting makes
-# the script-to-job assignment reproducible (readdir order is unspecified).
-opendir( my $dh, $in_dir ) or die "can't open $in_dir: $!";
-my @file_ids =
-  sort map { /\A(\S+)\.\Q$in_file_ext\E\z/ ? $1 : () } readdir $dh;
-closedir $dh;
-
-my %job_id_HoA;    # key = job_id, value = array of file_id
-my %is_input;      # key = input file name, used by the overwrite check
-my $count = 0;
-foreach my $file_id (@file_ids) {
-    my $job_id = ( $count % $n_job ) + 1;
-    push @{ $job_id_HoA{$job_id} }, $file_id;
-    $is_input{"$file_id.$in_file_ext"} = 1;
-    $count++;
-}
-
-# When the batch files land in the input directory, a batch file named like
-# an input script (e.g. the default job1.sh) would overwrite that script and
-# then call itself.  Refuse before anything is written or started.
-my $same_dir = join( ':', ( stat $in_dir )[ 0, 1 ] ) eq
-  join( ':', ( stat $exe_dir )[ 0, 1 ] );
-if ($same_dir) {
-    foreach my $job_id ( keys %job_id_HoA ) {
-        my $batch_name = $prefix . $job_id . '.' . $batch_file_ext;
-        die "batch file $exe_dir$batch_name would overwrite an input script; "
-          . "use a different --exe_dir, --prefix or --batch_file_ext\n"
-          if $is_input{$batch_name};
-    }
-}
-
-# Job ids are numbers, so sort them numerically.
-foreach my $job_id ( sort { $a <=> $b } keys %job_id_HoA ) {
-    my $batch_file = $exe_dir . $prefix . $job_id . '.' . $batch_file_ext;
-    my $log_file   = $exe_dir . $prefix . $job_id . '.' . $log_file_ext;
-    open( my $out, '>', $batch_file )
-      or die "Can't open output file $batch_file: $!\n";
-
-    # shell
-    print {$out} '#!/bin/bash', "\n";
-    foreach my $file_id ( @{ $job_id_HoA{$job_id} } ) {
-        print {$out} "$in_dir$file_id.$in_file_ext\n";
-    }
-
-    # Buffered write errors only surface at close.
-    close $out or die "Can't close output file $batch_file: $!\n";
-
-    # Equivalent of "chmod +x": add the execute bits the umask allows.
-    my $mode = ( stat $batch_file )[2] & 07777;
-    chmod( $mode | ( 0111 & ~umask() ), $batch_file )
-      or die "Can't make $batch_file executable: $!\n";
-
-    # Redirection and backgrounding need a shell, so this stays a single
-    # command string.  NOTE(review): directory names containing whitespace
-    # or shell metacharacters are not quoted here.
-    my $command = "$batch_file > $log_file 2>&1 &";
-    system($command) == 0
-      or warn "failed to start $batch_file (exit status $?)\n";
-    print "command: $command\n";
-}
-
-exit(0);
-</code> 
  
  
tutorials/perl_examples.1280773307.txt.gz · Last modified: 2010/08/03 02:21 by chkuo