Thesis Functions

Perl functions used in thesis scripts.

    1 # USAGE IN SCRIPTS:
    2 #  require "functions.pl";
    3 #  use vars qw( @aas @sorted_aas @aas_and_gaps );
    4 #
    5 # A utility functions file
    6 
    7 use warnings;
    8 use strict;
    9 use Math::Complex;
   10 
   11 # all the aas included in an EMBOSS matrix, and ambiguity codes
   12 my @emboss_aas = ( 'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V', "B", "Z", "X", "*" );
   13 
   14 # excludes B, Z, X, and *
   15 my @aas = ( 'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V' );
   16 
   17 my @sorted_aas = sort( @aas );
   18 
   19 # includes a gap character
   20 my @aa_and_gaps = ( 'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V', '-');
   21 
   22 my $embossdata = "/Users/ajohnson/familybuild/scripts/.embossdata/";
   23 
   24 ################################################################################
   25 #  UTILITY
   26 ################################################################################
   27 
   28 # trim the whitespace off of the front and end of a string
   29 sub trim
   30 {
   31     chomp( @_ );
   32   map { /^\s*(\S.*\S)\s*$/ } @_;
   33 }
   34 
   35 # given a tab delimited line, returns an array
   36 sub make_fields
   37 {
   38     my $line = shift( @_ );
   39     chomp( $line );
   40     my @line_fields = split( /\t/, $line );
   41     return @line_fields;
   42 }
   43 
   44 # returns a hash reference col name -> value
   45 sub get_fields
   46 {
   47     my $line = shift( @_ );
   48     chomp( $line );
   49     my @line_fields = @_;
   50 
   51     my @fields = split( /\t/, $line );
   52     my %fields;
   53 
   54     foreach my $field_name (@line_fields)
   55     {
   56         $fields{$field_name} = shift( @fields );
   57         #print "$field_name = ".$fields{$field_name} . "\n";
   58     }
   59 
   60     return \%fields;
   61 }
   62 
   63 # makes a line for a tab delineated file
   64 # from a hash in the given order of fields
   65 sub make_line
   66 {
   67     my $hash = shift( @_ );
   68     my @fields = @_;
   69     my @line;
   70 
   71     foreach my $field ( @fields )
   72     {
   73         push( @line, $hash->{$field} );
   74     }
   75 
   76     return join( "\t", @line );
   77 }
   78 
   79 # Prints out each key-value pair of a hash reference
   80 sub print_hash
   81 {
   82     my $hash_ref = shift( @_ );
   83 
   84     while ( my ($key, $value) = each(%$hash_ref) ) {
   85         print "\t'$key' => $value\n";
   86     }
   87 }
   88 
   89 # parses a settings file and returns a hash of setting value pairs
   90 sub get_settings
   91 {
   92     my %settings;
   93 
   94     foreach my $file ( @_ )
   95     {
   96         open( SETTINGS, $file ) || die( "Could not open settings file $file!\n");
   97 
   98         while( my $line = <SETTINGS> )
   99         {
  100             &trim( $line );
  101 
  102             #don't include comments, sections, or empty lines
  103             next if( substr( $line, 0, 1) eq "#" || substr( $line, 0, 1) eq "=" || !$line );
  104 
  105             my @fields = split( "\t", $line );
  106             my $pref_name = shift( @fields );
  107 
  108             if( scalar( @fields ) > 1 )
  109             {
  110                 $settings{$pref_name} = \@fields;
  111             }
  112             elsif( scalar( @fields ) == 1 )
  113             {
  114                 $settings{$pref_name} = shift( @fields );
  115             }
  116         }
  117     }
  118 
  119     return \%settings;
  120 }
  121 
  122 # Checks that a given settings file contains all the settings
  123 # in the given hash from that settings file
  124 sub check_settings
  125 {
  126     my $settings_file = shift( @_ );
  127     my $settings = shift( @_ );
  128 
  129     foreach my $setting( @_ )
  130     {
  131         if( !defined( $settings->{$setting} ) || !$settings->{$setting} )
  132         {
  133             die "$setting is not in $settings_file!\n";
  134         }
  135     }
  136 }
  137 
  138 sub print_settings
  139 {
  140     my $title = shift( @_ );
  141     my $settings = shift( @_ );
  142 
  143     print "$title:\n\n";
  144 
  145     foreach my $setting ( @_ )
  146     {
  147         my $value = $settings->{$setting};
  148 
  149         if( ref($value) eq "ARRAY" )
  150         {
  151             print "\t$setting\t=\t" . join( "\t", @$value ) . "\n";
  152         }
  153         else
  154         {
  155             print "\t$setting\t=\t$value\n";
  156         }
  157     }
  158 
  159     print "\n";
  160 }
  161 
  162 # reads in an entire file at once
  163 sub slurp
  164 {
  165      my $file = shift( @_ );
  166 
  167      my $old_terminator = $/;
  168      undef $/;
  169      open( SLURP, $file ) || warn "Could not open file $file for slurp!\n";
  170      my $slurp = <SLURP>;
  171      close( SLURP );
  172      $/ = $old_terminator;
  173 
  174      return $slurp;
  175 }
  176 
  177 # will submit jobs for all families where the last
  178 # arg to the script being run here is the family folder
  179 #
  180 # Runs in SGE parallel if load is 0
  181 # Runs linearly if load is -1
  182 # Runs multiple jobs at once and keeps the load under the
  183 #   given number otherwise
  184 sub submit_family_jobs
  185 {
  186     my $maindir = shift( @_ );
  187     my $script = shift( @_ );
  188     my $sgename = shift( @_ );
  189     my $sgedir = shift( @_ );
  190     my $load = shift( @_ );
  191     my $wait = 4;
  192     my @args = @_;
  193 
  194     print "Running jobs for $maindir\n";
  195     print "\tScript: $script\n";
  196     print "\tSGEName: $sgename\n";
  197     print "\tSGEDir: $sgedir\n";
  198     print "\tLoad: $load\n";
  199     print "\tArgs: " . join( ", ", @args ) . "\n";
  200 
  201     if( ! -e $sgedir )
  202     {
  203         print "SGEDIR: '$sgedir' does not exist; creating\n\n";
  204         mkpath( $sgedir );
  205     }
  206 
  207     my @commands;
  208     my $parallel = 0;
  209 
  210     if( $load == 0 )
  211     {
  212       print "Load is 0, running in parallel...\n\n";
  213         $parallel = 1;
  214     }
  215 
  216     if( $parallel || $load < 0 )
  217     {
  218         $wait = 1;
  219     }
  220 
  221     my @folders = <$maindir/*.fasta.folder>;
  222 
  223     my $submit_index = 0;
  224     my $job_total = scalar(@folders);
  225 
  226     # run jobs for all folders, in alphabetical order
  227     foreach my $folder (sort(@folders))
  228     {
  229         $folder =~ /([^\/]+)\.fasta\.folder$/;
  230         my $name = $1;
  231 
  232         $submit_index++;
  233 
  234         my $submit_command = "perl $script " .
  235             join( " ", @args ) . "  $folder";
  236 
  237         # submit job through qsub
  238         if( $parallel )
  239         {
  240             print "Performing job $submit_index/$job_total\n";
  241             $submit_command = "qsub -cwd -b y -o $sgedir -e $sgedir -N $sgename\_$name $submit_command";
  242             print "\n$submit_command\n\n";
  243             system( $submit_command );
  244             &check_num_jobs($wait);
  245         }
  246         # add to the linear queue
  247         else
  248         {
  249             push( @commands, $submit_command );
  250         }
  251     }
  252 
  253     # run multiple jobs at once on a single computer
  254     if( ! $parallel && $load > 0 )
  255     {
  256         &job_processor( $load, $wait, @commands );
  257     }
  258     # run one job at a time
  259     else
  260     {
  261         my $run_index = 0;
  262         foreach my $command ( @commands )
  263         {
  264             print "Running $run_index/$job_total: $command\n";
  265             system( $command );
  266             $run_index++;
  267             sleep( 1 );
  268         }
  269     }
  270 }
  271 
  272 # returns when the number of jobs running in qstat
  273 # is less than 80
  274 sub check_num_jobs
  275 {
  276     my( $wait ) = @_;
  277 
  278     sleep($wait);
  279 
  280     # give 20 as leeway
  281   my $max_num_jobs = 80;
  282   my $count = &count_qstat_jobs();
  283 
  284     print "Jobs still running: $count\n";
  285 
  286     while( $count >= $max_num_jobs )
  287     {
  288         sleep($wait+4);
  289         $count = &count_qstat_jobs();
  290     }
  291 }
  292 
  293 # Counts the current qstat jobs running
  294 # for the user
  295 sub count_qstat_jobs
  296 {
  297     my $user = $ENV{"USER"};
  298   my $qstat = `qstat -u $user`;
  299   my @lines = split("\n",$qstat);
  300   @lines = grep { /$user/ } @lines;
  301   my $count = scalar(@lines);
  302   return $count;
  303 }
  304 
  305 # runs jobs on a single machine
  306 sub job_processor
  307 {
  308     my( $max_load, $wait, @commands ) = @_;
  309     # helps identify which jobs are coming from the same processor
  310     my $random_id = int(rand(10000));
  311     my $n_jobs = 0;
  312     # number of seconds to wait when overload
  313   my $load_wait = 45;
  314 
  315     for my $command ( @commands )
  316     {
  317         my $load = &get_load(1);
  318 
  319         # check the current load and wait if it's too much
  320         while( $load > $max_load )
  321         {
  322             print "JP $random_id (job $n_jobs): Load $load above $max_load; waiting $load_wait seconds\n";
  323             sleep $load_wait;
  324             $load = &get_load(1);
  325         }
  326 
  327         print "JP $random_id (job $n_jobs): Starting: $command (load: $load)\n";
  328         print `date`;
  329 
  330         # don't want zombie child processes!
  331         $SIG{CHLD} = 'IGNORE';
  332         my $pid = fork();
  333         $n_jobs++;
  334 
  335         if( $pid == 0 )
  336         {
  337             exec( $command );
  338         }
  339 
  340         print "JP $random_id (job $n_jobs): Waiting $wait seconds before submitting next command\n";
  341 
  342         sleep $wait;
  343     }
  344 }
  345 
  346 # parses the uptime command to get a good
  347 # idea of the current load
  348 sub get_load
  349 {
  350     my $slot = shift( @_ );
  351 
  352     my $uptime = `uptime`;
  353 
  354     #print "Getting slot $slot for uptime: $uptime\n";
  355 
  356     if( $uptime !~ /([0-9.]+), ([0-9.]+), ([0-9.]+)$/ )
  357     {
  358         print "Could not parse uptime string: $uptime\n";
  359         return undef;
  360     }
  361 
  362     #print "$1, $2, $3\n";
  363 
  364     if( $slot == 1 )
  365     {
  366         return $1;
  367     }
  368     elsif( $slot == 5 )
  369     {
  370         return $2;
  371     }
  372     elsif( $slot == 15 )
  373     {
  374         return $3;
  375     }
  376     else
  377     {
  378         return $1;
  379     }
  380 }
  381 
  382 # http://www.perl.com/doc/FAQs/FAQ/oldfaq-html/Q4.13.html
  383 sub round {
  384   return int($_[0] + .5 * ($_[0] <=> 0));
  385 }
  386 
  387 ################################################################################
  388 #  SEQUENCE FIDDLING
  389 ################################################################################
  390 
  391 # unwrap all a fasta record's newlines and make the format
  392 # HEADER\tSEQUENCE
  393 sub fasta_unwrap {
  394     my $text = shift( @_ );
  395 
  396   $text =~ s/\n/\t/;
  397   $text =~ s/\>//;
  398   $text =~ s/\n//g;
  399 
  400     return 0 if !$text;
  401 
  402     my @fields = split( "\t", $text );
  403     my %return = ( "id" => $fields[0], "seq" => $fields[1] );
  404     return \%return;
  405 }
  406 
  407 # will take a string that has multiple fasta sequences in it
  408 # and return an ID->sequence hash of the contents
  409 sub fasta_split {
  410 
  411     my %hash;
  412 
  413     foreach my $sequences ( @_ )
  414     {
  415         my @sequences = split(/\>/, $sequences);
  416         &trim( @sequences );
  417 
  418         foreach my $sequence ( @sequences )
  419         {
  420             next if( !$sequence );
  421             my @lines = split( /\n/, $sequence );
  422             &trim( @lines );
  423             my $id = shift( @lines );
  424             $hash{$id} = join( "", @lines );
  425         }
  426     }
  427 
  428     return \%hash;
  429 }
  430 
  431 # figures out the first position in an alignment
  432 # where all sequences have started
  433 sub align_start {
  434 
  435     my ( $alignments, $length ) = shift( @_ );
  436 
  437     if( !$length )
  438     {
  439         my @ids = keys(%$alignments);
  440         $length = length( $alignments->{ $ids[0] } );
  441     }
  442 
  443     my $start = 0;
  444 
  445     # find the start of the complete alignment
  446     for my $location ( 0 .. $length )
  447     {
  448         # keeps track of whether all the sequences
  449         # have a base at this position
  450         my $all = 1;
  451 
  452         while ( my ($id, $align) = each(%$alignments) )
  453         {
  454             $all = 0 if ( substr( $align, $location, 1 ) eq "-" );
  455         }
  456 
  457         if( $all )
  458         {
  459             $start = $location;
  460             last;
  461         }
  462     }
  463 
  464     return $start+1;
  465 }
  466 
  467 # figures out the place in the alignment where the
  468 # shortest sequence ends
  469 sub align_end {
  470 
  471     my ($alignments, $length) = shift( @_ );
  472 
  473     if( !$length )
  474     {
  475         my @ids = keys(%$alignments);
  476         $length = length( $alignments->{ $ids[0] } );
  477     }
  478 
  479     my $offset = 0;
  480 
  481     # find the end of the complete alignment
  482     for my $location ( 1 .. $length )
  483     {
  484         # keeps track of whether all the sequences
  485         # have a base at this position
  486         my $all = 1;
  487 
  488         while( my ($id, $align) = each(%$alignments) )
  489         {
  490             $all = 0 if( substr( $align, -$location, 1 ) eq "-" );
  491             #print( $location . ":\t" . substr( $align, -$location, 1 ) . "\n" );
  492         }
  493 
  494         #print( "=====\n" );
  495 
  496         if( $all )
  497         {
  498             $offset = $location;
  499             last;
  500         }
  501     }
  502 
  503     my $end = $length - $offset + 1;
  504 
  505     #print( "END: $end\n" );
  506 
  507     return $end;
  508 }
  509 
  510 # will take in a ID->alignment hashreference and return
  511 # a hash where the alignments have been trimmed so that
  512 # the trailing beginnings and ends are gone
  513 sub align_trim {
  514 
  515     my $alignments = shift( @_ );
  516     my ( $length, $start, $end ) = shift( @_ );
  517 
  518     if( !$length )
  519     {
  520         my @ids = keys(%$alignments);
  521         $length = length( $alignments->{ $ids[0] } );
  522     }
  523 
  524     if( !$start )
  525     {
  526         # the number of positions from the beginning
  527         $start = &align_start( $alignments );
  528     }
  529 
  530     if( !$end )
  531     {
  532         # the number of positions from the end
  533         $end = &align_end( $alignments );
  534     }
  535 
  536     my %trimmed;
  537 
  538     #print "Length: $length ($ids[0], $has)\n";
  539 
  540     # trim all sequences
  541     while( my ($id, $align) = each( %$alignments))
  542     {
  543         $trimmed{$id} = substr( $align, $start-1, $end - $start + 1 );
  544     }
  545 
  546     return \%trimmed;
  547 
  548 
  549     #things stay the same
  550     if( $start == 0 && $end == $length )
  551     {
  552         # trim all sequences
  553         while( my ($id, $align) = each( %$alignments))
  554         {
  555             $trimmed{$id} = $alignments->{$id};
  556         }
  557     }
  558     #no end for substring
  559     elsif( $end == $length )
  560     {
  561         # trim all sequences
  562         while( my ($id, $align) = each( %$alignments))
  563         {
  564             $trimmed{$id} = substr( $align, $start );
  565         }
  566     }
  567     else
  568     {
  569         # trim all sequences
  570         while( my ($id, $align) = each( %$alignments))
  571         {
  572             $trimmed{$id} = substr( $align, $start-1, -($end-1) );
  573         }
  574     }
  575 
  576     return \%trimmed;
  577 }
  578 
  579 # will take in a hash of alignments and a substitution matrix
  580 # and return a hash containing the evaluation of the alignment
  581 # right now only does two alignments at once!
  582 # NOTE: THESE SHOULD BE TRIMMED ALIGNMENTS
  583 sub evaluate_alignment
  584 {
  585     # hash of alignments by ID->alignment
  586   my $alignments = shift( @_ );
  587     # substitution matrix hash for scoring
  588     # if not present, no score will be present
  589     my $matrix = shift( @_ );
  590     my $gap_penalty = shift( @_ );
  591     my $extend_penalty = shift( @_ );
  592     my @ids = @_;
  593 
  594     my $id1 = $ids[0];
  595     my $id2 = $ids[1];
  596 
  597     #print "Evaluating $id1 vs $id2\n";
  598 
  599     my $align1 = $alignments->{$ids[0]};
  600     my $align2 = $alignments->{$ids[1]};
  601 
  602     #print "seq1: $seq1\nseq2: $seq2\n";
  603 
  604     my $start = 1;
  605   my $end = length($align1);
  606 
  607     #print "Start: $start\nEnd: $end\n";
  608 
  609   my $in_gap = 0;
  610   my $gap_count = 0;
  611   my $num_gaps = 0;
  612   my $num_gap_pos = 0;
  613   my $diff_positions = 0;
  614   my $total_positions = 0;
  615     my $similarity_positions = 0;
  616     my $match_positions = 0;
  617     my $score = 0;
  618 
  619   my %transitions;
  620 
  621   #print "Adding $id1 versus $id2 to family matrix\n";
  622 
  623   for my $location ( $start .. $end )
  624   {
  625     $location--;
  626 
  627     my $aa1 = substr( $align1, $location, 1 );
  628     my $aa2 = substr( $align2, $location, 1 );
  629 
  630         #print "$aa1 vs $aa2: ";
  631 
  632     if( $aa1 eq "-" || $aa2 eq "-" )
  633     {
  634       $in_gap = 1;
  635       $gap_count++;
  636             #print "\n";
  637     }
  638     else
  639     {
  640       if( $in_gap )
  641       {
  642                 $score -= $gap_penalty + $extend_penalty * $gap_count - $extend_penalty;
  643         $num_gap_pos += $gap_count;
  644         $num_gaps++;
  645         $gap_count = 0;
  646         $in_gap = 0;
  647                 #print "GAPS = $num_gaps\n";
  648                 #print "GAP COUNT = $num_gap_pos\n"
  649       }
  650 
  651       if( $aa1 le $aa2 )
  652       {
  653         $transitions{$aa1}{$aa2}++;
  654       }
  655       else
  656       {
  657         $transitions{$aa2}{$aa1}++;
  658       }
  659 
  660             if( !defined($matrix->{$aa1}{$aa2}) )
  661             {
  662                 die "ERROR TRYING TO ACCESS SUBSTITUTION MATRIX AT $aa1:$aa2\n";
  663             }
  664             else
  665             {
  666                 #print "SCORE $score + $matrix->{$aa1}{$aa2} ($aa1:$aa2)\n";
  667 
  668                 #print "$matrix->{$aa1}{$aa2}\n";
  669 
  670                 $score += $matrix->{$aa1}{$aa2};
  671 
  672                 if( $matrix->{$aa1}{$aa2} > 0 )
  673                 {
  674                     #print "\tSIMILARITY POSITION: $aa2 $aa2 = $matrix->{$aa1}{$aa2}\n";
  675                     $similarity_positions++;
  676                 }
  677       }
  678 
  679       if( $aa1 ne $aa2 )
  680       {
  681         $diff_positions++;
  682       }
  683             else
  684             {
  685                 $match_positions++;
  686             }
  687 
  688       $total_positions++;
  689     }
  690   }
  691 
  692   my $total_length = $total_positions + $num_gap_pos;
  693   #my $percent_gaps = $num_gap_pos / $total_length;
  694   my $percent_identity = eval{ $match_positions / $total_length };
  695 
  696     $percent_identity = 0 if( !$percent_identity );
  697     #my $percent_similarity = $similarity_positions / $total_length;
  698 
  699     my %fields = (
  700         "id1" => $id1,
  701         "id2" => $id2,
  702         "length" => $total_length,
  703         "score" => $score,
  704         "pi" => $percent_identity,
  705         "ident_positions" => $match_positions,
  706         "similar_positions" => $similarity_positions,
  707         "num_gaps" => $num_gaps,
  708         "num_gap_positions" => $num_gap_pos,
  709         "align1" => $align1,
  710         "align2" => $align2
  711     );
  712 
  713     return \%fields;
  714 }
  715 
  716 # Given a sequence and the its start and ending sections
  717 # reconstruct the squence as it appeared in its alignment
  718 sub align_construct {
  719 
  720     my ( $sequence, $sections ) = @_;
  721     my @sections = split( ";", $sections );
  722     my $alignment = "";
  723 
  724     my $last_end = 0;
  725 
  726     foreach my $segment ( @sections )
  727     {
  728         # start,end:length
  729         $segment =~ /(\d+),(\d+):(\d+)/;
  730         my $seq_start = $1;
  731         my $align_start = $2;
  732         my $length = $3;
  733 
  734         if( $last_end )
  735         {
  736             $alignment .= "-" x ($align_start - $last_end);
  737         }
  738 
  739         #print( "SEQ: $sequence\n\tSeq start: $seq_start, Align start: $align_start, Length: $length\n" );
  740 
  741         $alignment .= substr( $sequence, $seq_start-1, $length );
  742         $last_end = $align_start + $length;
  743     }
  744 
  745     return $alignment;
  746 }
  747 
  748 # compresses an alignment sequence based on the given start and end
  749 sub align_compress {
  750 
  751     my ( $alignment, $start, $end ) = @_;
  752 
  753     my @sections;
  754 
  755     my $position = 0;
  756 
  757     my $segment = 0;
  758     my $segment_start = 0;
  759     my $align_start = 0;
  760 
  761     for my $location ( 1 .. length( $alignment ) )
  762     {
  763         my $aa = substr( $alignment, $location-1, 1 );
  764         $position++ if( $aa ne "-" );
  765 
  766         next if( $location < $start || $location > $end );
  767 
  768         if( $aa eq "-" )
  769         {
  770             if( $segment )
  771             {
  772                 push( @sections, "$segment_start,$align_start:$segment" );
  773                 $segment = 0;
  774                 $segment_start = 0;
  775                 $align_start = 0;
  776             }
  777 
  778             #print( "$aa\tGAP\tS_POS: $position\tA_POS: $location\tSEGMENT: $segment\tS_START: $segment_start\tA_START: $align_start\n" );
  779         }
  780         else
  781         {
  782             $segment++;
  783 
  784             if( $segment_start == 0 )
  785             {
  786                 $align_start = $location;
  787                 $segment_start = $position;
  788             }
  789 
  790             #print( "$aa\tAA\tS_POS: $position\tA_POS: $location\tSEGMENT: $segment\tS_START: $segment_start\tA_START: $align_start\n" );
  791         }
  792     }
  793 
  794     if( $segment )
  795     {
  796         push( @sections, "$segment_start,$align_start:$segment" );
  797     }
  798 
  799     return join(";", @sections );
  800 }
  801 
  802 # loads FASTA sequences from a file
  803 sub load_fasta_seqs {
  804     my( $file ) = @_;
  805 
  806     my %seqs;
  807     my $seqio_object = Bio::SeqIO->new(-file => "$file");
  808 
  809     while( my $seq_obj = $seqio_object->next_seq ) {
  810         if( !defined($seq_obj)) {
  811             return \%seqs;
  812         }
  813         else {
  814             $seqs{$seq_obj->id()} = $seq_obj->seq();
  815         }
  816     }
  817 
  818     return \%seqs;
  819 }
  820 
  821 ################################################################################
  822 #  MATRIX I/O
  823 ################################################################################
  824 
  825 # takes in a filename
  826 # returns a hash reference to a matrix
  827 sub read_linear_matrix {
  828     my( $filename ) = @_;
  829     my $matrix = &init_matrix();
  830 
  831     if( ! open( MATRIX, "$filename" ) )
  832     {
  833         print STDERR "read_linear_matrix could not open \"$filename\" for reading!\n";
  834         return 0;
  835     }
  836 
  837     while( my $line = <MATRIX> )
  838     {
  839         chomp( $line );
  840         my @row = split( /\t/, $line );
  841         my ($aa1, $aa2, $value ) = @row;
  842         $matrix->{$aa1}{$aa2} = $value;
  843     }
  844 
  845     return $matrix;
  846 }
  847 
  848 # read in a matrix in linear format, use the value in the given column
  849 # FROM the amino acids, starting at 0
  850 sub read_linear_matrix_col {
  851     my( $filename, $col ) = @_;
  852     my $matrix = &init_matrix();
  853 
  854     if( ! open( MATRIX, "$filename" ))
  855     {
  856         print STDERR "read_linear_matrix could not open \"$filename\" for reading!\n";
  857         return 0;
  858     }
  859 
  860     while( my $line = <MATRIX> )
  861     {
  862         chomp( $line );
  863         my @row = split( /\t/, $line );
  864         my $aa1 = shift( @row );
  865         my $aa2 = shift( @row );
  866         $matrix->{$aa1}{$aa2} = $row[$col];
  867     }
  868 
  869     return $matrix;
  870 }
  871 
  872 #takes in a filename
  873 #returns a hash reference of a matrix
  874 sub read_square_matrix {
  875     my( $filename ) = @_;
  876     my $matrix = &init_matrix( );
  877 
  878     if( ! open( MATRIX, "$filename" ) )
  879     {
  880         print STDERR "read_square_matrix could not open \"$filename\" for reading!\n";
  881         return 0;
  882     }
  883 
  884     my $line = <MATRIX>;
  885     chomp( $line );
  886     my @columns = split( /\t/, $line );
  887     #get rid of AAS field
  888     shift( @columns );
  889 
  890     while( $line = <MATRIX> )
  891     {
  892         chomp( $line );
  893         my @row = split( /\t/, $line );
  894         my $row_aa = shift( @row );
  895         foreach my $col ( 0 .. $#row )
  896         {
  897             $matrix->{$row_aa}{$columns[$col]} = $row[$col];
  898             print "$row_aa\t$columns[$col]\t$matrix->{$row_aa}{$columns[$col]}\n";
  899         }
  900     }
  901 
  902     print "RETURNING SQUARE MATRIX:\n";
  903     &print_square_matrix( $matrix, @aas );
  904 
  905     return $matrix;
  906 }
  907 
  908 #takes in a hash reference and a filename
  909 sub write_linear_matrix {
  910     my( $matrix, $filename ) = @_;
  911 
  912     if( ! open( MATRIX, "> $filename" ) )
  913     {
  914         print STDERR "write_linear_matrix could not open \"$filename\" for writing!\n";
  915         return 0;
  916     }
  917 
  918     foreach my $aa1 ( @sorted_aas )
  919     {
  920         foreach my $aa2 ( @sorted_aas )
  921         {
  922             if( $aa2 ge $aa1 || $matrix->{$aa1}{$aa2} )
  923             {
  924                 print MATRIX "$aa1\t$aa2\t".$matrix->{$aa1}{$aa2}."\n";
  925             }
  926         }
  927     }
  928 
  929     close( MATRIX );
  930     return 1;
  931 }
  932 
  933 #takes in a hash reference and a filename
  934 sub write_square_matrix {
  935     my $matrix = shift( @_ );
  936     my $filename = shift( @_ );
  937     my @order = @_;
  938 
  939     if( !@order )
  940     {
  941         print STDERR "WARNING: NO ORDER GIVEN TO WRITE_SQUARE_MATRIX!\n";
  942     }
  943 
  944     if( ! open( MATRIX, "> $filename" ) )
  945     {
  946         print STDERR "write_square_matrix could not open \"$filename\" for writing!\n";
  947         return 0;
  948     }
  949 
  950     &out_square_matrix( $matrix, *MATRIX, @order );
  951 
  952     close( MATRIX );
  953 }
  954 
  955 # takes in a hash reference and a filename
  956 # writes an EMBOSS formatted matrix to file
  957 sub write_emboss_matrix {
  958     my $matrix = shift( @_ );
  959     my $filename = shift( @_ );
  960     my @order = @emboss_aas;
  961 
  962     if( ! open( MATRIX, "> $filename" ) )
  963     {
  964         print STDERR "write_emboss_matrix could not open \"$filename\" for writing!\n";
  965         return 0;
  966     }
  967 
  968     &out_emboss_matrix( $matrix, *MATRIX, @order );
  969 
  970     close( MATRIX );
  971 }
  972 
  973 # prints an emboss matrix to screen
  974 sub print_emboss_matrix {
  975     my $matrix = shift( @_ );
  976     my @order = @emboss_aas;
  977 
  978     &out_emboss_matrix( $matrix, *STDOUT, @order );
  979 }
  980 
  981 # used by print_ and write_emboss_matrix to output an EMBOSS matrix
  982 sub out_emboss_matrix
  983 {
  984     my $matrix = shift( @_ );
  985     local *FH = shift( @_ );
  986     my @order = @_;
  987 
  988     # add all of the elements in an EMBOSS matrix we usually ignore
  989     # and use the matrix's minimum for their likelihood
  990     #push( @order, "*" );
  991     #my $min = &matrix_min( $matrix );
  992     #my $max = &matrix_max( $matrix );
  993 
  994     my $line = join( "  ", "", @order );
  995     print FH "  " . $line . "\n";
  996 
  997     foreach my $aa1 ( @order )
  998     {
  999         my @line;
 1000 
 1001         #row heading
 1002         push( @line, $aa1 );
 1003 
 1004         foreach my $aa2 ( @order )
 1005         {
 1006             # if( $aa1 eq "*" || $aa2 eq "*" ||
 1007                     # $aa1 eq "B" || $aa2 eq "B" ||
 1008                     # $aa1 eq "Z" || $aa2 eq "Z" ||
 1009                     # $aa1 eq "X" || $aa2 eq "X" )
 1010             # {
 1011                 # if( exists($matrix->{$aa1}{$aa2}))
 1012                 # {
 1013                     # #push the value onto the line
 1014                     # push( @line, $matrix->{$aa1}{$aa2} );
 1015                 # }
 1016                 # elsif( $aa1 eq $aa2 && $aa1 eq    "*" )
 1017                 # {
 1018                     # #EBLOSUM62 does this, so we will copy it
 1019                     # push( @line, 1 );
 1020                 # }
 1021                 # else
 1022                 # {
 1023                     # push( @line, $min );
 1024                 # }
 1025             # }
 1026             # else
 1027             # {
 1028                 #push the value onto the line
 1029                 if( exists($matrix->{$aa1}{$aa2}) )
 1030                 {
 1031                     push( @line, $matrix->{$aa1}{$aa2} );
 1032                 }
 1033                 else
 1034                 {
 1035                     push( @line, "x" );
 1036                 }
 1037             #}
 1038             #print "$aa1:$aa2 = $matrix->{$aa1}{$aa2}\n";
 1039         }
 1040 
 1041         #space delineated
 1042         my $line = join( " ", @line );
 1043         print FH $line . "\n";
 1044     }
 1045 }
 1046 
 1047 # loads an emboss matrix from the given filename
 1048 # returns the hash reference for the matrix
 1049 sub read_emboss_matrix
 1050 {
 1051     my( $filename ) = @_;
 1052     my $matrix = &init_matrix( );
 1053 
 1054     if( ! open( MATRIX, "$filename" ) )
 1055     {
 1056         print STDERR "read_emboss_matrix could not open \"$filename\" for reading!\n";
 1057         return 0;
 1058     }
 1059 
 1060     my $read_cols = 0;
 1061     my @columns;
 1062 
 1063     while( my $line = <MATRIX> )
 1064     {
 1065         chomp( $line );
 1066 
 1067         #discard comments
 1068         next if( substr( $line, 0, 1 ) eq "#" );
 1069 
 1070         #print $line . "\n";
 1071 
 1072         #get column headers
 1073         if( $read_cols == 0 )
 1074         {
 1075             chomp( $line );
 1076             @columns = split( /\s+/, $line );
 1077             #get rid of blank field
 1078             shift( @columns );
 1079             #print "COLUMNS: " . @columns . "\n";
 1080             $read_cols = 1;
 1081             next;
 1082         }
 1083 
 1084         my @row = split( /\s+/, $line );
 1085         my $row_aa = shift( @row );
 1086 
 1087         foreach my $col ( 0 .. $#row )
 1088         {
 1089             $matrix->{$row_aa}{$columns[$col]} = $row[$col];
 1090             #print "R:$row_aa\tC:$columns[$col]\t$matrix->{$row_aa}{$columns[$col]}\n";
 1091         }
 1092     }
 1093 
 1094     #print "RETURNING SQUARE EMBOSS MATRIX:\n";
 1095     #&print_square_matrix( $matrix, @aas );
 1096 
 1097     return $matrix;
 1098 }
 1099 
 1100 sub print_square_matrix {
 1101     my $matrix = shift( @_ );
 1102     my @order = @_;
 1103 
 1104     if( !@order )
 1105     {
 1106         print STDERR "WARNING: NO ORDER GIVEN TO PRINT_SQUARE_MATRIX!\n";
 1107     }
 1108 
 1109     &out_square_matrix( $matrix, *STDOUT, @order );
 1110 }
 1111 
 1112 sub out_square_matrix {
 1113     my $matrix = shift( @_ );
 1114     local *FH = shift( @_ );
 1115     my @order = @_;
 1116 
 1117     my $line = join( "\t", "AAS", @order );
 1118     print FH $line . "\n";
 1119 
 1120     foreach my $aa1 ( @order )
 1121     {
 1122         my @line;
 1123 
 1124         #row heading
 1125         push( @line, $aa1 );
 1126 
 1127         foreach my $aa2 ( @order )
 1128         {
 1129             #push the value onto the line
 1130             push( @line, $matrix->{$aa1}{$aa2} );
 1131             #print "$aa1:$aa2 = $matrix->{$aa1}{$aa2}\n";
 1132         }
 1133 
 1134         #tab delineated
 1135         $line = join( "\t", @line );
 1136         print FH $line . "\n";
 1137     }
 1138 }
 1139 
 1140 sub write_sorted_square_matrix {
 1141 
 1142     my( $matrix, $filename ) = @_;
 1143 
 1144     if( ! open( MATRIX, "> $filename" ) )
 1145     {
 1146         print STDERR "write_square_matrix could not open \"$filename\" for writing!\n";
 1147         return 0;
 1148     }
 1149 
 1150     @aas = sort( @aas );
 1151 
 1152     #not using sorted--want the standard order of amino acids
 1153     foreach my $aa1 ( @aas )
 1154     {
 1155         my @line;
 1156 
 1157         foreach my $aa2 ( @aas )
 1158         {
 1159             push( @line, $matrix->{$aa1}{$aa2} );
 1160         }
 1161 
 1162         unshift( @line, $aa1 );
 1163 
 1164         my $line = join( "\t", @line );
 1165         print MATRIX $line . "\n";
 1166     }
 1167 
 1168     close( MATRIX );
 1169 }
 1170 
 1171 # reads a list of amino acids and their values from a tab delineated file
 1172 sub read_aas {
 1173 
 1174     my $file = shift( @_ );
 1175     my $aas = &init_aas();
 1176 
 1177     open( AACOUNTS, $file ) || warn "COULD NOT OPEN FILE $file!";
 1178 
 1179     while( my $line = <AACOUNTS> )
 1180     {
 1181         my( $aa, $value ) = split( /\t/, $line );
 1182 
 1183         if( $value )
 1184         {
 1185             $aas->{$aa} += $value;
 1186         }
 1187     }
 1188 
 1189     close( AACOUNTS );
 1190 
 1191     return $aas;
 1192 }
 1193 
 1194 # initialize a hash of amino acid values
 1195 sub init_aas {
 1196 
 1197     my $aas = {};
 1198 
 1199     foreach my $aa ( @emboss_aas )
 1200     {
 1201         $aas->{$aa} = 0;
 1202     }
 1203 
 1204     return $aas;
 1205 }
 1206 
 1207 # output a hash of amino acid values according to the given order
 1208 sub out_aas {
 1209     my $aas = shift( @_ );
 1210     local *FH = shift( @_ );
 1211     my @order = @_;
 1212 
 1213     foreach my $aa ( @order )
 1214     {
 1215         print FH $aa . "\t" . $aas->{$aa} . "\n";
 1216     }
 1217 }
 1218 
 1219 # print amino acids and their values to the screen according
 1220 # to the given order
 1221 sub print_aas {
 1222     my $aas = shift( @_ );
 1223     my @order = @_;
 1224 
 1225     if( !@order )
 1226     {
 1227         print STDERR "WARNING: NO ORDER GIVEN TO PRINT_AAS!\n";
 1228     }
 1229 
 1230     &out_aas( $aas, *STDOUT, @order );
 1231 }
 1232 
 1233 # write amino acids and their values to the given filename
 1234 # depends on the aas listed in order
 1235 sub write_aas_limited {
 1236     my $aas = shift( @_ );
 1237     my $filename = shift( @_ );
 1238     my @order = @_;
 1239 
 1240     if( ! open( AAS, "> $filename" ) )
 1241     {
 1242         print STDERR "write_aas could not open \"$filename\" for writing!\n";
 1243         return 0;
 1244     }
 1245 
 1246     &out_aas( $aas, *AAS, @order );
 1247 
 1248     close( AAS );
 1249 }
 1250 
 1251 # writes the EMBOSS aas and their values to the given filename
 1252 sub write_aas {
 1253     my $aas = shift( @_ );
 1254     my $filename = shift( @_ );
 1255 
 1256     if( ! open( AAS, "> $filename" ) )
 1257     {
 1258         print STDERR "write_aas could not open \"$filename\" for writing!\n";
 1259         return 0;
 1260     }
 1261 
 1262     &out_aas( $aas, *AAS, @emboss_aas );
 1263 
 1264     close( AAS );
 1265 }
 1266 
 1267 ################################################################################
 1268 #  MATRIX MATH AND MANIPULATION
 1269 ################################################################################
 1270 
 1271 #takes in two hash references -- a - b = diff
 1272 #returns a hash reference
 1273 sub diff_matrix {
 1274     my( $matrix_a, $matrix_b ) = @_;
 1275     my $matrix_diff = &init_matrix();
 1276 
 1277     #only use actual aas
 1278     foreach my $aa1 ( @aas )
 1279     {
 1280         foreach my $aa2 ( @aas )
 1281         {
 1282             $matrix_diff->{$aa1}{$aa2} =
 1283                 $matrix_a->{$aa1}{$aa2} - $matrix_b->{$aa1}{$aa2};
 1284 
 1285             #print "diff $aa1:$aa2 = $matrix_diff->{$aa1}{$aa2} ($matrix_a->{$aa1}{$aa2} - $matrix_b->{$aa1}{$aa2})\n";
 1286         }
 1287     }
 1288 
 1289     #print "DIFF_MATRIX TEST: \n";
 1290     #&print_square_matrix( $matrix_diff, @aas );
 1291 
 1292     return $matrix_diff;
 1293 }
 1294 
 1295 # returns the absolute difference of values between
 1296 # matrix a and b
 1297 sub abs_diff_matrix {
 1298     my( $matrix_a, $matrix_b ) = @_;
 1299 
 1300     my $abs_diff = 0;
 1301 
 1302     # only use actual amino acids
 1303     foreach my $aa1 ( @aas ) {
 1304     foreach my $aa2 ( @aas ) {
 1305         next if( $aa2 lt $aa1 );
 1306 
 1307         $abs_diff += abs( $matrix_a->{$aa1}{$aa2} - $matrix_b->{$aa1}{$aa2} );
 1308     }}
 1309 
 1310     return $abs_diff;
 1311 }
 1312 
 1313 #takes in a hash ref, amino acid 1, amino acid 2, and numerical value
 1314 #sets top only by ALPHABETICAL ORDER
 1315 sub set_matrix_value {
 1316     my( $matrix, $aa1, $aa2, $value ) = @_;
 1317 
 1318     if( $aa1 gt $aa2 )
 1319     {
 1320         my $switch = $aa2;
 1321         $aa2 = $aa1;
 1322         $aa1 = $switch;
 1323     }
 1324 
 1325     $matrix->{$aa1}{$aa2} = $value;
 1326 }
 1327 
 1328 #takes in a hash ref, amino acid 1, amino acid 2
 1329 #returns numerical value
 1330 #from top only by ALPHABETICAL ORDER
 1331 sub get_matrix_value {
 1332     my( $matrix, $aa1, $aa2 ) = @_;
 1333 
 1334     if( $aa1 gt $aa2 )
 1335     {
 1336         my $switch = $aa2;
 1337         $aa2 = $aa1;
 1338         $aa1 = $switch;
 1339     }
 1340 
 1341     #print "Getting $matrix->{$aa1}{$aa2} from $aa1:$aa2\n";
 1342 
 1343     return $matrix->{$aa1}{$aa2};
 1344 }
 1345 
 1346 #initializes all amino acid pairs to 0 for all given hash refs
 1347 sub init_matrix {
 1348     my %matrix;
 1349 
 1350     foreach my $aa1 ( @emboss_aas )
 1351     {
 1352         foreach my $aa2 ( @emboss_aas )
 1353         {
 1354             $matrix{$aa1}{$aa2} = 0;
 1355         }
 1356     }
 1357 
 1358     return \%matrix;
 1359 }
 1360 
 1361 #copies the given matrix and returns a reference to the copy
 1362 sub copy_matrix {
 1363     my $orig = shift( @_ );
 1364     my $copy = &init_matrix();
 1365 
 1366     foreach my $aa1 ( @sorted_aas )
 1367     {
 1368         foreach my $aa2 ( @sorted_aas )
 1369         {
 1370             $copy->{$aa1}{$aa2} = $orig->{$aa1}{$aa2};
 1371         }
 1372     }
 1373 
 1374     return $copy;
 1375 }
 1376 
 1377 # makes a copy of a hash reference of aas
 1378 sub copy_aas {
 1379     my $orig = shift( @_ );
 1380     my $copy = &init_matrix();
 1381 
 1382     foreach my $aa ( keys(%$orig) )
 1383     {
 1384         $copy->{$aa} = $orig->{$aa};
 1385     }
 1386 
 1387     return $copy;
 1388 }
 1389 
 1390 # finds the minimum value in the matrix
 1391 sub matrix_min {
 1392 
 1393     my $matrix = shift( @_ );
 1394 
 1395     my $min = $matrix->{$aas[0]}{$aas[0]};
 1396 
 1397     foreach my $aa1 (  @emboss_aas )
 1398     {
 1399         foreach my $aa2 (  @emboss_aas )
 1400         {
 1401             if( $matrix->{$aa1}{$aa2} < $min )
 1402             {
 1403                 $min = $matrix->{$aa1}{$aa2};
 1404             }
 1405         }
 1406     }
 1407 
 1408     return $min;
 1409 }
 1410 
 1411 # finds the maximum value of the matrix
 1412 sub matrix_max {
 1413 
 1414     my $matrix = shift( @_ );
 1415 
 1416     my $max = $matrix->{$aas[0]}{$aas[0]};
 1417 
 1418     foreach my $aa1 (  @emboss_aas )
 1419     {
 1420         foreach my $aa2 (  @emboss_aas )
 1421         {
 1422             if( $matrix->{$aa1}{$aa2} > $max )
 1423             {
 1424                 $max = $matrix->{$aa1}{$aa2};
 1425             }
 1426         }
 1427     }
 1428 
 1429     return $max;
 1430 }
 1431 
 1432 # merges two matrices, taking the min value for each position
 1433 sub min_merge
 1434 {
 1435     my( $a, $b ) = @_;
 1436     my $c = &init_matrix();
 1437 
 1438     #print "MERGING MIN\n";
 1439 
 1440     foreach my $aa1 ( @emboss_aas )
 1441     {
 1442         foreach my $aa2 ( @emboss_aas )
 1443         {
 1444             if( $a->{$aa1}{$aa2} > $b->{$aa1}{$aa2} )
 1445             {
 1446                 #print "MIN:\t".$a->{$aa1}{$aa2}.">*".$b->{$aa1}{$aa2}."\n";
 1447                 $c->{$aa1}{$aa2} = $b->{$aa1}{$aa2};
 1448             }
 1449             else
 1450             {
 1451                 #print "MIN:\t".$a->{$aa1}{$aa2}."*<".$b->{$aa1}{$aa2}."\n";
 1452                 $c->{$aa1}{$aa2} = $a->{$aa1}{$aa2};
 1453             }
 1454         }
 1455     }
 1456 
 1457     return $c;
 1458 }
 1459 
 1460 # merges two matrices, taking the max value for each position
 1461 sub max_merge
 1462 {
 1463     my( $a, $b ) = @_;
 1464     my $c = &init_matrix();
 1465 
 1466     #print "MERGING MAX\n";
 1467 
 1468     foreach my $aa1 ( @emboss_aas )
 1469     {
 1470         foreach my $aa2 ( @emboss_aas )
 1471         {
 1472             if( $a->{$aa1}{$aa2} > $b->{$aa1}{$aa2} )
 1473             {
 1474                 #print "MAX:\t".$a->{$aa1}{$aa2}."*>".$b->{$aa1}{$aa2}."\n";
 1475                 $c->{$aa1}{$aa2} = $a->{$aa1}{$aa2};
 1476             }
 1477             else
 1478             {
 1479                 #print "MAX:\t".$a->{$aa1}{$aa2}."<*".$b->{$aa1}{$aa2}."\n";
 1480                 $c->{$aa1}{$aa2} = $b->{$aa1}{$aa2};
 1481             }
 1482         }
 1483     }
 1484 
 1485     return $c;
 1486 }
 1487 
 1488 # finds the total of all the values in the matrix
 1489 sub matrix_all_total {
 1490 
 1491     my $matrix = shift( @_ );
 1492     my $total = 0;
 1493 
 1494     foreach my $aa1 ( @aas )
 1495     {
 1496         foreach my $aa2 ( @aas )
 1497         {
 1498                 $total += $matrix->{$aa1}{$aa2};
 1499         }
 1500     }
 1501 
 1502     return $total;
 1503 }
 1504 
 1505 # finds the total of all the values in the upper diagonal
 1506 # of the matrix
 1507 sub matrix_total {
 1508 
 1509     my $matrix = shift( @_ );
 1510     my $total = 0;
 1511 
 1512     foreach my $aa1 ( @aas )
 1513     {
 1514         foreach my $aa2 ( @aas )
 1515         {
 1516             next if( $aa1 gt $aa2 );
 1517 
 1518             if( $aa1 le $aa2 )
 1519             {
 1520                 $total += $matrix->{$aa1}{$aa2};
 1521             }
 1522         }
 1523     }
 1524 
 1525     return $total;
 1526 }
 1527 
 1528 # finds the total of the absolute values in the matrix summed,
 1529 # upper diagonal values only
 1530 sub matrix_abs_total {
 1531 
 1532     my $matrix = shift( @_ );
 1533     my $total = 0;
 1534 
 1535     foreach my $aa1 ( @aas )
 1536     {
 1537         foreach my $aa2 ( @aas )
 1538         {
 1539             next if( $aa1 gt $aa2 );
 1540 
 1541             if( $aa1 le $aa2 )
 1542             {
 1543                 $total += abs($matrix->{$aa1}{$aa2});
 1544             }
 1545         }
 1546     }
 1547 
 1548     return $total;
 1549 }
 1550 
 1551 # finds the total of the values on the diagonal of the matrix
 1552 sub diag_total {
 1553 
 1554     my $matrix = shift( @_ );
 1555     my $total = 0;
 1556 
 1557     foreach my $aa ( @aas )
 1558     {
 1559         $total += $matrix->{$aa}{$aa};
 1560     }
 1561 
 1562     return $total;
 1563 }
 1564 
 1565 # Finds the total of the matrix and divides each value
 1566 # by that total so the matrix will sum to 1
 1567 sub normalize_matrix
 1568 {
 1569     my( $matrix ) = @_;
 1570     my $matrix_n = &init_matrix();
 1571 
 1572     my $total = &matrix_total( $matrix );
 1573 
 1574     foreach my $aa1 ( @emboss_aas )
 1575     {
 1576         foreach my $aa2 ( @emboss_aas )
 1577         {
 1578             if( $aa1 le $aa2 )
 1579             {
 1580                 $matrix_n->{$aa1}{$aa2} = $matrix->{$aa1}{$aa2} / $total;
 1581             }
 1582         }
 1583     }
 1584 
 1585     return $matrix_n;
 1586 }
 1587 
 1588 # sums all the values of a list of aas
 1589 sub total_aas
 1590 {
 1591     my( $aas ) = @_;
 1592     my $total = 0;
 1593 
 1594     foreach my $aa (@emboss_aas )
 1595     {
 1596         $total += $aas->{$aa};
 1597     }
 1598 
 1599     return $total;
 1600 }
 1601 
 1602 # finds the log odds without rounding
 1603 sub raw_log_odds
 1604 {
 1605     my( $transitions, $aas ) = @_;
 1606     my $log_odds_matrix = &init_matrix();
 1607     my $C = 2;
 1608 
 1609     #initialize to impossible value
 1610     my $lowest_nonzero = 10000000;
 1611 
 1612     #find the lowest
 1613     foreach my $aa1 ( @emboss_aas )
 1614     {
 1615         foreach my $aa2 ( @emboss_aas )
 1616         {
 1617             $lowest_nonzero = $transitions->{$aa1}{$aa2}
 1618                 if( $transitions->{$aa1}{$aa2} < $lowest_nonzero
 1619                         && $transitions->{$aa1}{$aa2} != 0 );
 1620         }
 1621     }
 1622 
 1623     $lowest_nonzero /= 2;
 1624 
 1625     #all 0s = lowest/2
 1626     foreach my $aa1 ( @emboss_aas )
 1627     {
 1628         foreach my $aa2 ( @emboss_aas )
 1629         {
 1630             if( $aa1 gt $aa2 )
 1631             {
 1632                 next;
 1633             }
 1634 
 1635             if( $transitions->{$aa1}{$aa2} == 0 )
 1636             {
 1637                 $transitions->{$aa1}{$aa2} = $lowest_nonzero;
 1638             }
 1639         }
 1640     }
 1641 
 1642     #initialize to impossible value
 1643     $lowest_nonzero = 1000000;
 1644 
 1645     #find the lowest
 1646     foreach my $aa1 ( @emboss_aas )
 1647     {
 1648         $lowest_nonzero = $aas->{$aa1}
 1649             if( $aas->{$aa1} < $lowest_nonzero
 1650                         && $aas->{$aa1} != 0 );
 1651     }
 1652 
 1653     $lowest_nonzero /= 2;
 1654 
 1655     #all 0s = lowest/2
 1656     foreach my $aa1 ( @emboss_aas )
 1657     {
 1658         if( $aas->{$aa1} == 0 )
 1659         {
 1660             $aas->{$aa1} = $lowest_nonzero;
 1661         }
 1662     }
 1663 
 1664     foreach my $aa1 ( @emboss_aas )
 1665     {
 1666         foreach my $aa2 ( @emboss_aas )
 1667         {
 1668             if( $aa1 gt $aa2 )
 1669             {
 1670                 next;
 1671             }
 1672 
 1673             #if there's a 0 we stay at 0
 1674             die "ERROR: $aa1-$aa2 NORMALIZED AT 0 AND WASN'T SET" if( $transitions->{$aa1}{$aa2} == 0 );
 1675 
 1676             my $random_prob = $aas->{$aa1} * $aas->{$aa2};
 1677 
 1678             if( !$random_prob )
 1679             {
 1680                 print "Strange: $aa1 versus $aa2 has normal of ".$transitions->{$aa1}{$aa2}." but no random prob\n";
 1681                 next;
 1682             }
 1683 
 1684             #actual over expected probabilities of matching up
 1685             my $p = $transitions->{$aa1}{$aa2} / ( $random_prob );
 1686             my $log_odds = $C * logn( $p, 2 );
 1687             $log_odds_matrix->{$aa1}{$aa2} = $log_odds;
 1688             #print "$aa1 versus $aa2 ( $aas->{$aa1}, $aas->{$aa2}, $transitions->{$aa1}{$aa2} ) log odds: ".$log_odds_matrix->{$aa1}{$aa2}." ( $log_odds )\n";
 1689         }
 1690     }
 1691 
 1692     &mirror_matrix( $log_odds_matrix, @emboss_aas );
 1693 
 1694     return $log_odds_matrix;
 1695 }
 1696 
 1697 # log odds rounded to the nearest integer
 1698 sub log_odds
 1699 {
 1700     my( $transitions, $aas ) = @_;
 1701     my $log_odds_matrix = &init_matrix();
 1702     my $C = 2;
 1703 
 1704     #initialize to impossible value
 1705     my $lowest_nonzero = 10000000;
 1706 
 1707     #find the lowest
 1708     foreach my $aa1 ( @emboss_aas )
 1709     {
 1710         foreach my $aa2 ( @emboss_aas )
 1711         {
 1712             $lowest_nonzero = $transitions->{$aa1}{$aa2}
 1713                 if( $transitions->{$aa1}{$aa2} < $lowest_nonzero
 1714                         && $transitions->{$aa1}{$aa2} != 0 );
 1715         }
 1716     }
 1717 
 1718     $lowest_nonzero /= 2;
 1719 
 1720     #all 0s = lowest/2
 1721     foreach my $aa1 ( @emboss_aas )
 1722     {
 1723         foreach my $aa2 ( @emboss_aas )
 1724         {
 1725             if( $aa1 gt $aa2 )
 1726             {
 1727                 next;
 1728             }
 1729 
 1730             if( $transitions->{$aa1}{$aa2} == 0 )
 1731             {
 1732                 $transitions->{$aa1}{$aa2} = $lowest_nonzero;
 1733             }
 1734         }
 1735     }
 1736 
 1737     #initialize to impossible value
 1738     $lowest_nonzero = 1000000;
 1739 
 1740     #find the lowest
 1741     foreach my $aa1 ( @emboss_aas )
 1742     {
 1743         $lowest_nonzero = $aas->{$aa1}
 1744             if( $aas->{$aa1} < $lowest_nonzero
 1745                         && $aas->{$aa1} != 0 );
 1746     }
 1747 
 1748     $lowest_nonzero /= 2;
 1749 
 1750     #all 0s = lowest/2
 1751     foreach my $aa1 ( @emboss_aas )
 1752     {
 1753         if( $aas->{$aa1} == 0 )
 1754         {
 1755             $aas->{$aa1} = $lowest_nonzero;
 1756         }
 1757     }
 1758 
 1759     foreach my $aa1 ( @emboss_aas )
 1760     {
 1761         foreach my $aa2 ( @emboss_aas )
 1762         {
 1763             if( $aa1 gt $aa2 )
 1764             {
 1765                 next;
 1766             }
 1767 
 1768             #if there's a 0 we stay at 0
 1769             die "ERROR: $aa1-$aa2 NORMALIZED AT 0 AND WASN'T SET" if( $transitions->{$aa1}{$aa2} == 0 );
 1770 
 1771             my $random_prob = $aas->{$aa1} * $aas->{$aa2};
 1772 
 1773             if( !$random_prob )
 1774             {
 1775                 print "Strange: $aa1 versus $aa2 has normal of ".$transitions->{$aa1}{$aa2}." but no random prob\n";
 1776                 next;
 1777             }
 1778 
 1779             #actual over expected probabilities of matching up
 1780             my $p = $transitions->{$aa1}{$aa2} / ( $random_prob );
 1781             my $log_odds = $C * logn( $p, 2 );
 1782             $log_odds_matrix->{$aa1}{$aa2} = &round($log_odds);
 1783             #print "$aa1 versus $aa2 ( $aas->{$aa1}, $aas->{$aa2}, $transitions->{$aa1}{$aa2} ) log odds: ".$log_odds_matrix->{$aa1}{$aa2}." ( $log_odds )\n";
 1784         }
 1785     }
 1786 
 1787     &mirror_matrix( $log_odds_matrix, @emboss_aas );
 1788 
 1789     return $log_odds_matrix;
 1790 }
 1791 
 1792 # divides all aa values by the total of all values
 1793 # so they sum to 1
 1794 sub normalize_aas
 1795 {
 1796     my( $aas ) = @_;
 1797     my %norm;
 1798 
 1799     my $total = &total_aas( $aas );
 1800 
 1801     foreach my $aa (@emboss_aas )
 1802     {
 1803         $norm{$aa} = $aas->{$aa} / $total;
 1804     }
 1805 
 1806     return \%norm;
 1807 }
 1808 
 1809 # calculates the amino acid frequencies in a matrix
 1810 sub calc_aa_freq {
 1811     my $matrix = shift(@_);
 1812     &mirror_matrix($matrix);
 1813     &non_diagonal_half($matrix);
 1814     my $aa_freq = &init_aas();
 1815     my $matrix_total = &matrix_all_total($matrix);
 1816 
 1817     for my $aa (@aas)
 1818     {
 1819         $aa_freq->{$aa} = col_sum($matrix, $aa) / $matrix_total;
 1820     }
 1821 
 1822     return $aa_freq;
 1823 }
 1824 
 1825 # sum two sets of corresponding aa values into another list of aa values
 1826 sub sum_aas
 1827 {
 1828     my( $aas_a, $aas_b ) = @_;
 1829     my $c = &init_aas();
 1830 
 1831     foreach my $aa ( @emboss_aas )
 1832     {
 1833         $c->{$aa} = $aas_a->{$aa} + $aas_b->{$aa};
 1834     }
 1835 
 1836     return $c;
 1837 }
 1838 
 1839 # sum a row on a matrix
 1840 sub row_sum
 1841 {
 1842     my( $matrix, $aa1 ) = @_;
 1843     my $total = 0;
 1844 
 1845     for my $aa2 (@aas)
 1846     {
 1847         $total += $matrix->{$aa1}{$aa2};
 1848     }
 1849 
 1850     return $total;
 1851 }
 1852 
 1853 # cuts all non diagonal matrix values in half
 1854 sub non_diagonal_half {
 1855     my( $matrix ) = @_;
 1856 
 1857     for my $aa1 (@aas) {
 1858         for my $aa2 (@aas) {
 1859             $matrix->{$aa1}{$aa2} /= 2 if( $aa1 ne $aa2 );
 1860         }
 1861     }
 1862 }
 1863 
 1864 # sums a column on a matrix
 1865 sub col_sum
 1866 {
 1867     my( $matrix, $aa2 ) = @_;
 1868     my $total = 0;
 1869 
 1870     #print "Col sum for $aa2: ";
 1871 
 1872     for my $aa1 (@aas)
 1873     {
 1874         #print "$aa1 ($matrix->{$aa1}{$aa2})\t";
 1875         $total += $matrix->{$aa1}{$aa2};
 1876     }
 1877 
 1878     #print "\n";
 1879 
 1880     return $total;
 1881 }
 1882 
 1883 # returns the difference of corresponding values on two aa lists
 1884 sub diff_aas
 1885 {
 1886     my( $aas_a, $aas_b ) = @_;
 1887     my $c = &init_aas();
 1888 
 1889     foreach my $aa ( @emboss_aas )
 1890     {
 1891         $c->{$aa} = $aas_a->{$aa} - $aas_b->{$aa};
 1892     }
 1893 
 1894     return $c;
 1895 }
 1896 
 1897 #takes in a hash reference and mirrors it based on the input of aa order
 1898 sub mirror_matrix {
 1899     my $matrix = shift( @_ );
 1900     my @order = @emboss_aas;
 1901 
 1902     if( !@order )
 1903     {
 1904         print STDERR "WARNING: NO ORDER GIVEN TO MIRROR_MATRIX!\n";
 1905     }
 1906 
 1907     foreach my $aa1 ( @order )
 1908     {
 1909         foreach my $aa2 ( @order )
 1910         {
 1911             if( $aa1 gt $aa2 )
 1912             {
 1913                 if( ! $matrix->{$aa1}{$aa2} )
 1914                 {
 1915                     $matrix->{$aa1}{$aa2} = $matrix->{$aa2}{$aa1};
 1916                 }
 1917                 elsif( ! $matrix->{$aa2}{$aa1} )
 1918                 {
 1919                     $matrix->{$aa2}{$aa1} = $matrix->{$aa1}{$aa2};
 1920                 }
 1921             }
 1922         }
 1923     }
 1924 }
 1925 
 1926 #removes the lower half of the matrix to make it a top triangular matrix
 1927 #requires an input of the order in which to cut!
 1928 sub top_matrix {
 1929     my $matrix = shift( @_ );
 1930     my @order = @_;
 1931 
 1932     if( !@order )
 1933     {
 1934         print STDERR "WARNING: NO ORDER GIVEN TO TOP_MATRIX!\n";
 1935     }
 1936 
 1937     foreach my $aa1 ( @order )
 1938     {
 1939         foreach my $aa2 ( @order )
 1940         {
 1941             if( $aa1 gt $aa2 )
 1942             {
 1943                 $matrix->{$aa1}{$aa2} = 0 ;
 1944             }
 1945         }
 1946     }
 1947 }
 1948 
 1949 #scales a matrix -- multiplies each member by the given scale
 1950 sub scale_matrix {
 1951     my $matrix = shift( @_ );
 1952     my $scale = shift( @_ );
 1953 
 1954     foreach my $aa1 ( @emboss_aas )
 1955     {
 1956         foreach my $aa2 ( @emboss_aas )
 1957         {
 1958             $matrix->{$aa1}{$aa2} = $matrix->{$aa1}{$aa2} * $scale;
 1959         }
 1960     }
 1961 }
 1962 
 1963 #scales aas -- multiplies each member by the given scale
 1964 sub scale_aas {
 1965     my $aas = shift( @_ );
 1966     my $scale = shift( @_ );
 1967 
 1968     foreach my $aa ( @emboss_aas )
 1969     {
 1970             $aas->{$aa} = $aas->{$aa} * $scale;
 1971     }
 1972 }
 1973 
 1974 #multiplies matrix a against matrix b with a given @order
 1975 sub mult_matrix {
 1976     my $a = shift( @_ );
 1977     my $b = shift( @_ );
 1978     my @order = @_;
 1979     my $c = &init_matrix();
 1980 
 1981     if( !@order )
 1982     {
 1983         print STDERR "WARNING: NO ORDER GIVEN TO MULT_MATRIX!\n";
 1984     }
 1985 
 1986     foreach my $i ( 0 .. $#order )
 1987     {
 1988         foreach my $j ( 0 .. $#order )
 1989         {
 1990             my $total = 0;
 1991 
 1992             foreach my $x ( 0 .. $#order )
 1993             {
 1994                 $total += $a->{$order[$i]}{$order[$x]} *
 1995                                     $b->{$order[$x]}{$order[$j]};
 1996             }
 1997 
 1998             $c->{$order[$i]}{$order[$j]} = $total;
 1999         }
 2000     }
 2001 
 2002     return $c;
 2003 }
 2004 
 2005 # adds two matrices together
 2006 sub sum_matrix {
 2007     my $a = shift( @_ );
 2008     my $b = shift( @_ );
 2009     my @order = @_;
 2010     my $c = &init_matrix();
 2011 
 2012     if( !@order )
 2013     {
 2014         print STDERR "WARNING: NO ORDER GIVEN TO SUM MATRIX!\n";
 2015     }
 2016 
 2017     foreach my $aa1 ( @order )
 2018     {
 2019         foreach my $aa2 ( @order )
 2020         {
 2021             $c->{$aa1}{$aa2} = $a->{$aa1}{$aa2} + $b->{$aa1}{$aa2} if( exists( $c->{$aa1}{$aa2}));
 2022         }
 2023     }
 2024 
 2025     return $c;
 2026 }
 2027 
 2028 # transposes the matrix -- ie Mab = Mba
 2029 sub transpose_matrix {
 2030     my $matrix = shift( @_ );
 2031     my $matrix_t = &init_matrix();
 2032 
 2033     foreach my $aa1 ( @sorted_aas )
 2034     {
 2035         foreach my $aa2 ( @sorted_aas )
 2036         {
 2037             $matrix_t->{$aa1}{$aa2} = $matrix->{$aa2}{$aa1};
 2038         }
 2039     }
 2040 
 2041     return $matrix_t;
 2042 }
 2043 
 2044 # creates an identity matrix -- all values 0 except on the diagonal
 2045 sub identity_matrix {
 2046     my $matrix = &init_matrix();
 2047 
 2048     foreach my $aa ( @sorted_aas )
 2049     {
 2050         #print "$aa:$aa = 1\n";
 2051         $matrix->{$aa}{$aa} = 1;
 2052     }
 2053 
 2054     return $matrix;
 2055 }
 2056 
 2057 # calculates the log odds of the matrix, also puts in values for special
 2058 # aas BXZ*; takes X values from the given xmatrix file
 2059 sub scale_log_odds {
 2060     my( $normalized_matrix, $aa_percent, $xmatrix_filename ) = @_;
 2061 
 2062     my $C = 2;
 2063 
 2064     #initialize to impossible value
 2065     my $lowest_nonzero = 1000000;
 2066 
 2067     #find the lowest
 2068     foreach my $aa1 ( @emboss_aas )
 2069     {
 2070         foreach my $aa2 ( @emboss_aas )
 2071         {
 2072             my $norm = $normalized_matrix->{$aa1}{$aa2};
 2073 
 2074             if( defined($norm) &&
 2075                     $norm < $lowest_nonzero
 2076                     && $norm > 0) {
 2077                         $lowest_nonzero = $norm;
 2078             }
 2079         }
 2080     }
 2081 
 2082     $lowest_nonzero /= 2;
 2083 
 2084     #all 0s = lowest/2
 2085     foreach my $aa1 ( @emboss_aas )
 2086     {
 2087         foreach my $aa2 ( @emboss_aas )
 2088         {
 2089             if( $aa1 gt $aa2 )
 2090             {
 2091                 next;
 2092             }
 2093 
 2094             if( $normalized_matrix->{$aa1}{$aa2} == 0 )
 2095             {
 2096                 $normalized_matrix->{$aa1}{$aa2} = $lowest_nonzero;
 2097             }
 2098         }
 2099     }
 2100 
 2101     #initialize to impossible value
 2102     $lowest_nonzero = 1000000;
 2103 
 2104     #find the lowest
 2105     foreach my $aa1 ( @emboss_aas )
 2106     {
 2107         $lowest_nonzero = $aa_percent->{$aa1}
 2108         if( exists($aa_percent->{$aa1}) && $aa_percent->{$aa1} < $lowest_nonzero
 2109                 && $aa_percent->{$aa1} != 0 );
 2110     }
 2111 
 2112     $lowest_nonzero /= 2;
 2113 
 2114     #all 0s = lowest/2
 2115     foreach my $aa1 ( @emboss_aas )
 2116     {
 2117         if( ! exists($aa_percent->{$aa1}) || $aa_percent->{$aa1} == 0 )
 2118         {
 2119             $aa_percent->{$aa1} = $lowest_nonzero;
 2120         }
 2121     }
 2122 
 2123     my $log_odds_matrix = &init_matrix();
 2124     my $log_min = 0;
 2125 
 2126     foreach my $aa1 ( @emboss_aas )
 2127     {
 2128         foreach my $aa2 ( @emboss_aas )
 2129         {
 2130             if( $aa1 gt $aa2 )
 2131             {
 2132                 next;
 2133             }
 2134 
 2135             if($aa1 eq "*" || $aa2 eq "*")
 2136             {
 2137                 next;
 2138             }
 2139 
 2140             #if there's a 0 we stay at 0
 2141             die "ERROR: $aa1:$aa2 NORMALIZED AT 0 AND WASN'T SET" if( $normalized_matrix->{$aa1}{$aa2} == 0 );
 2142 
 2143             my $random_prob = $aa_percent->{$aa1} * $aa_percent->{$aa2};
 2144 
 2145             if( !$random_prob )
 2146             {
 2147                 print "Strange: $aa1 versus $aa2 has normal of ".$normalized_matrix->{$aa1}{$aa2}." but no random prob\n";
 2148                 next;
 2149             }
 2150 
 2151             #actual over expected probabilities of matching up
 2152             my $p = $normalized_matrix->{$aa1}{$aa2} / ( $random_prob );
 2153             $log_odds_matrix->{$aa1}{$aa2} = $C * logn( $p, 2 );
 2154             #print "$aa1 versus $aa2 log odds: ".$log_odds_matrix{$aa1}{$aa2}."\n";
 2155             $log_min = $log_odds_matrix->{$aa1}{$aa2} if($log_odds_matrix->{$aa1}{$aa2} < $log_min);
 2156         }
 2157     }
 2158 
 2159     #print "LOG ODDS: \n";
 2160     #&print_emboss_matrix( $log_odds_matrix );
 2161     &mirror_matrix( $log_odds_matrix, @emboss_aas, "*" );
 2162 
 2163     my %special_cases;
 2164     $special_cases{"B"} = ["N","D"];
 2165     $special_cases{"Z"} = ["Q","E"];
 2166     $special_cases{"X"} = ['A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V', "B", "Z"];
 2167 
 2168     foreach my $special_aa (keys(%special_cases))
 2169     {
 2170         my $components =  $special_cases{$special_aa};
 2171         my @components = @$components;
 2172 
 2173         foreach my $aa (@emboss_aas)
 2174         {
 2175             my $total = 0;
 2176             my $components = 0;
 2177 
 2178             foreach my $component (@components)
 2179             {
 2180                 my $value = &get_matrix_value($log_odds_matrix, $aa, $component);
 2181                 $total += $value;
 2182                 #print "Adding $value from $aa:$component ($special_aa)...total = $total\n";
 2183                 $components++;
 2184             }
 2185 
 2186             my $average = $total / $components;
 2187 
 2188             #print "Average of $special_aa:$aa: $average\n";
 2189 
 2190             &set_matrix_value($log_odds_matrix, $special_aa, $aa, $average);
 2191         }
 2192     }
 2193 
 2194     #PRINT LOG ODDS MATRIX
 2195     #open( MATRIX, "> $matrix_file" ) || die "Could not open matrix file!";
 2196 
 2197     #print "Log min: $log_min\n";
 2198 
 2199     # set the * to the log odds minimum except for  *:* which is 1
 2200     foreach my $aa ( @emboss_aas )
 2201     {
 2202         if($aa eq "*")
 2203         {
 2204             $log_odds_matrix->{$aa}{"*"} = 1;
 2205         }
 2206         else
 2207         {
 2208             $log_odds_matrix->{$aa}{"*"} = $log_min;
 2209             $log_odds_matrix->{"*"}{$aa} = $log_min;
 2210         }
 2211     }
 2212 
 2213     my $nn_value = &get_matrix_value($log_odds_matrix, "N", "N");
 2214     my $dd_value = &get_matrix_value($log_odds_matrix, "D", "D");
 2215 
 2216     if( $nn_value < $dd_value )
 2217     {
 2218         #print "Replacing B:B with the N:N ($nn_value) value over D:D ($dd_value)...\n";
 2219         &set_matrix_value( $log_odds_matrix, "N", "N", $nn_value );
 2220     }
 2221     else
 2222     {
 2223         #print "Replacing B:B with the D:D ($dd_value) value over N:N ($nn_value)...\n";
 2224         &set_matrix_value( $log_odds_matrix, "D", "D", $dd_value );
 2225     }
 2226 
 2227     my $qq_value = &get_matrix_value($log_odds_matrix, "Q", "Q");
 2228     my $ee_value = &get_matrix_value($log_odds_matrix, "E", "E");
 2229 
 2230     if( $qq_value < $ee_value )
 2231     {
 2232         #print "Replacing Z:Z with the Q:Q ($qq_value) value over E:E ($ee_value)...\n";
 2233         &set_matrix_value( $log_odds_matrix, "Q", "Q", $nn_value );
 2234     }
 2235     else
 2236     {
 2237         #print "Replacing Z:Z with the E:E ($ee_value) value over Q:Q ($qq_value)...\n";
 2238         &set_matrix_value( $log_odds_matrix, "E", "E", $dd_value );
 2239     }
 2240 
 2241     my $rounded_log_odds = &init_matrix();
 2242 
 2243     foreach my $aa1 ( @emboss_aas, "*" )
 2244     {
 2245         foreach my $aa2 ( @emboss_aas, "*" )
 2246         {
 2247             #keep  these just to see what the numbers are
 2248             #next if( $aa1 eq "-" || $aa2 eq "-" );
 2249             #if( $aa2 ge $aa1 )
 2250             #{
 2251                 #print MATRIX "$aa1\t$aa2\t".$log_odds_matrix{$aa1}{$aa2}."\t".&round($log_odds_matrix{$aa1}{$aa2})."\n";
 2252             #}
 2253 
 2254             next if( $aa1 gt $aa2 );
 2255 
 2256             if( $log_odds_matrix->{$aa1}{$aa2} )
 2257             {
 2258                 $rounded_log_odds->{$aa1}{$aa2} = &round($log_odds_matrix->{$aa1}{$aa2});
 2259             }
 2260             else
 2261             {
 2262                 print "No log odds at $aa1:$aa2\n";
 2263             }
 2264         }
 2265     }
 2266 
 2267     # replace Xs with the ones from the x matrix
 2268     my $x_matrix = &read_emboss_matrix($xmatrix_filename) || die "Could not open X replacement matrix $xmatrix_filename!\n";
 2269 
 2270     #print "X Replacement Matrix\n";
 2271     #&print_emboss_matrix($x_matrix);
 2272 
 2273     # rounding matrix
 2274     foreach my $aa ( @emboss_aas )
 2275     {
 2276         #don't mess with *--it should stay the minimum
 2277         next if ($aa eq "*");
 2278 
 2279         my $x_value = &get_matrix_value( $x_matrix, "X", $aa );
 2280         my $old_x = &get_matrix_value( $rounded_log_odds, "X", $aa );
 2281 
 2282         die "DYING: No replacement X value for X:$aa, old value is $old_x" if( !defined($x_value) );
 2283 
 2284         #print "Replacing X:$aa = $old_x with $x_value...\n";
 2285 
 2286         &set_matrix_value( $rounded_log_odds, "X", $aa, $x_value );
 2287     }
 2288 
 2289     $rounded_log_odds->{"*"}{"*"} = 1;
 2290     &mirror_matrix( $rounded_log_odds, @emboss_aas, "*" );
 2291 
 2292     return $rounded_log_odds;
 2293 }
 2294 1;
XHTML 1.1 CSS 2 Sec 508