use strict; use diagnostics; use warnings; my $pt; my $ct; my $i; my $j; my $debug_distance = 0; open (PT, "<$ARGV[0]"); $pt = ; close (PT); open (CT, "<$ARGV[1]"); $ct = ; close (CT); if (defined $ARGV[2]) { $debug_distance = $ARGV[2]; } if (length($pt) != length ($ct)) { print "The pt and ct are different lengths, aborting\n"; } my $position = 0; my $len = length ($pt); my %map; my %distance; while ($position < $len) { my $p = substr ($pt, $position, 1); my $c = substr ($ct, $position, 1); if (($p ne "?") && ($c ne "?")) { if (defined $map{$p . $c}) { # We encountered this pair already, calculate the distance my $d = sprintf "%5d", $position - $map{$p . $c}; if (defined $distance { $d }) { ++$distance { $d }; } else { $distance { $d } = 1; } if ($d == $debug_distance) { # Display all $position values for the debug distance printf "DEBUG: distance %7d occurred at positions %7d and %7d\n", $debug_distance, $map{$p . $c}, $position; } } $map {$p . $c} = $position; } ++$position; } printf "\n\n"; print "pt/ct identity distances not encountered\n"; print "----------------------------------------\n"; my $lim = $len < 1000 ? $len : 1000; for (my $i=1; $i<$lim; $i++) { my $key = sprintf "%5d", $i; if (!(defined $distance{$key})) { printf "\tDistance of %5d not encountered\n", $i; } } print "\n"; print "pt/ct identity distances and counts\n"; print "-----------------------------------\n"; foreach my $key (sort keys %distance) { printf "\t%05d\t%05d\n", $key, $distance{$key}; }