#--------------------------------------------------------------------
#
# find_hidden_alphabets.pl
#
# This script attempts to find mixed alphabets within an
# inputted text stream.
#
# Written by: Moshe Rubin (March 2009)
#
#--------------------------------------------------------------------

use strict;
use diagnostics;
use warnings;

# Session options, filled in by parseCommandLine().
my $infile           = "";   # path of the text file to scan
my $alphabet         = "";   # alphabet string; only its length is used below
my $numalphs         = 0;    # number of alphabets expected in one window
my $ignorelastletter = 0;    # true: look for one letter fewer than the alphabet

parseCommandLine();
validateCommandLine();
displayOptions();

doIt();

print "\nFinished!\n";

#--------------------------------------------------------------------
# Subroutines
#--------------------------------------------------------------------

# Print the command-line synopsis and two example invocations to STDOUT.
sub usage {
    print "\n";
    print "Usage: perl find_hidden_alphabets.pl -in <file>\n";
    print " -alphabet <alphabet>\n";
    print " -numalphs <#>\n";
    print " [-ignorelastletter]\n";
    print "Examples:\n";
    print "\n";
    print "\tperl ... -in foo.txt -alphabet ABCDEFGHIJKLMNOPQRSTUVWXYZ -numalphs 3\n";
    print "\tperl ... -in foo.txt -alphabet ABCDEFGHIJKLMNOPQRSTUVWXYZ -numalphs 2 -ignorelastletter\n";
    print "\n";
}

# Report a fatal command-line or input error: print "Error: <message>",
# show the usage text, and terminate with a non-zero exit status.
sub _fail {
    my ($message) = @_;

    print "\nError: $message\n";
    usage();
    exit(1);
}

# Walk @ARGV and store the recognised options in the file-level variables.
# Option names are matched case-insensitively.  A help switch prints the
# usage text and exits with status 0; an unknown parameter, or an option
# whose value is missing, is a fatal error.
sub parseCommandLine {
    for (my $i = 0; $i < @ARGV; $i++) {
        # Convert parameter to lowercase for comparison.
        # NOTE: Compare $p with LOWERCASE strings only!
        my $p = lc($ARGV[$i]);

        if ($p eq "-in" || $p eq "-alphabet" || $p eq "-numalphs") {
            # These options consume the following argument; without this
            # check a trailing option would silently read undef.
            if ($i + 1 >= @ARGV) {
                _fail("Missing value for command line parameter ($ARGV[$i]), aborting");
            }
            my $value = $ARGV[++$i];

            if    ($p eq "-in")       { $infile   = $value; }
            elsif ($p eq "-alphabet") { $alphabet = $value; }
            else                      { $numalphs = $value; }
        }
        elsif ($p eq "-ignorelastletter") {
            $ignorelastletter = 1;
        }
        elsif (   ($p eq "-?") || ($p eq "--?") || ($p eq "/?")
               || ($p eq "-h") || ($p eq "--h")
               || ($p eq "-help") || ($p eq "--help")) {
            usage();
            exit;
        }
        else {
            _fail("Unexpected command line parameter ($ARGV[$i]), aborting");
        }
    }
}

# Check the parsed options and abort with an error message plus the usage
# text when the input file does not exist, the alphabet is empty (or has
# no letters left once -ignorelastletter is applied), or -numalphs is not
# a positive integer.
sub validateCommandLine {
    if (!(-e $infile)) {
        _fail("The input file \"$infile\" does not exist");
    }

    # With -ignorelastletter a one-letter alphabet leaves nothing to look
    # for, which would make the scan window in doIt() empty.
    if (length($alphabet) - ($ignorelastletter ? 1 : 0) <= 0) {
        _fail("Invalid alphabet ($alphabet)");
    }

    # Test the digits-only pattern first so the numeric comparison never
    # sees a non-numeric string.
    if ($numalphs !~ /\A[0-9]+\z/ || $numalphs <= 0) {
        _fail("Invalid numalphs ($numalphs)");
    }
}

# Echo the effective session options to STDOUT.
sub displayOptions {
    # User-supplied values are passed as printf arguments, never spliced
    # into the format string, so a '%' in a file name or alphabet prints
    # literally.
    print  "\n";
    print  "Session options\n";
    print  "===============\n";
    printf "\tInput file: %s\n", $infile;
    printf "\tAlphabet: %s (size=%d)\n", $alphabet, length($alphabet);
    printf "\tNumber of alphabets: %s\n", $numalphs;
    printf "\tIgnore last letter: %s\n", ($ignorelastletter ? "true" : "false");
    print  "\n";
}

# Scan the input text with a window of (alphabet size * numalphs)
# characters, where the alphabet size is reduced by one when
# -ignorelastletter is set.  A running per-character count of the window
# is kept in %freqcnt, and every window for which foundSomething() is
# true is reported.
#
# NOTE(review): the middle of this routine was lost when the file was
# damaged (everything between the start of the "load up" loop and the end
# of the sliding loop).  The window bookkeeping below is a reconstruction
# from the surviving pieces (%freqcnt, $earliest_char, $offset and the
# foundSomething() helper).  The wording of the hit report is NOT original,
# and a per-step trace print that the original apparently had has been
# dropped -- confirm both against a pristine copy.
sub doIt {
    my $text = ProcessInputFile($infile);

    my $alphsize_looked_for = length($alphabet) + ($ignorelastletter ? -1 : 0);
    my $min_text_len        = $alphsize_looked_for * $numalphs;

    if (length($text) < $min_text_len) {
        printf "\nError: A minimum of %d letters are needed, input file has only %d\n",
            $min_text_len, length($text);
        usage();
        exit(1);
    }

    # Load up the first $min_text_len characters.
    my %freqcnt;
    $freqcnt{$_}++ for split //, substr($text, 0, $min_text_len);

    # Stop at the last offset where a full window still fits; sliding
    # further would read past the end of the text.
    my $last_offset = length($text) - $min_text_len;

    for my $offset (0 .. $last_offset) {
        if (foundSomething($numalphs, %freqcnt)) {
            printf "Offset %d: %s\n", $offset, substr($text, $offset, $min_text_len);
        }

        last if $offset == $last_offset;

        # Slide the window one character to the right: drop the earliest
        # character and take in the one just past the window's end.
        my $earliest_char = substr($text, $offset, 1);
        delete $freqcnt{$earliest_char} if --$freqcnt{$earliest_char} == 0;
        $freqcnt{ substr($text, $offset + $min_text_len, 1) }++;
    }
}

# Read $file and return its contents as one string with every whitespace
# character (including line endings) removed.  Dies if the file cannot be
# opened.
sub ProcessInputFile {
    my ($file) = @_;

    open(my $fh, '<', $file)
        or die "Cannot open \"$file\" for reading: $!\n";

    my $text = "";
    while (my $line = <$fh>) {
        $line =~ s/\s//g;    # also strips the newline, so no chomp is needed
        $text .= $line;
    }
    close($fh);

    return $text;
}

# Debug helper: print $desc followed by "char:count" pairs for every key
# of %freqcnt in sorted key order.  Its call site was in the part of
# doIt() that was lost; the helper is kept so it stays available.
sub DumpFreqCnt {
    my ($desc, %freqcnt) = @_;

    print "\n$desc ";
    for my $char (sort keys %freqcnt) {
        printf "%s:%d ", $char, $freqcnt{$char};
    }
    print "\n";
}

# Return 1 when no character in %freqcnt occurs more than $numalphs
# times, 0 otherwise.
sub foundSomething {
    my ($numalphs, %freqcnt) = @_;

    for my $count (values %freqcnt) {
        return 0 if $count > $numalphs;
    }
    return 1;
}