program freqcomp (engfreq, substtable, codewheel, infile, input, output);


{234567890123456789012345678901234567890123456789012345678901234567890123456789}
{        1         2         3         4         5         6         7         }


{******************************************************************
 * Letter Frequency counter Version 3.0:                          *
 *   Incorporating the analysis of only the nth letter.           *
 *   Incorporates a comparison to a standard english frequency    *
 *   distribution table saved as a file "engfreq".                *   
 *   The program then formulates a _possible_ substitution table  *
 *   for use with a letter substitution decoder/encoder. The      *
 *   substitution table is stored in two different files:	  *
 *     substtable   (used by the letter substitution program)     *
 *		    contains the table sorted by letter frequency *
 *     codewheel    (for the user's reference) contains the table *
 *		    in alphabetical order of the codetext.	  *
 *                                                                *
 *      Written by Stuart Prescott      20/03/96                  *
 *      Last modified                   07/04/96                  *
 ******************************************************************}

{******************************************************************
 *  Required includes:						  *
 *	lowercase.i	    (returns lowercase value of char)     *
 *	isletter.i	    (returns TRUE if char is a letter)    *
 ******************************************************************}



{This prog reads in the codetext from the file infile and counts the number of
occurences of each letter (ignoring case). The actual frequencies are
in array:letters.}

{future developments:	
			look at some standard deviations or acceptable
		variances which can make the prog give an educated guess.}
type
    FreqArrayType=array [1..2 , 1 .. 26] of real;

var
    totalletters: integer;              {count of all letters}
    length: integer;			{n for nth-letter analysis}
    start: integer;			{starting position for analysis} 
    
    letters: FreqArrayType;		{array of freqs as counted in file}
    english: FreqArrayType;             {array of normal frequencies 
					  read in from file engfreq}
    
    infile: text;			{File containing codetext}
    engfreq: text;                      {Input file for std freqs}
    substtable: text;                   {Output files for substtable}
    codewheel: text;			{ - see banner for details}

{******************************************************************}

#include 'lowercase.i'
#include 'isletter.i'

{******************************************************************}
procedure Sort (var sortme: FreqArrayType;     {the array to be sorted}
		 col: integer);	        {the dimension by which to sort}

   { The sort algorithm used is as follows:
      Find the highest member of the array
      Swap it with the first member of the array
      Find the highest member of the array between position 2 and end
      Swap it with the second member of the array . . . }

    var
	init, 			{the starting position of the pass}
	now, 			{the current position within the pass}
	largest, 		{the largest value found so far}
	numcomp, 		{the number of comparisons made - counter}
	numswap: integer;	{the number of swaps made - counter}
	temp: real;		{temp variable used in the value swapping}

    begin       {proc Sort}
	numswap:=0;		{initialise counters to zero}
	numcomp:=0;		

	for init:=1 to 25 do begin	{loop to control pass through array}
	    largest:=init;	   {largest so far is the first to be checked}

	    for now:= init+1 to 26 do begin		{loop for each pass}
		if sortme[col,now] > sortme[col,largest] then
		    largest:=now;		    {this one is largest so far}
		numcomp:=numcomp+1;		    {a comparison has been made}
	    end;

	    if largest<>init then begin		{see if a swap needs to be made}
		temp:=sortme[1,largest];
		sortme[1,largest]:=sortme[1,init];   {swap the largest}
		sortme[1,init]:=temp;                {     with the first entry}
		temp:=sortme[2,largest];
		sortme[2,largest]:=sortme[2,init];      {and again for the }
		sortme[2,init]:=temp;           	{2nd dimension}
		numswap:=numswap+1;			{a swap has been made}
	    end;
	end;
	write (numcomp:1, ' comparisons made, ');    {give some efficiency info}
	writeln (numswap:1, ' swaps made.');
    end;        {proc: Sort}
    
{******************************************************************}
procedure ReadStdFreqs (var stds: FreqArrayType);
  { This proc reads in the std freqs from a file called "engfreq".
     The file is to be of the format:
	letter <spaces> frequency<Enter>
     The frequncies should be entered as percentages}

    var
	temp: char;				{}
	readnum, c: integer;
    begin
	writeln ('Reading in Std English Letter Frequencies from file "engfreq"');
	reset (engfreq);
	readnum:=1;
	c:=1;
	while ((not eof(engfreq)) and (c<=26)) do begin
	    if eoln(engfreq) then begin
		    readln (engfreq);
		    if readnum<>1 then begin
			    readnum:=1;
			    c:=c+1;
		    end;
		end
		else begin
		    case readnum of
			3:  begin
				writeln ('Error in Std English Frequencies File!');
				writeln ('File should be in format:');
				writeln ('letter <spaces> frequency<Enter>');
			    end;
			2:  begin
				read (engfreq, stds[2,c]);
				readnum:=3;
			    end;
			1:  begin
				read (engfreq, temp);
				stds[1,c]:= ord(temp);
				readnum:=2;
			    end;
		    end; {case of}
	    end; {if}
	end; {while}
    end; {proc ReadStdFreqs}
	    
{******************************************************************}
procedure CountFreqs (var code: FreqArrayType;	    {the individual totals}
	  	      var total: integer;	    {the grand total of all}
		      start: integer;		    {the first letter to read}
		      n: integer);		    {the nth letter to read}
    var 
	c: integer;		{counter}
	ch: char;		{temp variablethe character read}
	readnum: integer;	{counter from 0 to n-1. read occurs on 0}
    begin
	for c:=1 to 26 do begin             
	    code[2,c]:=0;                   {initialize count to zero}
	    code[1,c]:=c+96;                {set up ASCII values for letters}
	end;    
	
	{read through file (using standard input file commands}
	readnum:=start - 1;
	total := 0;                         {initialize counter}
	writeln ('Reading in code . . .');
	reset(infile);
	while not eof(infile) do begin                   {start file ops}
	    if eoln(infile) then begin
		    readln(infile);        {get rid of the EOLN}
		end
		else begin
		    read(infile,ch);
		    ch:=lowercase(ch);
		    readnum:= (readnum+1) mod n;
		    if ((isletter(ch)) and (readnum=0)) then begin
			code[2, ord(ch)-96]:= code[2, ord(ch)-96] +1;
			total :=total + 1;
		    end; 
	    end; {if eoln}
	end; {while}
    end;        {proc CountFreqs}


{******************************************************************}
procedure OutputFreqs(var code, stds:FreqArrayType; 
	  	      total,			{total of alll counts}
	  	      first,			{first read posn}
		      n: integer);		{nth read}
    var
	c: integer;
    begin
	write ('Sorting standard frequencies table... ');
	Sort (stds,2);           {sort out frequencies so that displayed}
	write ('Sorting this messages'' frequency table... ');
	Sort (code,2);           {  in descending freq & so that comparable}

	{write out substtable}
	writeln ('Saving substitution table in file: "substtable"');
	rewrite(substtable);
	writeln;
	writeln('Total letters analysed: ',total:1);
	writeln('Starting at letter: ',first:1);
	writeln('Analysing every ',n:1,'th letter.');
	writeln ('-----CODETEXT--------   --POSSIBLE PLAINTEXT--');
	writeln ('                              EQUIVALENTS');
	writeln ('letter   freq    freq%       freq%     letter');
	writeln ('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~');
	for c:=1 to 26 do begin
	    write (chr(round(code[1,c])):4);
	    write (round(code[2,c]):9, (code[2,c]/total*100):9:2);
	    write (stds[2,c]:12:2, chr(round(stds[1,c])):9);
	    writeln;
	    write (substtable, chr(round(code[1,c])):1);
	    write (substtable, chr(round(stds[1,c])):3);
	    writeln (substtable);
	end;
	writeln;
	writeln ('Substitution table saved in file: "substtable"');
	writeln;
    end;         {proc: OutputFreqs}
    
{******************************************************************}
procedure OutputCodeWheel (code, stds: FreqArrayType);    
    var 
	c: integer;
	subst: FreqArrayType;
    begin    
	for c:=1 to 26 do begin
	    subst[1,c]:= code[1,c];
	    subst[2,c]:= stds[1,c];
	end;
	write ('Sorting substitution table... ');
	Sort (subst,1);             {sort substitution table by codetext}
	
	{write out codewheel}
	writeln ('Saving sorted substitution table in file: "codewheel"');
	rewrite(codewheel);
	for c:= 1 to 26 do begin
	    write (codewheel, chr(round(subst[1,27-c])));
	    writeln (codewheel, chr(round(subst[2,27-c])):3);
	end;
    end;         {proc OutputCodeWheel}


{******************************************************************}
{******************************************************************}
begin   {main program}
    
    ReadStdFreqs(english);                {english is variable}
    
    write('Enter the number at which to perform ');
    writeln('the nth-letter frequency analysis');
    read (length);
    write('Enter the number at which to start ');
    writeln('the nth-letter frequency analysis');
    read (start);
    
    CountFreqs(letters, totalletters, start, length); 
    				{(total)letters variables}
    
    OutputFreqs(letters, english, totalletters, start, length); 
    				{letters & english variable}
				
    OutputCodeWheel(letters, english);

end.    {main program}


