program freqcomp (engfreq, substtable, infile, summary, input, output);

{*******************************************************************************
  Letter Frequency counter Version 4.0:
                          
    Performs a frequency analysis of every nth letter, for all n<=26.
    Incorporates a comparison to a standard english frequency
    distribution table saved as a file "engfreq".
        "engfreq" should be of the format:
        <letter>   <relative frequency><eoln>

    The program then formulates  _possible_ substitution table(s)   
    for use with a letter substitution decoder/encoder. The       
    substitution tables are stored in the file "substtable", in which
    each table is sorted by letter frequency.

    The "summary" file contains the info from _every_ single analysis
    performed on the file (ie at each nth letter).    
    
    The algorithm to perform the analysis was developed by Stuart Prescott.   
    The entire analysis of the file is performed using only one pass
    through the file. The development of this algorithm was initiated
    to improve the efficiency of the cryptanalysis system. 

    The ciphertext is read in from the file "infile". The letter
    frequencies are determined by counting each occurrence of each 
    letter (ignoring case). 

    Written by Stuart Prescott      20/03/96                   
    Last modified                   07/05/96

    Copyright (c) MCMXCVI, Stuart Prescott                  
*******************************************************************************}

{*******************************************************************************
    Required includes:						   
	lowercase.i       	(returns lowercase value of char)  
   
  	isletter.i	    	(returns TRUE if char is a letter) 



	SelectionSort.i		(uses the Selection Sort Algorithm)

	InitArrays.i		(initialises the array freq[i][j]. )

	ReadStdFreqs.i	    	(reads the std English freqs from file engfreq)

	AssessFreqs.i		(Assesses the Quality of the Analyses)

*******************************************************************************}


const
    {Size of the alphabet: letters a..z are mapped to indices 1..MAXLETTER.}
    MAXLETTER = 26;
    {Number of analyses performed; CountFreqs tallies one analysis for each
     stride n = 1..MAXTRY.}
    MAXTRY    = 26;

    {The PROPERn/DELTAn constants are not referenced in this file; they are
     presumably used by AssessFreqs.i, and their units are presumably
     percent -- TODO confirm against that include.}
    PROPER1  = 11;		{PROPERn indicates the 'correct' English}
    PROPER2  =  9;		{    frequency for the nth most common  }
    PROPER3  =  7; 		{    letter in the frequency analysis.  }
    PROPER13 =  3;
    PROPER26 =  1;

    DELTA1   =  3;		{DELTAn indicates the maximum acceptable}
    DELTA2   =  2;		{    variance of the frequency from this}
    DELTA3   =  1;		{    'correct' frequency.		}
    DELTA13  =  1;
    DELTA26  =  1;

    {Debug switches.  DEBUGCountFreqs makes CountFreqs print the total number
     of letters read; DEBUGOutputFreqs makes OutputFreqs print every analysis
     regardless of its score.  DEBUGSort and DEBUGAssessFreqs are not
     referenced in this file (presumably used by the include files).}
    DEBUGSort        = FALSE;
    DEBUGAssessFreqs = FALSE;
    DEBUGCountFreqs  = FALSE;
    DEBUGOutputFreqs = FALSE;

    {Minimum score an analysis needs before OutputFreqs writes it out.}
    ACCEPTFreq	= 8;
    {Figure printed as the "out of" value next to each score.}
    TOTALTests  = 10;

type
    {One letter together with its tally (or, for the standard English
     table, its relative frequency as read from "engfreq").}
    FreqRecordType  = record
			 letter : char;
			 count  : integer;
		      end;

    {One complete frequency table: one record per letter of the alphabet.}
    MonoArrayType   = array [ 1 .. MAXLETTER ]
		      of FreqRecordType;

    {All analyses: element n is the frequency table for analysis n.}
    FreqArrayType   = array [ 1 .. MAXTRY ]
		      of MonoArrayType;

    {Score given to one analysis; printed by OutputFreqs as
     "Score: x out of TOTALTests".}
    QualityType     = 1..10;

    {Score of every analysis, indexed by analysis number.}
    DistQualityType = array [ 1 .. MAXTRY ]
		      of QualityType;

var
    totalletters	              	{count of all letters read from infile}
		: integer;	
    
    letters				{array of freqs as counted in file,
					  one table per analysis}
		: FreqArrayType;

    english          		   	{array of normal frequencies 
    					  read in from file engfreq}
    		: MonoArrayType;

    goodfreqs				{quality score (1..10) of each analysis;
					  compared against ACCEPTFreq on output}
		: DistQualityType;

    infile,				{File containing codetext}
    engfreq,	                        {Input file for std freqs}
    substtable,	                        {Output file for substitution table(s)}
    summary				{Output file; opened by the main program,
					  presumably written by AssessFreqs}
		: text;	

{******************************************************************************}

#include 'lowercase.i'
#include 'isletter.i'

#include 'SelectionSort.i' 
#include 'InitArrays.i'
#include 'ReadStdFreqs.i'
#include 'AssessFreqs.i'

{******************************************************************************}

procedure CountFreqs 
	(
	var code  	: FreqArrayType;    	{tallies, one table per analysis}
	var total 	: integer		{number of letters read}
	);

    {
      Reads the global text file "infile" once, from the start, and tallies
      letter occurrences for all MAXTRY analyses in that single pass.

      Non-letters and line ends are skipped and do not advance the letter
      position.  Case is ignored.

      Sampling rule: a letter is added to analysis n when (total-1) mod n = 0,
      where total is the number of letters seen BEFORE the current one.
      Analysis 1 therefore counts every letter; analysis n > 1 counts the
      2nd letter and every nth letter after it (the very first letter is
      counted by analysis 1 only).

      P: code[i][j].count = 0 and code[i][j].letter = a..z for all i, j
      R: code[n] holds the tallies for analysis n; total = letters read
    }

    var 
	slot,			{alphabet position of the letter, a=1 .. z=26}
	stride			{analysis number currently being updated}
		: integer;
	nextch			{character most recently read}
		: char;

    begin
	total := 0;
	writeln ('Reading in code . . .');
	reset(infile);

	while not eof(infile) do
	    if eoln(infile) then
		readln(infile)			{step over the line end}
	    else begin
		read(infile, nextch);
		if isletter(nextch) then begin
		    slot := ord(lowercase(nextch)) - ord('a') + 1;
		    for stride := 1 to MAXTRY do
			if (total - 1) mod stride = 0 then
			    code[stride][slot].count :=
				code[stride][slot].count + 1;
		    total := total + 1
		end
	    end;

	if DEBUGCountFreqs then
	    writeln ('Total Letters read = ', total:1)
    end;        {proc CountFreqs}
    { R: code[i] contains the frequency analysis at the ith letter}

{******************************************************************************}

procedure OutputFreqs
	(
	code		: FreqArrayType; 
	var stds	: MonoArrayType;
	letters		: integer; 
	goodfreqs	: DistQualityType
	);

    {
      Prints every acceptable analysis to the screen and writes the matching
      substitution table to the global file "substtable".

      code      : frequency tables, one per analysis (expected to be sorted
		  already -- see P below)
      stds      : standard English frequencies; sorted in place by
		  SelectionSort before use (hence the var parameter)
      letters   : total number of letters read.  Kept so existing callers
		  need no change, but no longer used: the percentage base is
		  now taken from the table itself (see below).
      goodfreqs : score of each analysis

      An analysis is output when its score is at least ACCEPTFreq, or always
      when DEBUGOutputFreqs is set.  The first table written to "substtable"
      is written plain; every later table is preceded by a line holding an
      opening comment brace and followed by one holding a closing brace.

      Percentage base: the sum of the counts in code[d].  The earlier
      "letters div d" did not always equal the number of letters CountFreqs
      tallies for analysis d, and was zero whenever fewer than d letters had
      been read, which made the percentage calculation divide by zero.
      NOTE(review): this assumes AssessFreqs leaves the counts as raw
      tallies, as the original percentage formula also assumed.

      P: code contains sorted freqs
	 stds contains the std English freqs
	 goodfreqs contains assessment of the freqs
    }

    var
	totaln,					{letters tallied in analysis d}
	c,					{rank within a table}
	d					{analysis number}
			: integer;
	percentfreq				{%age freq}
			: real;
	firstgoodfreq				{TRUE until a good freq found}
			: boolean;

    begin
	write ('Sorting standard frequencies table... ');
	{Presumably orders stds the same way as the code tables, so that rank c
	 in one lines up with rank c in the other -- see SelectionSort.i.}
	SelectionSort (stds);

	writeln ('Saving substitution table(s) in file: "substtable"');
	rewrite(substtable);
	writeln;

	{output all the _good_ freqs (checking each one's status)}
	firstgoodfreq := TRUE;
	for d:=1 to MAXTRY do begin
	    if ((goodfreqs[d] >= ACCEPTFreq) or (DEBUGOutputFreqs)) then begin
		{number of letters actually tallied in this analysis}
		totaln := 0;
		for c:=1 to MAXLETTER do
		    totaln := totaln + code[d][c].count;

		writeln ('Analysis number ',d:1);
		write   (' Score: ', goodfreqs[d]:1);
		writeln (' out of ',TOTALTests:1);
		writeln (substtable, '{Analysis number ',d:3, '}');
		write   (substtable, '{Score: ', goodfreqs[d]:1);
		writeln (substtable, ' out of ',TOTALTests:1, '}');

		writeln ('-----CODETEXT--------   --POSSIBLE PLAINTEXT--');
		writeln ('                              EQUIVALENTS');
		writeln ('letter   freq    freq%       freq%     letter');
		writeln ('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~');

		if not(firstgoodfreq) then
		    writeln (substtable, '{');
		for c:=1 to MAXLETTER do begin
		    {an empty analysis has no meaningful percentage: show 0}
		    if totaln > 0 then
			percentfreq := code[d][c].count / totaln * 100
		    else
			percentfreq := 0;

		    write ((code[d][c].letter) :4);
		    write ((code[d][c].count)  :9   , (percentfreq)    :9:2);
		    {stds counts are shown divided by 100 in the freq% column}
		    write ((stds[c].count/100) :12:2, (stds[c].letter) :9  );
		    writeln;

		    {one table row: cipher letter, then the English letter
		     of the same rank}
		    write (substtable, (code[d][c].letter) :1);
		    write (substtable, (stds[c].letter)    :3);
		    writeln (substtable);
		end;

		if not(firstgoodfreq) then
		    writeln (substtable, '}');
		firstgoodfreq := FALSE;
		writeln (substtable);
		writeln (substtable);
		writeln ('Press <ENTER> to continue');
		readln;

	    end;
	end;
	writeln;
	writeln ('Substitution tables saved in file: "substtable"');
	writeln;
    end;         {proc: OutputFreqs}
    { R: "substtable" contains a series of possible substitution tables}
    
{******************************************************************************}
{******************************************************************************}
begin   {main program}
	
    {Open the summary file for writing before any analysis runs.  Nothing in
     this file writes to it; presumably AssessFreqs does -- TODO confirm.}
    rewrite(summary);
    
    {Load the standard English frequency table (see ReadStdFreqs.i).}
    ReadStdFreqs(english);    		           {english is variable}
    
    {Set up the tally tables; CountFreqs requires zeroed counts and the
     letters a..z already filled in.}
    InitArrays (letters);			   {letters is variable}

    {Single pass over infile: fills every analysis table and the total.}
    CountFreqs (letters, totalletters); 	   {(total)letters variables}

    {Score each analysis into goodfreqs (see AssessFreqs.i).
     NOTE(review): the meaning of the final FALSE argument is not visible
     in this file -- check AssessFreqs.i.}
    AssessFreqs(letters, totalletters, goodfreqs, FALSE); {letters, goodfreqs variable}

    {Print the acceptable analyses and save their substitution tables.}
    OutputFreqs(letters, english, totalletters, goodfreqs);   {english variable}
				
end.    {main program}

{******************************************************************************}
{******************************************************************************}

