
program stringDist(input, output, infile2);

{ ***************************************************************************
   
   Jack Dermody							21 - 4 - 96


   ***Statement:
      	A program to find the number of strings (the size of which is determined
	by the global N) in the file input.  The results are sorted and printed.

      	The structure of the program is based on dynamic memory allocation - 
	only as much memory as is needed is allocated.


   ***Project:
      	Although (due mainly to the internal design of pascal) the program runs
      	slowly, it will come to an end eventually.

      	Note that the -p compiler option needs to be enabled during compilation
      	in order for the program to run.

	In accordance with the group standard, this project uses the file 
	'infile' for its standard input.
	

   ***Design:
	ReadFile takes a string of size N, reseting the file and increasing
	the file window by one.

        This means that if N = 2 and the word is 'that', stringDist will find
	three separate strings, 'th', 'ha' and 'at'.

	These strings are compared against the previous records to see if those
	strings have been found before.

	CheckArray and TestArray handle this.

	If the string is new, a new record is created and the new string is
	copied using CopyString.

	If not, the incidence of the existing record is increased by one.

	When the file has been analysed and all the strings have been recorded,
	the results are sorted and written.

	A faster algorithm would perhaps entail sorting at the end, since the
	comparing algorithm is a very time consuming part of this program.  This
	is something for any possible version 3.

   ***Globals:

	N        - the number of letters contained in the string searched for
	   by the program.

	Note that the actual size of the strings is one less than N.

	cArray   - a string defined by the size of N.  Later, this will be set
	    to be 10 or 12.

	cptr     - pointer to a structure holding both any particular string and
	   it's incidence.

	ptrArray - one ptrArray for each distinct string found.

	maxPtr   - the number of strings found.

	testPtr  - the array with which the program deals and sorts the input.

	pos      - the current position of the file.

	fileD    - the array of the file data.

	max      - if the file length is < 10.

   ***************************************************************************


   ***!!! :  modified    13 - 5 - 96 by Jack Dermody
	
	the variable N is now read in by reading the first letter of the file.

   ***!!! :  modified    24 - 5 - 96 by Jack Dermody

	now runs faster, due to improvement on the reading and reseting of
	the file.  Previously, the file was reset and then run through to the
	file position.  Now, the file data is read into an array and then
	the array is used instead.  This is accomplished with the function
	ReadChar.


  ***************************************************************************  }


type    cArray = array[1 .. 11] of char;

	cptr = ^cl;
        cl = record
	       c 	 : cArray;
	       incidence : integer;
	       written   : integer;
	     end;			

	ptrArray = array[1 .. 16000] of cptr;

var
	maxPtr, N  : integer;
	ptr        : ptrArray;
	testPtr    : cptr;
	pos, max   : integer;
	fileD      : cArray;
	infile2     : file of char;

{*****************************************************************************
 *
 *   tests to see if two strings (arrays of char) are the same.  Returns true
 *   or false accordingly.
 *
 *   P : That both strings are of size N.
 *
 *   r : That TestArray is either true or false, depending on if the strings are
 *   equal.
 *
 *}

function TestArray(which : integer) : boolean;

var	counter : integer;

begin
	TestArray := true;

	for counter := 1 to N do

	   if (testPtr^.c[counter] <> ptr[which]^.c[counter]) then 

	      TestArray := false;
end;



{*****************************************************************************
 *   
 *   if this is a new string, it is copied into a fresh record.
 *
 *   P : that a new testPtr exists.
 *
 *   R : that the new ptr now contains the string in testPtr
 *
 *}

procedure CopyStrings(which : integer);

var
   counter : integer;

begin
	counter := 1;
	while counter < N do begin

	   ptr[which]^.c[counter] := testPtr^.c[counter];
	   counter := counter + 1;

	end;

	assert(ptr[which]^.c = testPtr^.c);
end;



{*****************************************************************************
 * 
 *   this procedure runs through and checks all the previous strings to
 *	 determine if the new string is unique.
 *
 *   If so, a new record is created and the string is copied, if not, the
 *      incidence of the repeated string is aumented by one.
 *
 *   P : That all the strings in the array contain strings
 *
 *   R : That all strings have been checked and appropriate action has been
 *   taken.
 *}


procedure CheckArray;

   var	counter, match, which : integer;

begin
	match := 0;
	
	counter := 1;
	while counter <= maxPtr do begin

	   if TestArray(counter) = true then begin

	      match := 1;
	      which := counter;
	      counter := maxPtr + 1;

	   end
	   else counter := counter + 1;

	end;
		
	{if the string is unique}
	if match = 0 then begin

	   maxPtr := maxPtr + 1;
	   new(ptr[maxPtr]);
	   CopyStrings(maxPtr);
	   ptr[maxPtr]^.incidence := 1;
	   ptr[maxPtr]^.written := 0;

	end

	else ptr[which]^.incidence := ptr[which]^.incidence + 1;

end;


{*****************************************************************************
 *
 *	the file is read into an array of size 10 to be fed, slowly, 
 *	to ReadFile.  If reset is true, the file window is increased by one,
 *	otherwise the array is used to give the file data.
 *
 *	P : That the eof has not yet been reached.
 *
 *	R : that max has been set to the size of the file, if it is less than
 *	than 11 ( the size of the standard string initialisation.
 *}

function ReadChar(reset : boolean) : char;

var	i : integer;

begin
	if reset = true then begin
	   
	   for i := 2 to max do
	      fileD[i - 1] := fileD[i];
	   
	   read(infile2, fileD[max]);
	   ReadChar := ' ';
	   pos := 1;

	end
	
	else begin
	   
	   
	   ReadChar := fileD[pos];
	   pos := pos + 1;

	end;

	assert(max > 0);
end;


{*****************************************************************************
 *
 *   Initialise the file array, making sure that the size is not less than the
 *   array size.
 *
 *   P : the file is open and reset.
 *
 *   R : The file has been filled with the first ten characters in the file.
 *}

procedure InitFileD;

var	i : integer;

begin
	max := 0;
	i := 1;

	while (i <= 10) do begin
	   if eof(infile2) = true then begin
	      max := i;
	      i := 11;
	   end
	      
	   else  begin 
	      read(infile2, fileD[i]);
	      max := max + 1;
	      i := i + 1;
	   end;
	 
	end;

end;





{*****************************************************************************
 *
 *   a string of size N is read into the testPtr and run through CheckArray.
 *
 *   P : That the file array has been initialised.
 *
 *   R : That a string has been found and put into testArray.
 *}

procedure ReadFile;

var 
   counter, match, capital, ggg: integer;
   pascalHappy : char;

begin
	while eof(infile2) = false do begin

	   counter := 1;
	   match := 1;

	   while counter < N do begin

	        if eof(infile2) = true then begin counter := N; match := 0; end;

		testPtr^.c[counter] := ReadChar(false);
		
		{change capitals to lowercase}

		capital := ord(testPtr^.c[counter]);
		if capital < 91 then if capital > 64 then
		   testPtr^.c[counter] := chr(capital + 32);

		{filter non-chars}		

		ggg := 0;
		if capital < 123 then if capital > 96 then ggg := 1;
		if capital < 91 then if capital > 64 then ggg := 1;

		if (ggg = 0) then begin
		   counter := N;
		   match := 0;
		end
		
		{get next char}

		else counter := counter + 1;
	   end;

	   {give the compiler a function instead of a procedure}

	   pascalHappy := ReadChar(true);
	
	   if match = 1 then CheckArray;
	end;
end;




{*****************************************************************************
 *
 *   the results are sorted and written.
 *
 *   P : That all the strings have been found.
 *
 *   R : That all the results have been written.
 *}

procedure WriteResults;

var
   counter, largest, counter2, nnn : integer;

begin
	write('number of strings found:  ');
	writeln(maxPtr);
	counter := 1;
	
	while counter <= maxPtr do begin

	   largest := 0;
	   for counter2 := 1 to maxPtr do
	      if ptr[counter2]^.written = 0 then
		 if ptr[counter2]^.incidence > largest then begin

		    largest := ptr[counter2]^.incidence;
		    nnn := counter2;

	      end;
	   
	   write(ptr[nnn]^.c);
	   ptr[nnn]^.written := 1;
	   writeln(ptr[nnn]^.incidence);
	   counter := counter + 1;

	end;
	
end;
	
	


{*******   Main Program   ********}

begin   
	reset(infile2);
	read(infile2, N);
	maxPtr := 0;
	pos := 1;
	new(testPtr);
	N := N + 1;
	InitFileD;
	ReadFile;
	
	WriteResults;
end.

{*******   End   *******}

