%  Uebung 23 - Aufgabe 3
% 
%  Schaetzen von N-Gram-Wahrscheinlichkeiten 
%  ----------------------------------------
%
%  Diese Matlab-Funktion dient zum Testen der Matlab-Funktion: 
%  ngram = estim_ngram_probs(tr_data,vocsize,N,smval,dbg).
%
%  Die Testfunktion wird gestartet mit:  ueb23_3(N)
%  Das optionale Argument spezifiziert die zu testende 
%  N-Gram-Ordnung  [Vorgabe: alle von 1 bis 3]

function  ueb23_3(N)


%----- check parameters -----

if nargin < 1
  N = [];     % test N-gram estimation for N = 1...3
elseif (N < 1) | (N > 3)
  error('*** N-gram order out of range! (allowed: 1...3)')
end


%----- check if function "estim_ngram_probs" is present -----

if exist('estim_ngram_probs') ~= 2,
  disp(['Datei ''estim_ngram_probs'' nicht gefunden.']);
  return;
end;


for testnum = 1:9
  switch testnum
  case 1        %  1 training sequence / unigram  
    tr_data = {[1 2 1 1]};
    vocsize = 2;
    NN = 1;
    smval = 0;
    ngram1 = [0.75  0.25];

  case 2        %  2 training sequences / unigram  
    tr_data = {[1 2 1 1], [2 1]};
    vocsize = 3;
    NN = 1;
    smval = 0;
    ngram1 = [0.66666666667  0.333333333  0.00];

  case 3        %  2 training sequences / unigram / smoothing
    tr_data = {[1 2 1 1], [2 1]};
    vocsize = 3;
    NN = 1;
    smval = 0.5;
    ngram1 = [0.6  0.333333333  0.0666666667];
    
  case 4        %  1 training sequence / bigram  
    tr_data = {[1 2 2 1]};
    vocsize = 2;
    NN = 2;
    smval = 0;
    ngram1 = [0.00  0.50  0.50; ...
              0.50  0.50  0.00; ...
              1.00  0.00  0.00];

  case 5        %  2 training sequences / bigram  
    tr_data = {[1 2 1 1], [2 1]};
    vocsize = 3;
    NN = 2;
    smval = 0;
    ngram1 = [0.25  0.25  0.00  0.50; ...
              1.00  0.00  0.00  0.00; ...
              0.00  0.00  0.00  0.00; ...
              0.50  0.50  0.00  0.00];

  case 6        %  2 training sequences / bigram / smoothing
    tr_data = {[2 1 1], [1 2 2]};
    vocsize = 3;
    NN = 2;
    smval = 0.5;
    ngram1 = [0.300  0.300  0.100  0.300; ...
              0.300  0.300  0.100  0.300; ...
              0.250  0.250  0.250  0.250; ...
              0.375  0.375  0.125  0.125];
    
  case 7        %  1 training sequence / trigram  
    tr_data = {[1 2 1 1]};
    vocsize = 2;
    NN = 3;
    smval = 0;
    ngram1 = [];
    ngram1(1:3,1:3,1) = [0.0  1.0  0.0; ...
                         1.0  0.0  0.0; ...
                         0.0  0.0  1.0];
    ngram1(1:3,1:3,2) = [0.0  0.0  0.0; ...
                         0.0  0.0  0.0; ...
                         1.0  0.0  0.0];
    ngram1(1:3,1:3,3) = [1.0  0.0  0.0; ...
                         0.0  0.0  0.0; ...
                         0.0  0.0  0.0];

  case 8        %  2 training sequences / trigram  
    tr_data = {[1 2 1 1], [2 1]};
    vocsize = 2;
    NN = 3;
    smval = 0;
    ngram1(1:3,1:3,1) = [0.0  1.0  0.0; ...
                         0.5  0.0  0.0; ...
                         0.0  1.0  0.5];
    ngram1(1:3,1:3,2) = [0.0  0.0  0.0; ...
                         0.0  0.0  0.0; ...
                         1.0  0.0  0.5];
    ngram1(1:3,1:3,3) = [1.0  0.0  0.0; ...
                         0.5  0.0  0.0; ...
                         0.0  0.0  0.0];

  case 9        %  2 training sequences / trigram / smoothing
    tr_data = {[1 2 1 1], [2 1]};
    vocsize = 2;
    NN = 3;
    smval = 0.5;
    ngram1(1:3,1:3,1) = [0.200  0.600  1/3; ...
                         3/7    1/3    1/3; ...
                         0.200  0.600  3/7];
    ngram1(1:3,1:3,2) = [0.200  0.200  1/3; ...
                         1/7    1/3    1/3; ...
                         0.600  0.200  3/7];
    ngram1(1:3,1:3,3) = [0.600  0.200  1/3; ...
                         3/7    1/3    1/3; ...
                         0.200  0.200  1/7];
  end 

  if isempty(N) | (N == NN)
    disp(['Test number ' num2str(testnum)])
    ngram2 = estim_ngram_probs(tr_data,vocsize,NN,smval,0);
    res = cmp_ngrams(NN,ngram1,ngram2); 
    if res == -1
      break
    elseif res == 0
      disp_test_res(testnum,NN,ngram1,ngram2,tr_data,smval);
      break
    end
  end
end
if res == 1
  disp('All tests successfully completed')
end



%----- local functions -----------------------------------------

function  res = cmp_ngrams(N,ngram1,ngram2)
%
%  Compare N-grams `ngram1` and `ngram2`; the returned result is:
%  res = 1:  N-grams are equal
%        0:  N-grams are not equal 
%       -1:  N-grams are not comparable (invalid or not same order) 

tolerance = 1e-6;

N1 = ndims(ngram1);             % test dimension and size of N-gram array
sz = size(ngram1);
if (N1 == 2) & (sz(1) ~= sz(2))
  N1 = 1;
end
vs1 = max(sz);
vocsize = vs1-1;

if any(size(ngram1) ~= size(ngram2))
  ss = num2str(vs1);
  disp('invalid N-gram!  (output of function "estim_ngram_probs.m")') 
  disp([num2str(N) '-dimensional array with ' num2str(vs1) ...
       ' elements in each dimension expected'])
  res = -1;
  return
end

if N == 1
  dd = sum(abs(ngram2-ngram1));
elseif N == 2
  dd = sum(sum(abs(ngram2-ngram1))');
else
  dd = 0;
  for i3 = 1:3
    for i2 = 1:3
      for i1 = 1:3
        dd = dd+abs(ngram2(i3,i2,i1)-ngram1(i3,i2,i1));
      end
    end
  end
end

if dd < tolerance
  res = 1;
else
  res = 0;
end
return


function  disp_test_res(tstnum,N,ngram1,ngram2,tr_data,smval)
%
%  display the test results

disp([char(10) 'Test number ' num2str(tstnum) ' failed:  (smval = ' ...
      num2str(smval) ')'])
wstr = 'training data:  ';
for ii = 1:length(tr_data)
  ss = '[';
  for ll = 1:length(tr_data{ii})
    wstr = [wstr ss num2str(tr_data{ii}(ll))];
    ss = ' ';
  end
  wstr = [wstr '] '];
end 
disp([wstr ' '])
print_ngram_probs(ngram1,[],'reference ')
print_ngram_probs(ngram2,[],'calculated ')
return

