function  ngram = estim_ngram_probs(tr_data,vocsize,N,smval,dbg)
%
%  function:  ngram = estim_ngram_probs(tr_data,vocsize,N,smval,dbg)
%
%  Evaluate the N-gram probabilities from a set of training data
%  `tr_data` (cell array of variable-length sequences of word indices)
%  and the vocabulary size `vocsize`.
%
%  The N-gram probabilities P(vi1|viN,...,vi2) are denoted as
%  ngram(iN,...,i2,i1),  1 <= i <= vocsize+1, where `vocsize+1`
%  means the boundary item "BND" that delimits finite-length
%  sequences.
%
%  Inputs:   tr_data   cell array of finite-length training vectors
%                      (row or column); each vector consists of a
%                      sequence of integer indices in the range of
%                      1 to `vocsize`
%            vocsize   vocabulary size
%            N         N-gram order (1...3)
%            smval     optional smoothing constant (between 0 and 0.5,
%                      default 0; see book, equation 200)
%            dbg       optional debug output control (0 = no output,
%                      default 0)
%
%  Output:   ngram     N-dimensional array of N-gram probabilities:
%                        N = 1:  1 x vocsize  (no "BND" entry)
%                        N = 2:  (vocsize+1) x (vocsize+1)
%                        N = 3:  (vocsize+1) x (vocsize+1) x (vocsize+1)
%
%  Errors:   raised if `smval` is not a scalar in 0...0.5, if a training
%            item contains an index that is not an integer in
%            1...vocsize, or if `N` is not 1, 2 or 3.
%
%  NOTE(review): in this version of the file the counting and
%  normalisation steps of the three branches are not implemented; the
%  returned array contains only the zero-initialised counters and
%  `smval` is validated but not applied.

% --- optional arguments (an empty value selects the default) ----------
if nargin < 5 || isempty(dbg)
  dbg = 0;
end
if nargin < 4 || isempty(smval)
  smval = 0;
elseif ~isscalar(smval) || ~(smval >= 0 && smval <= 0.5)
  % written as a negated in-range test so that NaN is rejected as well
  error('*** smoothing value out of range')
end

% --- check that all training items are valid vocabulary indices -------
for kk = 1:length(tr_data)
  item = tr_data{kk};
  item = item(:).';               % force row shape: num2str of a column
                                  % yields a multi-row char array that
                                  % cannot be concatenated into the message
  if any(item > vocsize)          % index not in vocabulary
    error(['*** vocabulary index exceeds vocabulary size (' ...
              num2str(vocsize) ')' char(10) ...
           '    in training data item number ' num2str(kk) char(10) ...
           '    ' num2str(item)])
  end
  if any(item < 1) || any(item ~= round(item))   % also catches NaN
    error(['*** vocabulary index is not a positive integer' char(10) ...
           '    in training data item number ' num2str(kk) char(10) ...
           '    ' num2str(item)])
  end
end

vs1 = vocsize+1;                  % voc size incl. "BND"

if N == 1                         % estimate unigram probabilities
  ngram = zeros(1,vocsize);       % init word counters

  % NOTE(review): unigram counting/normalisation not implemented here

elseif N == 2                     % estimate bigram probabilities
  ngram = zeros(vs1,vs1);         % init wordpair counters

  % NOTE(review): bigram counting/normalisation not implemented here

elseif N == 3                     % estimate trigram probabilities
  ngram = zeros([1 1 1]*vs1);     % init word triple counters

  % NOTE(review): trigram counting/normalisation not implemented here

else
  error('*** N-gram order out of range! (allowed: 1...3)')
end

if dbg > 0
  maxpr = 15;                     % second argument of print_ngram_probs
                                  % (presumably max. number of printed
                                  % entries -- confirm in that function)
  print_ngram_probs(ngram,maxpr,'Estimated ')
end
