function  cross_entropy = eval_ngram_cross_entropy(data,ngram) 
%
%  function:  cross_entropy = eval_ngram_cross_entropy(data,ngram) 
%
%  Evaluate the cross-entropy (in bits per predicted token) of a set of
%  word sequences under a unigram, bigram or trigram model.
%
%  Inputs:
%    data   cell array; data{is} is a vector (row or column) of word
%           indices forming one word sequence
%    ngram  table of probabilities; its order is derived from its shape:
%             non-square 2-D array (vector) -> unigram   p(w)
%             square 2-D array              -> bigram    p(w | w1) = ngram(w1,w)
%             3-D array                     -> trigram   p(w | w2,w1) = ngram(w2,w1,w)
%           The highest index, max(size(ngram)), is used as the sequence
%           boundary token: it is appended to every sequence as the
%           end marker and (for bigram/trigram) prepended as the context.
%
%  Output:
%    cross_entropy  -sum(log2 p) over all predicted tokens (the words of
%                   every sequence plus one end marker per sequence),
%                   divided by the number of predicted tokens.
%                   With empty data no token is predicted and the result
%                   is NaN (0/0).
%
%  Errors:
%    - the probabilities of the model do not sum to 1 (tolerance 1E-6);
%      print_ngram_probs is called on the model before the error is raised
%    - a token of the data has probability 0 under the model
%    - ngram has more than 3 dimensions

N = ndims(ngram);
sz = size(ngram);
if (N == 2) && (sz(1) ~= sz(2))
  N = 1;                 % a non-square 2-D array is taken as a unigram vector
end
vs1 = max(sz);           % index of the sequence boundary token

cross_entropy = 0;
k = 0;                   % number of predicted tokens

if N == 1               % evaluate cross-entropy for unigram 
  unigram = ngram;
  if abs(sum(unigram)-1) > 1E-6     % test if unigram probs sum to 1
    print_ngram_probs(unigram)
    error('*** invalid unigram:  sum of probabilities is not 1')
  end

  for is = 1:length(data)
    wstr = [reshape(data{is},1,[]) vs1];   % words followed by end marker
    k = k+length(wstr);
    for iw = 1:length(wstr)
      p = unigram(wstr(iw));
      if p > 0
        cross_entropy = cross_entropy-log2(p);
      else
        % the context is taken from wstr (not data{is}) because iw may
        % point at the appended end marker, which data{is} does not contain
        error(['*** unigram(' num2str(wstr(iw)) ') = 0' char(10) ...
               '    encountered in word sequence ' num2str(is) ...
               ' at position ' num2str(iw) char(10) ...
               '    ' num2str(wstr(max(iw-10,1):iw))])
      end
    end
  end

elseif N == 2           % evaluate cross_entropy for bigram 
  bigram = ngram;
  % test if bigram probs sum to 1 for every context (row)
  if sum(abs(sum(bigram,2)-1)) > 1E-6
    print_ngram_probs(bigram)
    error('*** invalid bigram:  sum of probabilities is not 1')
  end

  for is = 1:length(data)
    wstr = [vs1 reshape(data{is},1,[]) vs1];   % one boundary token as context
    k = k+length(wstr)-1;                      % the leading context is not predicted
    for iw = 2:length(wstr)
      p = bigram(wstr(iw-1),wstr(iw));
      if p > 0
        cross_entropy = cross_entropy-log2(p);
      else
        % position is reported relative to the data, i.e. without the
        % one leading boundary token of wstr
        error(['*** bigram(' num2str(wstr(iw-1)) ',' num2str(wstr(iw)) ...
               ') = 0' char(10) ...
               '    encountered in word sequence ' num2str(is) ...
               ' at position ' num2str(iw-1) char(10) ...
               '    ' num2str(wstr(max(iw-10,2):iw))])
      end
    end
  end

elseif N == 3           % evaluate cross_entropy for trigram 
  trigram = ngram;
  invalid = false;            % test if trigram probs sum to 1
  for i3 = 1:vs1
    for i2 = 1:vs1
      % contexts (word, boundary) are not checked; (boundary, boundary)
      % is the context at the start of a sequence and is checked
      if (i2 ~= vs1) || (i3 == vs1)
        if abs(sum(trigram(i3,i2,:))-1) > 1E-6
          invalid = true;
        end
      end
    end
  end
  if invalid
    print_ngram_probs(trigram)
    error('*** invalid trigram:  sum of probabilities is not 1')
  end

  for is = 1:length(data)
    wstr = [vs1 vs1 reshape(data{is},1,[]) vs1];   % two boundary tokens as context
    k = k+length(wstr)-2;                          % the leading context is not predicted
    for iw = 3:length(wstr)
      p = trigram(wstr(iw-2),wstr(iw-1),wstr(iw));
      if p > 0
        cross_entropy = cross_entropy-log2(p);
      else
        % position is reported relative to the data, i.e. without the
        % two leading boundary tokens of wstr
        error(['*** trigram(' num2str(wstr(iw-2)) ',' num2str(wstr(iw-1)) ...
               ',' num2str(wstr(iw)) ') = 0' char(10) ...
               '    encountered in word sequence ' num2str(is) ...
               ' at position ' num2str(iw-2) char(10) ...
               '    ' num2str(wstr(max(iw-10,3):iw))])
      end
    end
  end

else
  error(['*** max order of ngram exceeded (' num2str(N) ' > 3)'])
end

% average over all predicted tokens (NaN if there are none)
cross_entropy = cross_entropy/k;

