function  tr_data = load_training_text(txtfile,dbg)
%
%  function:  tr_data = load_training_text(txtfile,dbg)
%
%  Load training text from the file `txtfile`, replace accented
%  characters by non-accented ones, delete non alpha characters,
%  replace the characters "a,b,c,...,x,y,z,-" by the numbers
%  "1,2,3,...,24,25,26,27" and store the number sequences
%  word-wise in cell array `tr_data`.
%
%  Input:
%    txtfile   name of the text file. The file is read byte by byte;
%              upper and lower case letters map to the same numbers.
%              Bytes above 127 are interpreted as ISO-8859-1 (Latin-1):
%              umlauts expand to vowel + "e" (e.g. a-umlaut -> "ae"),
%              other accented vowels and c-cedilla lose their accent.
%              NOTE(review): the accented literals of the original code
%              were lost to an encoding problem; the set used here is a
%              reconstruction - confirm against the training corpus.
%              Multi-byte UTF-8 sequences are NOT decoded; their bytes
%              act as word separators.
%    dbg       (optional, default 0) verbosity:
%                dbg > 0   print number of words and characters
%                dbg == 2  additionally list all words
%                dbg > 2   additionally list the first `dbg` words
%              the numeric codes of each listed word are printed as
%              well as long as dbg <= 20
%
%  Output:
%    tr_data   1 x N cell array; tr_data{k} is a row vector holding the
%              numbers (1..27) of the k-th word. Every byte that is not
%              a letter, a handled accented character or "-" ends the
%              current word.

if nargin < 2 || isempty(dbg)
  dbg = 0;
end

if ~exist(txtfile,'file')
  error(['*** file not found:  ' txtfile])
end


fid = fopen(txtfile,'r');
if fid < 0 
  error(['*** cannot open file:  ' txtfile])
end
ss = fread(fid);     % column vector of byte values 0..255 (as double)
fclose(fid);

% --- byte -> code lookup tables (index = byte value + 1) ---------------
% code1: first code emitted for a byte, 0 marks a word separator
% code2: optional second code (only used for umlauts: vowel + 'e')
code1 = zeros(1,256);
code2 = zeros(1,256);

code1((65:90)+1)  = 1:26;          % 'A'..'Z'
code1((97:122)+1) = 1:26;          % 'a'..'z'
code1(45+1)       = 27;            % '-'

% ISO-8859-1 codes of the lower-case accented characters; the upper-case
% variants are located exactly 32 positions below.
ca = double('a')-96;  ce = double('e')-96;  ci = double('i')-96;
co = double('o')-96;  cu = double('u')-96;  cc = double('c')-96;

lat_a = [224 225 226];             % a grave / acute / circumflex
lat_e = [232 233 234];             % e grave / acute / circumflex
lat_i = [236 237 238 239];         % i grave / acute / circumflex / diaeresis
lat_o = [242 243 244];             % o grave / acute / circumflex
lat_u = [249 250 251];             % u grave / acute / circumflex
lat_c = 231;                       % c cedilla

code1([lat_a lat_a-32]+1) = ca;
code1([lat_e lat_e-32]+1) = ce;
code1([lat_i lat_i-32]+1) = ci;
code1([lat_o lat_o-32]+1) = co;
code1([lat_u lat_u-32]+1) = cu;
code1([lat_c lat_c-32]+1) = cc;

% umlauts are transliterated to two letters: ae, oe, ue
code1([228 196]+1) = ca;   code2([228 196]+1) = ce;   % a umlaut
code1([246 214]+1) = co;   code2([246 214]+1) = ce;   % o umlaut
code1([252 220]+1) = cu;   code2([252 220]+1) = ce;   % u umlaut

% --- split the byte stream into words ----------------------------------
tr_data = {};
nc = 0;              % total number of codes stored in tr_data
wstr = [];           % codes of the word currently being assembled
for ii = 1:length(ss)
  idx = ss(ii)+1;
  if code1(idx) == 0
    % separator: close the current word (runs of separators add nothing)
    if ~isempty(wstr)
      tr_data{end+1} = wstr;
      nc = nc+length(wstr);
      wstr = [];
    end
  elseif code2(idx) == 0
    wstr = [wstr code1(idx)];
  else
    wstr = [wstr code1(idx) code2(idx)];
  end
end
% the file need not end with a separator: store a pending last word
if ~isempty(wstr)
  tr_data{end+1} = wstr;
  nc = nc+length(wstr);
end
nw = length(tr_data);   % number of words actually stored

if dbg > 0 
  disp(['training data has ' num2str(nw) ' words with ' num2str(nc) ... 
        ' characters'])
end
if dbg > 1
  if dbg > 2
    nn = min(dbg,length(tr_data));   % never list more words than exist
  else 
    nn = length(tr_data);
  end
  for ii = 1:nn
    % map 1..26 back to 'a'..'z' and 27 back to '-' (27+96-78 = 45)
    str = char(tr_data{ii}+96-(tr_data{ii}==27)*78);
    disp([num2str(ii) ':  "' str '"'])
    if dbg <= 20 
      disp(['  ' sprintf('%d ',tr_data{ii})])
    end
  end
end
