function [syl,acc,lng,phr,bnd] = ptrread(file)
%
%  function:  [syl,acc,lng,phr,bnd] = ptrread(file);
%
%  Reads a phonological transcription file and converts it into syllables
%  with syllable phone sequence 'syl', accent value 'acc', language 'lng',
%  phrase type 'phr' and boundary value 'bnd' (i.e. syllable, word, weak
%  phrase, strong phrase, sentence boundary).
%
%  The text is split into tokens at blanks and '-'.  Three kinds of tokens
%  are recognised:
%    #{n}        boundary marker; n in {0,1,2} is stored as the boundary
%                of the preceding syllable
%    (X), (XY)   phrase type marker (one or two characters); only
%                recognised as the first token of the file or directly
%                after a boundary marker
%    other       a syllable, optionally carrying an accent tag
%                ([1],[2],[3],[4],[E]) and a language tag
%                (\E\,\F\,\G\,\I\); the tags are stripped from 'syl'
%  A '.' and everything following it is removed before parsing.
%
%  All outputs are character matrices with one row per syllable (rows are
%  blank-padded to equal width by char()).  A syllable can be looked up
%  with "index = find(strcmp(cellstr(syl),<syl>))".
%  If the file contains no syllable, all outputs are empty strings.
%
% Input:
%   file  name of the transcription file; an error is raised if it
%         cannot be opened
%
% Output:
%   syl   syllable phone sequences with accent/language tags removed
%   acc   accent value: '0' (no tag), '1'..'4'; the tag [E] yields '1'
%   lng   language: '_' (no tag), 'E', 'F', 'G' or 'I'
%   phr   phrase type in effect for the syllable ('.' until the first
%         phrase type marker has been read)
%   bnd   boundary following the syllable, as a character:
%           0 sentence boundary (always forced for the last syllable)
%           1 strong phrase
%           2 weak phrase
%           | word boundary
%           - syllable boundary

fid = fopen(file);
if fid < 0
  error('*** phonological representation file "%s" not openend\n', ...
        file);
end
fprintf('reading phonological representation file "%s"\n',file);

% read in entire text
txt = char(fread(fid))';
fclose(fid);

% remove optional '.' and whitespace at end of sequence
txt = regexprep(txt,'\..*$','');
txt = regexprep(txt,'[\r\n]','');
txt = regexprep(txt,'\t',' ');

fprintf('%s\n',txt);

% tag tables; the first tag found (in this order) wins and only that
% tag is stripped from the syllable
accTags = {'[1]','[2]','[3]','[4]','[E]'};
accVals = {'1','2','3','4','1'};
lngTags = {'\E\','\F\','\G\','\I\'};
lngVals = {'E','F','G','I'};

% slot 'ind' always holds the defaults for the syllable read next
syl = {};
ind = 1;
phrtype = '.';
bnd = {'-'};
phr = {phrtype};
acc = {'0'};
lng = {'_'};
afterBoundary = true;   % a phrase type marker may start the file

remaining = txt;
while ~isempty(remaining)
  % next token = run of non-separators plus all separators following it
  [s,f] = regexp(remaining,'^[^ -]*[- ]*','once');
  tok = strrep(remaining(s:f),'-','');
  remaining = remaining(f+1:end);

  % a blank among the trailing separators marks a word boundary;
  % guard against an empty token (text starting with '-')
  if ~isempty(tok) && tok(end) == ' '
    bnd{ind} = '|';
  else
    bnd{ind} = '-';
  end
  tok = strrep(tok,' ','');

  if isempty(tok)
    continue;
  end

  if numel(tok) > 3 && strcmp(tok(1:2),'#{')
    % boundary marker '#{n}': applies to the preceding syllable
    if ind > 1
      if tok(4) == '}'
        bndval = str2double(tok(3));
      else
        bndval = str2double(tok(3:4));
      end
      % str2double yields NaN for non-numeric text, which matches nothing
      if any(bndval == [0 1 2])
        bnd{ind-1} = sprintf('%d',bndval);
      end
    end
    afterBoundary = true;

  elseif afterBoundary && numel(tok) > 2
    % phrase type marker '(X)' or '(XY)'
    % NOTE(review): any other token of three or more characters in this
    % position is silently discarded (behaviour kept from the original
    % implementation) -- confirm that this is intended.
    if tok(1) == '(' && tok(3) == ')'
      phrtype = tok(2);
      phr{ind} = phrtype;
    elseif numel(tok) >= 4 && tok(1) == '(' && tok(4) == ')'
      phrtype = tok(2:3);
      phr{ind} = phrtype;
    end
    afterBoundary = false;

  else
    % regular syllable: strip accent tag ...
    for k = 1:numel(accTags)
      if ~isempty(strfind(tok,accTags{k}))
        tok = strrep(tok,accTags{k},'');
        acc{ind} = accVals{k};
        break;
      end
    end
    % ... and language tag
    for k = 1:numel(lngTags)
      if ~isempty(strfind(tok,lngTags{k}))
        tok = strrep(tok,lngTags{k},'');
        lng{ind} = lngVals{k};
        break;
      end
    end

    % simple correctness check of syllabic string: left-over brackets
    % or braces indicate an unknown or malformed tag
    if any(tok == '[' | tok == ']')
      fprintf('*** error in syllable %d: "%s"\n',ind,tok);
    end
    if any(tok == '{' | tok == '}')
      fprintf('*** error in syllable %d: "%s"\n',ind,tok);
    end

    syl{ind} = tok;

    % prepare defaults for the next syllable; the current phrase type
    % is inherited until the next phrase type marker
    ind = ind+1;
    bnd{ind} = '-';
    lng{ind} = '_';
    phr{ind} = phrtype;
    acc{ind} = '0';
    afterBoundary = false;
  end
end

len = numel(syl);
if len == 0
  % nothing parsed (e.g. empty file): return empty strings
  syl = '';
  acc = '';
  lng = '';
  phr = '';
  bnd = '';
  return;
end

% the last syllable always ends the sentence; drop the unused slot
% prepared for a further syllable while converting to char matrices
bnd{len} = '0';
bnd = char(bnd{1:len});
phr = char(phr{1:len});
lng = char(lng{1:len});
acc = char(acc{1:len});
syl = char(syl{1:len});

