import java.io.*;
import java.util.*;
import java.util.regex.* ;


/**
 * Command-line tool that compares a reference part-of-speech file against a
 * tagger's output file and prints an HTML report of the tags that differ.
 *
 * <p>Both files are read line by line in lockstep. Lines that contain no
 * letters, digits, '_', '.' or ',' are skipped independently in each file, so
 * blank or markup-only lines do not have to line up between the two inputs.
 *
 * <p>Usage: {@code java ProcessTags posfile taggedfile}
 */
public class ProcessTags{

   /** Matches every run of characters other than letters, digits, '_', '.' and ','. */
   private static final Pattern NON_CONTENT = Pattern.compile("[^0-9A-Za-z_.,]+");

   /** Matches the sentence markers (such as {@code <S>} and {@code </S>}) stripped before tokenizing. */
   private static final Pattern SENTENCE_TAG = Pattern.compile("</*S>");

   /** Delimiter characters used to split a line into alternating word and tag tokens. */
   private static final String TOKEN_DELIMITERS = "/ [_]<>=";

   /**
    * Entry point. Prints a usage message unless exactly two arguments are
    * given; otherwise writes the HTML comparison report to standard output.
    * Any exception raised while reading is reported with a stack trace on
    * standard error, and the closing {@code </html>} is still printed.
    *
    * @param args {@code args[0]} is the reference POS file, {@code args[1]}
    *             is the tagger output file
    */
   public static void main(String[] args) {
     if (args.length != 2) { // print help
       System.out.println("Usage is:");
       System.out.println("java ProcessTags posfile taggedfile");
       return;
     }

     BufferedReader posFile = null;
     BufferedReader taggedFile = null;
     try {
       posFile = new BufferedReader(new FileReader(args[0]));
       taggedFile = new BufferedReader(new FileReader(args[1]));

       // Counts compared line pairs, not physical lines: skipped lines do not
       // advance it.
       int lineNumber = 1;
       String posLine = posFile.readLine();
       String taggedLine = taggedFile.readLine();

       System.out.println("<html>");

       while (posLine != null && taggedLine != null) {
         // Advance each file over whitespace or non-lines. Either value may
         // become null here if that file runs out of content.
         taggedLine = skipBlankLines(taggedFile, taggedLine);
         posLine = skipBlankLines(posFile, posLine);

         // Stop if either file is exhausted; report it when only one is.
         if (posLine == null && taggedLine != null) {
           System.out.println("Premature end of POS");
           break;
         } else if (posLine != null && taggedLine == null) {
           System.out.println("Premature end of tagged input");
           break;
         } else if (posLine == null) {
           break;
         }

         // Remove the <S> </S> tags -- they should *never* be on a line by themselves.
         taggedLine = SENTENCE_TAG.matcher(taggedLine).replaceAll("");
         posLine = SENTENCE_TAG.matcher(posLine).replaceAll("");

         reportMismatches(posLine, taggedLine, lineNumber);

         posLine = posFile.readLine();
         taggedLine = taggedFile.readLine();
         lineNumber = lineNumber + 1;
       }
     } catch (Exception e) {
       e.printStackTrace();
     } finally {
       closeQuietly(posFile);
       closeQuietly(taggedFile);
     }

     System.out.println("</html>");
   }

   /**
    * Returns the first line, starting with {@code current}, that still has
    * content once every character matched by {@link #NON_CONTENT} is removed.
    *
    * @param reader  source of further lines
    * @param current the line already read from {@code reader}; may be null
    * @return the first line with content, or null if the end of the input is
    *         reached first
    * @throws IOException if reading from {@code reader} fails
    */
   private static String skipBlankLines(BufferedReader reader, String current) throws IOException {
     String line = current;
     // The null test must come before matching: readLine() returns null at
     // end of input and Pattern.matcher(null) would throw.
     while (line != null && NON_CONTENT.matcher(line).replaceAll("").length() == 0) {
       line = reader.readLine();
     }
     return line;
   }

   /**
    * Walks the word/tag token pairs of two corresponding lines and prints an
    * HTML paragraph for every position where the words agree but the tags
    * differ.
    *
    * @param posLine    reference line, sentence markers already removed
    * @param taggedLine tagger output line, sentence markers already removed
    * @param lineNumber number shown in the report for this pair of lines
    */
   private static void reportMismatches(String posLine, String taggedLine, int lineNumber) {
     StringTokenizer posTokens = new StringTokenizer(posLine, TOKEN_DELIMITERS);
     StringTokenizer taggedTokens = new StringTokenizer(taggedLine, TOKEN_DELIMITERS);

     while (taggedTokens.hasMoreTokens() && posTokens.hasMoreTokens()) {
       String taggedWord = taggedTokens.nextToken();
       String posWord = posTokens.nextToken();

       // Step over a structural token ("S", "((" or "))") in the tagged
       // output so the next real word is compared instead.
       if ((taggedWord.equals("S") || taggedWord.equals("((") || taggedWord.equals("))")) && taggedTokens.hasMoreTokens()) {
         taggedWord = taggedTokens.nextToken();
       }
       // A word without a following tag cannot be compared.
       if (!taggedTokens.hasMoreTokens() || !posTokens.hasMoreTokens()) {
         break;
       }
       String taggedTag = taggedTokens.nextToken();
       String posTag = posTokens.nextToken();

       if (!taggedTag.equals(posTag) && taggedWord.equals(posWord)) {
         System.out.println("<p>");
         System.out.println("<i>On line <b>" + lineNumber + "</b> the tag for the word <b>" + taggedWord + "</b> was <b><font color='red'>" + taggedTag + "</font></b> instead of <b><font color='green'>" + posTag + "</font></b></i><br>");
         System.out.println("The correct sentence tagging is: " + posLine + "<br>");
         System.out.println("The output sentence tagging was: " + taggedLine + "<br>");
       }
     }
   }

   /**
    * Closes {@code reader} if it was opened, reporting but not propagating a
    * failure so that the report can still be finished.
    *
    * @param reader the reader to close; may be null
    */
   private static void closeQuietly(BufferedReader reader) {
     if (reader == null) {
       return;
     }
     try {
       reader.close();
     } catch (IOException e) {
       e.printStackTrace();
     }
   }


}

