problem with ghost fragments appearing after generated files has been fixed
[mir.git] / source / mir / util / xml / html / HTMLScanner.java
index e956ae1..319d053 100755 (executable)
@@ -45,6 +45,9 @@ public class HTMLScanner {
             reader.get();
             readEndTag();
             break;
+          case '<':
+            receiver.handleCData(new String(new char[] {c }));
+            break;
           default:
             readTag();
         }
@@ -67,6 +70,20 @@ public class HTMLScanner {
         (aCharacter == ':');
   }
 
+  private boolean isValidUnQuotedAttributeCharacter(char aCharacter) { // true if aCharacter is accepted inside an unquoted attribute value
+    int type = Character.getType(aCharacter); // Unicode general category of aCharacter
+

+    return
+        (type == Character.UPPERCASE_LETTER)  ||  // letters: only the upper- and lowercase categories are tested here
+        (type == Character.LOWERCASE_LETTER)  ||
+        (type == Character.DECIMAL_DIGIT_NUMBER)  ||  // decimal digits
+        (aCharacter == '.') ||  // the remaining tests accept exactly these five characters: . # - _ :
+        (aCharacter == '#') ||
+        (aCharacter == '-') ||
+        (aCharacter == '_') ||
+        (aCharacter == ':');  // NOTE(review): '/', '%', '?', '=' and '&' are rejected, so an unquoted URL value would stop at the first of them -- confirm this is intended
+  }
+
   private void skipWhiteSpace() throws IOException {
     while (!reader.isAtEnd() && Character.isWhitespace(reader.peek())) {
       reader.get();
@@ -101,8 +118,7 @@ public class HTMLScanner {
 
     if (result.length()==0)
       return null;
-    else
-      return result.toString();
+               return result.toString();
   }
 
   private String getAttributeValue() throws IOException {
@@ -122,7 +138,7 @@ public class HTMLScanner {
           reader.get();
       }
       else {
-        while (!reader.isAtEnd() && isValidTagNameCharacter(reader.peek())) {
+        while (!reader.isAtEnd() && isValidUnQuotedAttributeCharacter(reader.peek())) {
           result.append(reader.get());
         }
       }
@@ -183,7 +199,11 @@ public class HTMLScanner {
             reader.get();
             if (!reader.isAtEnd() && reader.peek()=='-') {
               reader.get();
-              break;
+              if (!reader.isAtEnd() && reader.peek()=='>') {
+                reader.get();
+                break;
+              }
+              result.append('-');
             }
             result.append('-');
           }
@@ -200,9 +220,7 @@ public class HTMLScanner {
 
         return;
       }
-      else {
-        result.append('-');
-      }
+                       result.append('-');
     }
 
     while (!reader.isAtEnd() && reader.peek()!='>') {
@@ -222,8 +240,6 @@ public class HTMLScanner {
       result.append(reader.get());
     }
 
-
-
     receiver.handleCData(HTMLRoutines.resolveHTMLEntites(result.toString()));
   }