further refining the filter...
[mir.git] / source / mircoders / localizer / basic / MirBasicProducerAssistantLocalizer.java
index 40c95cd..6697d4c 100755 (executable)
@@ -69,15 +69,17 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL
 
   private RE regularExpressionLT;
   private RE regularExpressionGT;
+  private RE regularExpressionWhitespace; // compiled from "\\s+" in the constructor; used by stripWhitespace
 
   public MirBasicProducerAssistantLocalizer() throws MirLocalizerFailure {
-      try{
-          regularExpressionLT = new RE("<");
-          regularExpressionGT = new RE(">");
-      }
-      catch (Throwable t) {
-          throw new MirLocalizerFailure(t);
-      }
+    try{
+      regularExpressionLT = new RE("<");
+      regularExpressionGT = new RE(">");
+      regularExpressionWhitespace = new RE("\\s+"); // pattern for runs of whitespace
+    }
+    catch (Throwable t) { // anything thrown while building the patterns is rethrown wrapped in MirLocalizerFailure
+      throw new MirLocalizerFailure(t);
+    }
   }
 
   public void initializeGenerationValueSet(Map aValueSet) throws MirLocalizerExc, MirLocalizerFailure  {
@@ -234,13 +236,47 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL
     }
   }
 
+
+  private String[] badAttributeValuePrefixes= {"javascript","vbscript","about","wysiwyg","data","view-source","ms-its","mhtml","shell","lynxexec","lynxcgi","hcp","ms-help","help","disk","vnd.ms.radio","opera","res","resource","chrome","mocha","livescript"}; // scheme names; checkAttrValue rejects values that start with "<name>:"
+
+  private String[] badAttributes = {"onabort", "onblur",  "onchange", "onclick", "ondblclick", "onerror", "onfocus", "onkeydown", "onKeypress", "onkeyup", "onload", "onmousedown", "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onreset", "onselect", "onsubmit", "onunload","onload","onclick","onfocus","onblur","style","height","width"}; // attribute names flagged by isBadAttr (matched without regard to case); onload, onclick, onfocus and onblur are listed twice
+  
+  /**
+   * Tells whether an attribute name is on the list of attributes that must
+   * not be written out.
+   *
+   * The comparison ignores case and does not depend on the default locale.
+   * Locale-sensitive lower-casing maps "I" to a dotless i under a Turkish
+   * locale, which would let a name such as "ONCLICK" slip past the list.
+   *
+   * @param attrName the attribute name to look up; <code>null</code> is
+   *                 never on the list
+   * @return <code>true</code> if the name matches an entry of
+   *         <code>badAttributes</code>, <code>false</code> otherwise
+   */
+  private boolean isBadAttr(String attrName){
+    for (int i=0;i<badAttributes.length;i++){
+      // equalsIgnoreCase compares character by character without consulting
+      // the default locale and simply returns false for a null argument
+      if (badAttributes[i].equalsIgnoreCase(attrName)) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Deletes every match of the whitespace pattern (compiled from "\\s+" in
+   * the constructor) from a string.
+   *
+   * This is best effort: if the substitution throws anything at all, the
+   * problem is swallowed and an empty string is returned instead.
+   *
+   * @param aString the text to strip
+   * @return the text with all pattern matches removed, or "" on failure
+   */
+  private String stripWhitespace(String aString){
+    String stripped;
+    try{
+      stripped = regularExpressionWhitespace.substituteAll(aString, "");
+    }
+    catch (Throwable failure){
+      // deliberately ignored: callers get an empty string rather than an error
+      stripped = "";
+    }
+    return stripped;
+  }
+
+  /**
+   * Tells whether an attribute may be written out, judging by its name alone.
+   *
+   * @param attrName the attribute name
+   * @return <code>false</code> if isBadAttr flags the name,
+   *         <code>true</code> otherwise
+   */
   private boolean checkAttr(String attrName) {
-    if (attrName.equals("onLoad") || attrName.equals("onClick") || attrName.equals("onFocus") || attrName.equals("onBlur") || attrName.equals("onMouseOver") || attrName.equals("onMouseOut") || attrName.equals("style") || attrName.equals("STYLE") || attrName.equals("height") || attrName.equals("width") || attrName.equals("HEIGHT") || attrName.equals("WIDTH"))
-      return false;
-               return true;
+    return !isBadAttr(attrName);
 
   }
 
+  /**
+   * Tells whether an attribute value may be written out.
+   *
+   * The value is lower-cased and passed through stripWhitespace; it is
+   * rejected when the result starts with one of the names in
+   * <code>badAttributeValuePrefixes</code> followed by a colon
+   * (for example "javascript:").
+   *
+   * Lower-casing uses a fixed locale. With the default locale, a Turkish
+   * setting turns "I" into a dotless i, so "JAVASCRIPT:" would no longer
+   * match "javascript:" and would pass the check.
+   *
+   * @param attrValue the attribute value to inspect; must not be
+   *                  <code>null</code>
+   * @return <code>false</code> if the value starts with a rejected scheme,
+   *         <code>true</code> otherwise
+   */
+  private boolean checkAttrValue(String attrValue) {
+    // the normalised value does not change between iterations, so compute it once
+    String normalizedValue = stripWhitespace(attrValue.toLowerCase(java.util.Locale.ENGLISH));
+    for (int i=0;i<badAttributeValuePrefixes.length;i++){
+      String scheme = badAttributeValuePrefixes[i].toLowerCase(java.util.Locale.ENGLISH)+":";
+      if (normalizedValue.startsWith(scheme)){
+        return false;
+      }
+    }
+    return true;
+  }
+
+
   private boolean checkNode(String nodeName) {
     List languages =  StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";");
     
@@ -276,11 +312,12 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL
 
           for (int i = 0; i < attrs.getLength(); i++) {
             String attrName = attrs.item(i).getNodeName();
-            if (checkAttr(attrName)) {
+            String attrValue = attrs.item(i).getNodeValue();
+            if (checkAttr(attrName) && checkAttrValue(attrValue)) {
               out.write(' ');
               out.write(attrs.item(i).getNodeName());
               out.write("=\"");
-
+             
               out.write(attrs.item(i).getNodeValue());
               out.write('"');
             }