Configure attribute filters via config.properties.
[mir.git] / source / mircoders / localizer / basic / MirBasicProducerAssistantLocalizer.java
index db4bdf6..c13397b 100755 (executable)
  */
 package mircoders.localizer.basic;
 
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.StringWriter;
-import java.util.GregorianCalendar;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
 import gnu.regexp.RE;
-
 import mir.config.MirPropertiesConfiguration;
 import mir.entity.adapter.EntityAdapter;
 import mir.entity.adapter.EntityIteratorAdapter;
@@ -47,16 +37,15 @@ import mir.generator.Generator;
 import mir.generator.GeneratorExc;
 import mir.generator.GeneratorFailure;
 import mir.log.LoggerWrapper;
-import mir.misc.StringUtil;
 import mir.util.GeneratorDateTimeFunctions;
 import mir.util.GeneratorFormatAdapters;
-import mir.util.generator.ReflectionGeneratorFunctionsAdapter;
+import mir.util.HTMLStripper;
 import mir.util.StringRoutines;
+import mir.util.generator.ReflectionGeneratorFunctionsAdapter;
 import mircoders.global.MirGlobal;
 import mircoders.localizer.MirLocalizerExc;
 import mircoders.localizer.MirLocalizerFailure;
 import mircoders.localizer.MirProducerAssistantLocalizer;
-
 import org.w3c.dom.Document;
 import org.w3c.dom.NamedNodeMap;
 import org.w3c.dom.Node;
@@ -64,18 +53,33 @@ import org.w3c.dom.NodeList;
 import org.w3c.tidy.Configuration;
 import org.w3c.tidy.Tidy;
 
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.StringWriter;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
 public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantLocalizer {
   protected LoggerWrapper logger;
 
+  private HTMLStripper stripper;
   private RE regularExpressionLT;
   private RE regularExpressionGT;
   private RE regularExpressionWhitespace;
+  private RE regularExpressionLeadingSlashes;
+    
 
   public MirBasicProducerAssistantLocalizer() throws MirLocalizerFailure {
     try {
+      stripper = new HTMLStripper();
+
       regularExpressionLT = new RE("<");
       regularExpressionGT = new RE(">");
-      regularExpressionWhitespace = new RE("\\s+");
+      regularExpressionWhitespace = new RE("\\s+|&#x0A;|&#x0D;");
+      regularExpressionLeadingSlashes = new RE("^//+");
     }
     catch (Throwable t) {
       throw new MirLocalizerFailure(t);
@@ -206,8 +210,8 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL
     logger.debug("about to filter non HTML Text of length " + aText.length());
     try {
       String result =
-          StringUtil.createHTML(
-              StringUtil.removeHTMLTags(aText),
+          stripper.createHTML(
+              stripper.removeHTMLTags(aText),
               MirGlobal.config().getString("Producer.ImageRoot"),
               MirGlobal.config().getString("Producer.MailLinkName"),
               MirGlobal.config().getString("Producer.ExtLinkName"),
@@ -293,40 +297,12 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL
   }
 
 
-  private String[] badAttributeValuePrefixes = {
-      "javascript", "vbscript", "about", "wysiwyg", "data", "view-source",
-      "ms-its", "mhtml", "shell", "lynxexec", "lynxcgi", "hcp", "ms-help",
-      "help", "disk", "vnd.ms.radio", "opera", "res", "resource", "chrome",
-      "mocha", "livescript"};
-
-
-  private String[] badAttributes = {
-      "onabort", "onblur", "onchange", "onclick", "ondblclick", "onerror",
-      "onfocus", "onkeydown", "onKeypress", "onkeyup", "onload", "onmousedown",
-      "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onreset",
-      "onselect", "onsubmit", "onunload", "onload", "onclick", "onfocus",
-      "onblur", "FSCommand", "onAbort", "onActivate", "onAfterPrint",
-      "onAfterUpdate", "onBeforeActivate", "onBeforeCopy", "onBeforeCut",
-      "onBeforeDeactivate", "onBeforeEditFocus", "onBeforePaste",
-      "onBeforePrint", "onBeforeUnload", "onBegin", "onBlur", "onBounce",
-      "onCellChange", "onChange", "onClick", "onContextMenu", "onControlSelect",
-      "onCopy", "onCut", "onDataAvailible", "onDataSetChanged", "onDataSetComplete",
-      "onDblClick", "onDeactivate", "onDrag", "onDragEnd", "onDragLeave", "onDragEnter",
-      "onDragOver", "onDragDrop", "onDrop", "onEnd", "onError", "onErrorUpdate", "onExit",
-      "onFilterChange", "onFinish", "onFocus", "onFocusIn", "onFocusOut", "onHelp",
-      "onKeyDown", "onKeyPress", "onKeyUp", "onLayoutComplete", "onLoad", "onLoseCapture",
-      "onMediaComplete", "onMediaError", "onMouseDown", "onMouseEnter", "onMouseLeave",
-      "onMouseMove", "onMouseOut", "onMouseOver", "onMouseUp", "onMouseWheel", "onMove",
-      "onMoveEnd", "onMoveStart", "onOutOfSync", "onPaste", "onPause", "onProgress",
-      "onPropertyChange", "onReadyStateChange", "onRepeat", "onReset", "onResize",
-      "onResizeEnd", "onResizeStart", "onResume", "onReverse", "onRowEnter", "onRowExit",
-      "onRowDelete", "onRowInserted", "onScroll", "onSeek", "onSelect", "onSelectionChange",
-      "onSelectStart", "onStart", "onStop", "onSynchRestored", "onSubmit", "onTimeError",
-      "onTrackChange", "onUnload", "onURLFlip", "seekSegmentTime", "style", "height", "width"};
 
   private boolean isBadAttr(String attrName) {
-    for (int i = 0; i < badAttributes.length; i++) {
-      if (badAttributes[i].toLowerCase().equals(attrName.toLowerCase())) {
+    List badAttributes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributes"), ";");
+    Iterator i = badAttributes.iterator();
+    while (i.hasNext()) {
+      if (((String) i.next()).toLowerCase().equals(attrName.toLowerCase())) {
         return true;
       }
     }
@@ -351,8 +327,10 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL
   }
 
   private boolean checkAttrValue(String attrValue) {
-    for (int i = 0; i < badAttributeValuePrefixes.length; i++) {
-      if ((stripWhitespace(attrValue.toLowerCase())).startsWith(badAttributeValuePrefixes[i].toLowerCase() + ":")) {
+      List badPrefixes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributeValuePrefixes"), ";");
+      Iterator i = badPrefixes.iterator();
+      while (i.hasNext()) {
+         if ((stripWhitespace(attrValue.toLowerCase())).startsWith(((String) i.next()).toLowerCase() + ":")) {
         return false;
       }
     }
@@ -361,9 +339,9 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL
 
 
   private boolean checkNode(String nodeName) {
-    List languages = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";");
+    List acceptableNodes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";");
 
-    Iterator i = languages.iterator();
+    Iterator i = acceptableNodes.iterator();
     while (i.hasNext()) {
       if (nodeName.equals(i.next())) {
         return true;
@@ -397,6 +375,10 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL
           for (int i = 0; i < attrs.getLength(); i++) {
             String attrName = attrs.item(i).getNodeName();
             String attrValue = attrs.item(i).getNodeValue();
+           if (attrValue.startsWith("//")){
+             attrValue=regularExpressionLeadingSlashes.substitute(attrValue, "/");
+           }
+                           
             if (checkAttr(attrName) && checkAttrValue(attrValue)) {
               out.write(' ');
               out.write(attrs.item(i).getNodeName());