X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=source%2Fmircoders%2Flocalizer%2Fbasic%2FMirBasicProducerAssistantLocalizer.java;h=c13397bd2e6e092a569ad09dcd0c2f1c88ce47ea;hb=c581a663636b2f530016fafecccbe531d872b2bf;hp=db4bdf6b2566418f7eed7ef73f480e8b16540234;hpb=4a893bc2a122c246763bad4b5dc4d4f51a83e80b;p=mir.git diff --git a/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java b/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java index db4bdf6b..c13397bd 100755 --- a/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java +++ b/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java @@ -29,17 +29,7 @@ */ package mircoders.localizer.basic; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.StringWriter; -import java.util.GregorianCalendar; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; - import gnu.regexp.RE; - import mir.config.MirPropertiesConfiguration; import mir.entity.adapter.EntityAdapter; import mir.entity.adapter.EntityIteratorAdapter; @@ -47,16 +37,15 @@ import mir.generator.Generator; import mir.generator.GeneratorExc; import mir.generator.GeneratorFailure; import mir.log.LoggerWrapper; -import mir.misc.StringUtil; import mir.util.GeneratorDateTimeFunctions; import mir.util.GeneratorFormatAdapters; -import mir.util.generator.ReflectionGeneratorFunctionsAdapter; +import mir.util.HTMLStripper; import mir.util.StringRoutines; +import mir.util.generator.ReflectionGeneratorFunctionsAdapter; import mircoders.global.MirGlobal; import mircoders.localizer.MirLocalizerExc; import mircoders.localizer.MirLocalizerFailure; import mircoders.localizer.MirProducerAssistantLocalizer; - import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; @@ -64,18 +53,33 @@ import org.w3c.dom.NodeList; import org.w3c.tidy.Configuration; import org.w3c.tidy.Tidy; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.StringWriter; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantLocalizer { protected LoggerWrapper logger; + private HTMLStripper stripper; private RE regularExpressionLT; private RE regularExpressionGT; private RE regularExpressionWhitespace; + private RE regularExpressionLeadingSlashes; + public MirBasicProducerAssistantLocalizer() throws MirLocalizerFailure { try { + stripper = new HTMLStripper(); + regularExpressionLT = new RE("<"); regularExpressionGT = new RE(">"); - regularExpressionWhitespace = new RE("\\s+"); + regularExpressionWhitespace = new RE("\\s+| | "); + regularExpressionLeadingSlashes = new RE("^//+"); } catch (Throwable t) { throw new MirLocalizerFailure(t); @@ -206,8 +210,8 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL logger.debug("about to filter non HTML Text of length " + aText.length()); try { String result = - StringUtil.createHTML( - StringUtil.removeHTMLTags(aText), + stripper.createHTML( + stripper.removeHTMLTags(aText), MirGlobal.config().getString("Producer.ImageRoot"), MirGlobal.config().getString("Producer.MailLinkName"), MirGlobal.config().getString("Producer.ExtLinkName"), @@ -293,40 +297,12 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL } - private String[] badAttributeValuePrefixes = { - "javascript", "vbscript", "about", "wysiwyg", "data", "view-source", - "ms-its", "mhtml", "shell", "lynxexec", "lynxcgi", "hcp", "ms-help", - "help", "disk", "vnd.ms.radio", "opera", "res", "resource", "chrome", - "mocha", "livescript"}; - - - private String[] badAttributes = { - "onabort", "onblur", "onchange", "onclick", "ondblclick", "onerror", - "onfocus", "onkeydown", "onKeypress", "onkeyup", "onload", "onmousedown", - "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onreset", - "onselect", "onsubmit", "onunload", "onload", "onclick", "onfocus", - "onblur", "FSCommand", "onAbort", "onActivate", "onAfterPrint", - "onAfterUpdate", "onBeforeActivate", "onBeforeCopy", "onBeforeCut", - "onBeforeDeactivate", "onBeforeEditFocus", "onBeforePaste", - "onBeforePrint", "onBeforeUnload", "onBegin", "onBlur", "onBounce", - "onCellChange", "onChange", "onClick", "onContextMenu", "onControlSelect", - "onCopy", "onCut", "onDataAvailible", "onDataSetChanged", "onDataSetComplete", - "onDblClick", "onDeactivate", "onDrag", "onDragEnd", "onDragLeave", "onDragEnter", - "onDragOver", "onDragDrop", "onDrop", "onEnd", "onError", "onErrorUpdate", "onExit", - "onFilterChange", "onFinish", "onFocus", "onFocusIn", "onFocusOut", "onHelp", - "onKeyDown", "onKeyPress", "onKeyUp", "onLayoutComplete", "onLoad", "onLoseCapture", - "onMediaComplete", "onMediaError", "onMouseDown", "onMouseEnter", "onMouseLeave", - "onMouseMove", "onMouseOut", "onMouseOver", "onMouseUp", "onMouseWheel", "onMove", - "onMoveEnd", "onMoveStart", "onOutOfSync", "onPaste", "onPause", "onProgress", - "onPropertyChange", "onReadyStateChange", "onRepeat", "onReset", "onResize", - "onResizeEnd", "onResizeStart", "onResume", "onReverse", "onRowEnter", "onRowExit", - "onRowDelete", "onRowInserted", "onScroll", "onSeek", "onSelect", "onSelectionChange", - "onSelectStart", "onStart", "onStop", "onSynchRestored", "onSubmit", "onTimeError", - "onTrackChange", "onUnload", "onURLFlip", "seekSegmentTime", "style", "height", "width"}; private boolean isBadAttr(String attrName) { - for (int i = 0; i < badAttributes.length; i++) { - if (badAttributes[i].toLowerCase().equals(attrName.toLowerCase())) { + List badAttributes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributes"), ";"); + Iterator i = badAttributes.iterator(); + while (i.hasNext()) { + if (((String) i.next()).toLowerCase().equals(attrName.toLowerCase())) { return true; } } @@ -351,8 +327,10 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL } private boolean checkAttrValue(String attrValue) { - for (int i = 0; i < badAttributeValuePrefixes.length; i++) { - if ((stripWhitespace(attrValue.toLowerCase())).startsWith(badAttributeValuePrefixes[i].toLowerCase() + ":")) { + List badPrefixes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributeValuePrefixes"), ";"); + Iterator i = badPrefixes.iterator(); + while (i.hasNext()) { + if ((stripWhitespace(attrValue.toLowerCase())).startsWith(((String) i.next()).toLowerCase() + ":")) { return false; } } @@ -361,9 +339,9 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL private boolean checkNode(String nodeName) { - List languages = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";"); + List acceptableNodes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";"); - Iterator i = languages.iterator(); + Iterator i = acceptableNodes.iterator(); while (i.hasNext()) { if (nodeName.equals(i.next())) { return true; @@ -397,6 +375,10 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL for (int i = 0; i < attrs.getLength(); i++) { String attrName = attrs.item(i).getNodeName(); String attrValue = attrs.item(i).getNodeValue(); + if (attrValue.startsWith("//")){ + attrValue=regularExpressionLeadingSlashes.substitute(attrValue, "/"); + } + if (checkAttr(attrName) && checkAttrValue(attrValue)) { out.write(' '); out.write(attrs.item(i).getNodeName());