X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=source%2Fmircoders%2Flocalizer%2Fbasic%2FMirBasicProducerAssistantLocalizer.java;fp=source%2Fmircoders%2Flocalizer%2Fbasic%2FMirBasicProducerAssistantLocalizer.java;h=c13397bd2e6e092a569ad09dcd0c2f1c88ce47ea;hb=c9ac8fa71b679f8d967aac901bbef945c13b94c9;hp=f67233bb33b096e995b5f804c90a02178b512f1a;hpb=d63595f89aaa4b6a524dc0b4af9e0eef888f4c6b;p=mir.git diff --git a/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java b/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java index f67233bb..c13397bd 100755 --- a/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java +++ b/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java @@ -29,6 +29,7 @@ */ package mircoders.localizer.basic; +import gnu.regexp.RE; import mir.config.MirPropertiesConfiguration; import mir.entity.adapter.EntityAdapter; import mir.entity.adapter.EntityIteratorAdapter; @@ -36,8 +37,10 @@ import mir.generator.Generator; import mir.generator.GeneratorExc; import mir.generator.GeneratorFailure; import mir.log.LoggerWrapper; -import mir.misc.StringUtil; -import mir.util.*; +import mir.util.GeneratorDateTimeFunctions; +import mir.util.GeneratorFormatAdapters; +import mir.util.HTMLStripper; +import mir.util.StringRoutines; import mir.util.generator.ReflectionGeneratorFunctionsAdapter; import mircoders.global.MirGlobal; import mircoders.localizer.MirLocalizerExc; @@ -47,8 +50,8 @@ import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.NodeList; -import org.w3c.tidy.Tidy; import org.w3c.tidy.Configuration; +import org.w3c.tidy.Tidy; import java.io.ByteArrayInputStream; import java.io.IOException; @@ -62,7 +65,28 @@ import java.util.Map; public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantLocalizer { protected LoggerWrapper logger; - public void initializeGenerationValueSet(Map aValueSet) throws 
MirLocalizerExc, MirLocalizerFailure { + private HTMLStripper stripper; + private RE regularExpressionLT; + private RE regularExpressionGT; + private RE regularExpressionWhitespace; + private RE regularExpressionLeadingSlashes; + + + public MirBasicProducerAssistantLocalizer() throws MirLocalizerFailure { + try { + stripper = new HTMLStripper(); + + regularExpressionLT = new RE("<"); + regularExpressionGT = new RE(">"); + regularExpressionWhitespace = new RE("\\s+| | "); + regularExpressionLeadingSlashes = new RE("^//+"); + } + catch (Throwable t) { + throw new MirLocalizerFailure(t); + } + } + + public void initializeGenerationValueSet(Map aValueSet) throws MirLocalizerExc, MirLocalizerFailure { try { Iterator i; @@ -90,13 +114,13 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL aValueSet.put("config", configMap); - aValueSet.put("utility", new Utility()); + aValueSet.put("utility", new Utility()); aValueSet.put("languages", - new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language")); + new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language")); aValueSet.put("topics", - new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "topic")); + new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "topic")); Map articleTypeMap = new HashMap(); articleTypeMap.put("openposting", "0"); @@ -125,10 +149,12 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL } catch (Throwable t) { logger.error("initializeGenerationValueSet: Exception while collecting comment statuses" + t.getMessage()); - throw new RuntimeException(t.getMessage()); + + throw new MirLocalizerFailure(t); } - }; + } + public static class getLanguageIdFunction implements Generator.Function { private Map languageCodeToId; private String otherLanguageId; @@ -142,8 +168,9 @@ public class 
MirBasicProducerAssistantLocalizer implements MirProducerAssistantL Iterator i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language"); while (i.hasNext()) { EntityAdapter language = (EntityAdapter) i.next(); - if (language.get("code").equals("ot")) + if (language.get("code").equals("ot")) { otherLanguageId = (String) language.get("id"); + } languageCodeToId.put(language.get("code"), language.get("id")); } @@ -157,12 +184,14 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL public Object perform(List aParameters) throws GeneratorExc, GeneratorFailure { try { - if (aParameters.size() != 1) + if (aParameters.size() != 1) { throw new GeneratorExc("getLanguageIdFunction: 1 parameter expected: language-code"); + } String result = (String) languageCodeToId.get(aParameters.get(0)); - if (result == null) + if (result == null) { result = otherLanguageId; + } return result; } @@ -172,7 +201,7 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL catch (Throwable t) { throw new GeneratorFailure("getLanguageIdFunction: " + t.getMessage(), t); } - }; + } } @@ -181,13 +210,12 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL logger.debug("about to filter non HTML Text of length " + aText.length()); try { String result = - StringUtil.createHTML( - StringUtil.removeHTMLTags(aText), - MirGlobal.config().getString("Producer.ImageRoot"), - MirGlobal.config().getString("Producer.MailLinkName"), - MirGlobal.config().getString("Producer.ExtLinkName"), - MirGlobal.config().getString("Producer.IntLinkName") - ); + stripper.createHTML( + stripper.removeHTMLTags(aText), + MirGlobal.config().getString("Producer.ImageRoot"), + MirGlobal.config().getString("Producer.MailLinkName"), + MirGlobal.config().getString("Producer.ExtLinkName"), + MirGlobal.config().getString("Producer.IntLinkName")); logger.debug("done filtering non-HTML text "); return 
result; } @@ -197,6 +225,60 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL throw new RuntimeException(t.toString()); } } + + public Generator.Interceptor createGenerationInterceptor() throws MirLocalizerExc, MirLocalizerFailure { + + if (MirGlobal.config().getBoolean("Mir.Producer.UseInterceptor", true)) { + return new Generator.Interceptor() { + + public Object intercept(Object anObject) { + if (anObject instanceof EntityAdapter) { + return new InterceptedEntityAdapter((EntityAdapter) anObject); + } + + return anObject; + } + }; + } + else { + return null; + } + } + + public class InterceptedEntityAdapter { + private EntityAdapter adapter; + + InterceptedEntityAdapter(EntityAdapter anEntityAdapter) { + adapter = anEntityAdapter; + } + + public Object get(String aField) { + Object result = adapter.get(aField); + if (result instanceof String) { + return filterHTMLText((String) result); + } + else { + return result; + } + } + + public Object getRaw() { + return new RawEntityAdapter(adapter); + } + } + + public class RawEntityAdapter { + private EntityAdapter adapter; + + RawEntityAdapter(EntityAdapter anEntityAdapter) { + adapter = anEntityAdapter; + } + + public Object get(String aField) { + return adapter.get(aField); + } + } + public String filterHTMLText(String aText) { try { StringWriter out = new StringWriter(); @@ -204,53 +286,68 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL ByteArrayInputStream in = new ByteArrayInputStream(aText.getBytes("UTF8")); tidy.setMakeClean(true); tidy.setCharEncoding(Configuration.UTF8); + tidy.setErrout(logger.asPrintWriter(LoggerWrapper.DEBUG_MESSAGE)); print(tidy.parseDOM(in, null), out); + return out.toString(); - } catch (IOException e) { + } + catch (IOException e) { return e.getMessage(); } } + + + private boolean isBadAttr(String attrName) { + List badAttributes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributes"), 
";"); + Iterator i = badAttributes.iterator(); + while (i.hasNext()) { + if (((String) i.next()).toLowerCase().equals(attrName.toLowerCase())) { + return true; + } + } + return false; + } + + private String stripWhitespace(String aString) { + try { + return regularExpressionWhitespace.substituteAll(aString, ""); + } + catch (Throwable t) { + return ""; + } + } + private boolean checkAttr(String attrName) { - if (attrName.equals("onLoad") || attrName.equals("onClick") || attrName.equals("onFocus") || attrName.equals("onBlur") || attrName.equals("onMouseOver") || attrName.equals("onMouseOut") || attrName.equals("style") || attrName.equals("STYLE") || attrName.equals("height") || attrName.equals("width") || attrName.equals("HEIGHT") || attrName.equals("WIDTH")) + if (isBadAttr(attrName)) { return false; - else - return true; + } + return true; } + private boolean checkAttrValue(String attrValue) { + List badPrefixes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributeValuePrefixes"), ";"); + Iterator i = badPrefixes.iterator(); + while (i.hasNext()) { + if ((stripWhitespace(attrValue.toLowerCase())).startsWith(((String) i.next()).toLowerCase() + ":")) { + return false; + } + } + return true; + } + + private boolean checkNode(String nodeName) { - if (nodeName.equals("a") || - nodeName.equals("img") || - nodeName.equals("h1") || - nodeName.equals("h2") || - nodeName.equals("h3") || - nodeName.equals("h4") || - nodeName.equals("h5") || - nodeName.equals("h6") || - nodeName.equals("br") || - nodeName.equals("form") || - nodeName.equals("input") || - nodeName.equals("hr") || - nodeName.equals("strong") || - nodeName.equals("font") || - nodeName.equals("b") || - nodeName.equals("i") || - nodeName.equals("em") || - nodeName.equals("p") || - nodeName.equals("table") || - nodeName.equals("tr") || - nodeName.equals("td") || - nodeName.equals("th") || - nodeName.equals("ul") || - nodeName.equals("ol") || - nodeName.equals("li") - ) { - return 
true; - } else { + List acceptableNodes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";"); - return false; + Iterator i = acceptableNodes.iterator(); + while (i.hasNext()) { + if (nodeName.equals(i.next())) { + return true; + } } + return false; } private void print(Node node, StringWriter out) throws IOException { @@ -277,7 +374,12 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL for (int i = 0; i < attrs.getLength(); i++) { String attrName = attrs.item(i).getNodeName(); - if (checkAttr(attrName)) { + String attrValue = attrs.item(i).getNodeValue(); + if (attrValue.startsWith("//")){ + attrValue=regularExpressionLeadingSlashes.substitute(attrValue, "/"); + } + + if (checkAttr(attrName) && checkAttrValue(attrValue)) { out.write(' '); out.write(attrs.item(i).getNodeName()); out.write("=\""); @@ -287,7 +389,7 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL } } - if (node.getChildNodes()==null || node.getChildNodes().getLength()==0) { + if (node.getChildNodes() == null || node.getChildNodes().getLength() == 0) { out.write("/"); } out.write('>'); @@ -302,12 +404,21 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL break; case Node.TEXT_NODE: - out.write(node.getNodeValue()); + String value = node.getNodeValue(); + try { + value = regularExpressionLT.substituteAll(value, "&lt;"); + value = regularExpressionGT.substituteAll(value, "&gt;"); + } + catch (Throwable t) { + value = ""; + } + out.write(value); + break; } - if (type == Node.ELEMENT_NODE && canOutput && node.getChildNodes()!=null && node.getChildNodes().getLength()>0) { + if (type == Node.ELEMENT_NODE && canOutput && node.getChildNodes() != null && node.getChildNodes().getLength() > 0) { out.write("</"); out.write(node.getNodeName()); out.write('>'); @@ -317,12 +428,13 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL } public static class Utility extends 
ReflectionGeneratorFunctionsAdapter { - public Utility () { + public Utility() { super(new MirBasicUtilityFunctions()); } + public Object getDatetime() { return new GeneratorDateTimeFunctions.DateTimeFunctions( - MirPropertiesConfiguration.instance().getString("Mir.DefaultTimezone")); + MirPropertiesConfiguration.instance().getString("Mir.DefaultTimezone")); } public Object getCompressWhitespace() {