X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=source%2Fmircoders%2Flocalizer%2Fbasic%2FMirBasicProducerAssistantLocalizer.java;h=c13397bd2e6e092a569ad09dcd0c2f1c88ce47ea;hb=c581a663636b2f530016fafecccbe531d872b2bf;hp=253ed1d8484eef3580d56d1f41338e2180912ab0;hpb=f0f8f85e69c74d10a86f3c0f7ec6e70b6abd72ff;p=mir.git diff --git a/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java b/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java index 253ed1d8..c13397bd 100755 --- a/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java +++ b/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java @@ -29,34 +29,68 @@ */ package mircoders.localizer.basic; -import java.util.GregorianCalendar; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; - +import gnu.regexp.RE; import mir.config.MirPropertiesConfiguration; import mir.entity.adapter.EntityAdapter; import mir.entity.adapter.EntityIteratorAdapter; +import mir.generator.Generator; +import mir.generator.GeneratorExc; +import mir.generator.GeneratorFailure; import mir.log.LoggerWrapper; -import mir.misc.StringUtil; -import mir.util.DateToMapAdapter; -import mir.util.GeneratorExpressionFunctions; -import mir.util.GeneratorHTMLFunctions; -import mir.util.GeneratorIntegerFunctions; -import mir.util.GeneratorListFunctions; -import mir.util.GeneratorStringFunctions; +import mir.util.GeneratorDateTimeFunctions; +import mir.util.GeneratorFormatAdapters; +import mir.util.HTMLStripper; +import mir.util.StringRoutines; +import mir.util.generator.ReflectionGeneratorFunctionsAdapter; import mircoders.global.MirGlobal; +import mircoders.localizer.MirLocalizerExc; +import mircoders.localizer.MirLocalizerFailure; import mircoders.localizer.MirProducerAssistantLocalizer; +import org.w3c.dom.Document; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.w3c.tidy.Configuration; +import org.w3c.tidy.Tidy; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.StringWriter; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantLocalizer { protected LoggerWrapper logger; - public void initializeGenerationValueSet(Map aValueSet) { + private HTMLStripper stripper; + private RE regularExpressionLT; + private RE regularExpressionGT; + private RE regularExpressionWhitespace; + private RE regularExpressionLeadingSlashes; + + + public MirBasicProducerAssistantLocalizer() throws MirLocalizerFailure { + try { + stripper = new HTMLStripper(); + + regularExpressionLT = new RE("<"); + regularExpressionGT = new RE(">"); + regularExpressionWhitespace = new RE("\\s+| | "); + regularExpressionLeadingSlashes = new RE("^//+"); + } + catch (Throwable t) { + throw new MirLocalizerFailure(t); + } + } + + public void initializeGenerationValueSet(Map aValueSet) throws MirLocalizerExc, MirLocalizerFailure { try { Iterator i; Map configMap = new HashMap(); - Map utilityMap = new HashMap(); logger = new LoggerWrapper("Localizer.ProducerAssistant"); @@ -67,7 +101,7 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL configMap.put("openAction", MirGlobal.config().getString("Producer.OpenAction")); configMap.put("docRoot", MirGlobal.config().getString("RootUri")); configMap.put("actionRoot", MirGlobal.config().getString("RootUri") + "/servlet/Mir"); - configMap.put("now", new DateToMapAdapter( (new GregorianCalendar()).getTime())); + configMap.put("now", new GeneratorFormatAdapters.DateFormatAdapter(new GregorianCalendar().getTime(), MirGlobal.config().getString("Mir.DefaultTimezone"))); configMap.put("videoHost", MirGlobal.config().getString("Producer.Video.Host")); configMap.put("audioHost", MirGlobal.config().getString("Producer.Audio.Host")); configMap.put("imageHost", MirGlobal.config().getString("Producer.Image.Host")); @@ -78,24 +112,15 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL // "new": configMap.putAll(MirPropertiesConfiguration.instance().allSettings()); - utilityMap.put("compressWhitespace", new freemarker.template.utility.CompressWhitespace()); - utilityMap.put("encodeHTML", new GeneratorHTMLFunctions.encodeHTMLGeneratorFunction()); - utilityMap.put("encodeXML", new GeneratorHTMLFunctions.encodeXMLGeneratorFunction()); - utilityMap.put("encodeURI", new GeneratorHTMLFunctions.encodeURIGeneratorFunction()); - utilityMap.put("subString", new GeneratorStringFunctions.subStringFunction()); - utilityMap.put("subList", new GeneratorListFunctions.subListFunction()); - utilityMap.put("isOdd", new GeneratorIntegerFunctions.isOddFunction()); - utilityMap.put("increment", new GeneratorIntegerFunctions.incrementFunction()); - utilityMap.put("evaluate", new GeneratorExpressionFunctions.evaluateExpressionFunction()); - aValueSet.put("config", configMap); - aValueSet.put("utility", utilityMap); + + aValueSet.put("utility", new Utility()); aValueSet.put("languages", - new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language")); + new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language")); aValueSet.put("topics", - new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "topic")); + new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "topic")); Map articleTypeMap = new HashMap(); articleTypeMap.put("openposting", "0"); @@ -120,25 +145,300 @@ public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantL commentStatusMap.put(commentStatus.get("name"), commentStatus.get("id")); } aValueSet.put("commentstatus", commentStatusMap); + aValueSet.put("languageCodeToId", new getLanguageIdFunction()); } catch (Throwable t) { logger.error("initializeGenerationValueSet: Exception while collecting comment statuses" + t.getMessage()); - throw new RuntimeException(t.getMessage()); + + throw new MirLocalizerFailure(t); + } + + } + + public static class getLanguageIdFunction implements Generator.Function { + private Map languageCodeToId; + private String otherLanguageId; + private LoggerWrapper logger = new LoggerWrapper("Localizer.Earth.getLanguageIdFunction"); + + public getLanguageIdFunction() throws MirLocalizerFailure { + try { + otherLanguageId = ""; + languageCodeToId = new HashMap(); + + Iterator i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language"); + while (i.hasNext()) { + EntityAdapter language = (EntityAdapter) i.next(); + if (language.get("code").equals("ot")) { + otherLanguageId = (String) language.get("id"); + } + + languageCodeToId.put(language.get("code"), language.get("id")); + } + } + catch (Throwable t) { + logger.error(t.toString()); + + throw new MirLocalizerFailure(t); + } } - }; + public Object perform(List aParameters) throws GeneratorExc, GeneratorFailure { + try { + if (aParameters.size() != 1) { + throw new GeneratorExc("getLanguageIdFunction: 1 parameter expected: language-code"); + } + + String result = (String) languageCodeToId.get(aParameters.get(0)); + if (result == null) { + result = otherLanguageId; + } + + return result; + } + catch (GeneratorExc e) { + throw e; + } + catch (Throwable t) { + throw new GeneratorFailure("getLanguageIdFunction: " + t.getMessage(), t); + } + } + } + public String filterNonHTMLText(String aText) { - return StringUtil.createHTML( - StringUtil.removeHTMLTags(aText), - MirGlobal.config().getString("Producer.ImageRoot"), - MirGlobal.config().getString("Producer.MailLinkName"), - MirGlobal.config().getString("Producer.ExtLinkName"), - MirGlobal.config().getString("Producer.IntLinkName") - ); + + logger.debug("about to filter non HTML Text of length " + aText.length()); + try { + String result = + stripper.createHTML( + stripper.removeHTMLTags(aText), + MirGlobal.config().getString("Producer.ImageRoot"), + MirGlobal.config().getString("Producer.MailLinkName"), + MirGlobal.config().getString("Producer.ExtLinkName"), + MirGlobal.config().getString("Producer.IntLinkName")); + logger.debug("done filtering non-HTML text "); + return result; + } + catch (Throwable t) { + logger.error("error while filtering non-HTML text: " + t.toString()); + + throw new RuntimeException(t.toString()); + } + } + + public Generator.Interceptor createGenerationInterceptor() throws MirLocalizerExc, MirLocalizerFailure { + + if (MirGlobal.config().getBoolean("Mir.Producer.UseInterceptor", true)) { + return new Generator.Interceptor() { + + public Object intercept(Object anObject) { + if (anObject instanceof EntityAdapter) { + return new InterceptedEntityAdapter((EntityAdapter) anObject); + } + + return anObject; + } + }; + } + else { + return null; + } + } + + public class InterceptedEntityAdapter { + private EntityAdapter adapter; + + InterceptedEntityAdapter(EntityAdapter anEntityAdapter) { + adapter = anEntityAdapter; + } + + public Object get(String aField) { + Object result = adapter.get(aField); + if (result instanceof String) { + return filterHTMLText((String) result); + } + else { + return result; + } + } + + public Object getRaw() { + return new RawEntityAdapter(adapter); + } + } + + public class RawEntityAdapter { + private EntityAdapter adapter; + + RawEntityAdapter(EntityAdapter anEntityAdapter) { + adapter = anEntityAdapter; + } + + public Object get(String aField) { + return adapter.get(aField); + } } public String filterHTMLText(String aText) { - return StringUtil.deleteForbiddenTags(aText); + try { + StringWriter out = new StringWriter(); + Tidy tidy = new Tidy(); + ByteArrayInputStream in = new ByteArrayInputStream(aText.getBytes("UTF8")); + tidy.setMakeClean(true); + tidy.setCharEncoding(Configuration.UTF8); + tidy.setErrout(logger.asPrintWriter(LoggerWrapper.DEBUG_MESSAGE)); + print(tidy.parseDOM(in, null), out); + + return out.toString(); + } + catch (IOException e) { + return e.getMessage(); + } + } + + + + private boolean isBadAttr(String attrName) { + List badAttributes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributes"), ";"); + Iterator i = badAttributes.iterator(); + while (i.hasNext()) { + if (((String) i.next()).toLowerCase().equals(attrName.toLowerCase())) { + return true; + } + } + return false; + } + + private String stripWhitespace(String aString) { + try { + return regularExpressionWhitespace.substituteAll(aString, ""); + } + catch (Throwable t) { + return ""; + } + } + + private boolean checkAttr(String attrName) { + if (isBadAttr(attrName)) { + return false; + } + return true; + + } + + private boolean checkAttrValue(String attrValue) { + List badPrefixes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributeValuePrefixes"), ";"); + Iterator i = badPrefixes.iterator(); + while (i.hasNext()) { + if ((stripWhitespace(attrValue.toLowerCase())).startsWith(((String) i.next()).toLowerCase() + ":")) { + return false; + } + } + return true; + } + + + private boolean checkNode(String nodeName) { + List acceptableNodes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";"); + + Iterator i = acceptableNodes.iterator(); + while (i.hasNext()) { + if (nodeName.equals(i.next())) { + return true; + } + } + return false; + } + + private void print(Node node, StringWriter out) throws IOException { + if (node == null) { + return; + } + int type = node.getNodeType(); + boolean canOutput = checkNode(node.getNodeName()); + + switch (type) { + + case Node.DOCUMENT_NODE: + + print(((Document) node).getDocumentElement(), out); + out.flush(); + break; + + case Node.ELEMENT_NODE: + if (canOutput) { + out.write('<'); + + out.write(node.getNodeName()); + NamedNodeMap attrs = node.getAttributes(); + + for (int i = 0; i < attrs.getLength(); i++) { + String attrName = attrs.item(i).getNodeName(); + String attrValue = attrs.item(i).getNodeValue(); + if (attrValue.startsWith("//")){ + attrValue=regularExpressionLeadingSlashes.substitute(attrValue, "/"); + } + + if (checkAttr(attrName) && checkAttrValue(attrValue)) { + out.write(' '); + out.write(attrs.item(i).getNodeName()); + out.write("=\""); + + out.write(attrs.item(i).getNodeValue()); + out.write('"'); + } + } + + if (node.getChildNodes() == null || node.getChildNodes().getLength() == 0) { + out.write("/"); + } + out.write('>'); + } + NodeList children = node.getChildNodes(); + if (children != null) { + int len = children.getLength(); + for (int i = 0; i < len; i++) { + print(children.item(i), out); + } + } + break; + + case Node.TEXT_NODE: + String value = node.getNodeValue(); + try { + value = regularExpressionLT.substituteAll(value, "<"); + value = regularExpressionGT.substituteAll(value, ">"); + } + catch (Throwable t) { + value = ""; + } + out.write(value); + + break; + + } + + if (type == Node.ELEMENT_NODE && canOutput && node.getChildNodes() != null && node.getChildNodes().getLength() > 0) { + out.write("'); + } + + out.flush(); + } + + public static class Utility extends ReflectionGeneratorFunctionsAdapter { + public Utility() { + super(new MirBasicUtilityFunctions()); + } + + public Object getDatetime() { + return new GeneratorDateTimeFunctions.DateTimeFunctions( + MirPropertiesConfiguration.instance().getString("Mir.DefaultTimezone")); + } + + public Object getCompressWhitespace() { + return new freemarker.template.utility.CompressWhitespace(); + } } }