X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=source%2Fmircoders%2Flocalizer%2Fbasic%2FMirBasicProducerAssistantLocalizer.java;h=e408c09e3bd161d2c95630679d431d840579e9b2;hb=4368e83f65cd067f2ac70668e35cd1ab4432c4d6;hp=3f4637316a8e9222f3776d4bcac9d6f81314707d;hpb=ed48afa4cb10e822343e79d7c933e97447f92941;p=mir.git diff --git a/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java b/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java index 3f463731..e408c09e 100755 --- a/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java +++ b/source/mircoders/localizer/basic/MirBasicProducerAssistantLocalizer.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2001, 2002 The Mir-coders group + * Copyright (C) 2001, 2002 The Mir-coders group * * This file is part of Mir. * @@ -18,115 +18,447 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * In addition, as a special exception, The Mir-coders gives permission to link - * the code of this program with the com.oreilly.servlet library, any library - * licensed under the Apache Software License, The Sun (tm) Java Advanced - * Imaging library (JAI), The Sun JIMI library (or with modified versions of - * the above that use the same license as the above), and distribute linked - * combinations including the two. You must obey the GNU General Public - * License in all respects for all of the code used other than the above - * mentioned libraries. If you modify this file, you may extend this exception - * to your version of the file, but you are not obligated to do so. If you do - * not wish to do so, delete this exception statement from your version. + * the code of this program with any library licensed under the Apache Software License, + * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library + * (or with modified versions of the above that use the same license as the above), + * and distribute linked combinations including the two. You must obey the + * GNU General Public License in all respects for all of the code used other than + * the above mentioned libraries. If you modify this file, you may extend this + * exception to your version of the file, but you are not obligated to do so. + * If you do not wish to do so, delete this exception statement from your version. */ - package mircoders.localizer.basic; -import java.util.*; -import java.io.*; -import freemarker.template.utility.*; -import mir.misc.*; -import mir.entity.*; -import mir.entity.adapter.*; -import mir.util.*; -import mircoders.module.*; -import mircoders.storage.*; -import mircoders.localizer.*; -import mircoders.global.*; +import gnu.regexp.RE; +import mir.config.MirPropertiesConfiguration; +import mir.entity.adapter.EntityAdapter; +import mir.entity.adapter.EntityIteratorAdapter; +import mir.generator.Generator; +import mir.generator.GeneratorExc; +import mir.generator.GeneratorFailure; +import mir.log.LoggerWrapper; +import mir.util.GeneratorDateTimeFunctions; +import mir.util.GeneratorFormatAdapters; +import mir.util.HTMLStripper; +import mir.util.StringRoutines; +import mir.util.generator.ReflectionGeneratorFunctionsAdapter; +import mircoders.global.MirGlobal; +import mircoders.localizer.MirLocalizerExc; +import mircoders.localizer.MirLocalizerFailure; +import mircoders.localizer.MirProducerAssistantLocalizer; +import org.w3c.dom.Document; +import org.w3c.dom.NamedNodeMap; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.w3c.tidy.Configuration; +import org.w3c.tidy.Tidy; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.StringWriter; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantLocalizer { - protected static Logfile logger = Logfile.getInstance( MirGlobal.getConfigProperty("Home") + "/" + MirGlobal.getConfigProperty("Mir.Localizer.Logfile")); + protected LoggerWrapper logger; + + private HTMLStripper stripper; + private RE regularExpressionLT; + private RE regularExpressionGT; + private RE regularExpressionWhitespace; + + + public MirBasicProducerAssistantLocalizer() throws MirLocalizerFailure { + try { + stripper = new HTMLStripper(); + + regularExpressionLT = new RE("<"); + regularExpressionGT = new RE(">"); + regularExpressionWhitespace = new RE("\\s+"); + } + catch (Throwable t) { + throw new MirLocalizerFailure(t); + } + } + + public void initializeGenerationValueSet(Map aValueSet) throws MirLocalizerExc, MirLocalizerFailure { + try { + Iterator i; - public void initializeGenerationValueSet(Map aValueSet) { - Iterator i; + Map configMap = new HashMap(); - Map configMap = new HashMap(); - Map utilityMap = new HashMap(); + logger = new LoggerWrapper("Localizer.ProducerAssistant"); // obsolete: - configMap.put("producerDocRoot", MirGlobal.getConfigProperty("Producer.DocRoot")); - configMap.put("storageRoot", MirGlobal.getConfigProperty("Producer.StorageRoot")); - configMap.put("productionHost", MirGlobal.getConfigProperty("Producer.ProductionHost")); - configMap.put("openAction", MirGlobal.getConfigProperty("Producer.OpenAction")); - configMap.put("docRoot", MirGlobal.getConfigProperty("RootUri")); - configMap.put("actionRoot", MirGlobal.getConfigProperty("RootUri")+"/servlet/Mir"); - configMap.put("now", new DateToMapAdapter((new GregorianCalendar()).getTime())); - configMap.put("videoHost", MirGlobal.getConfigProperty("Producer.Video.Host")); - configMap.put("audioHost", MirGlobal.getConfigProperty("Producer.Audio.Host")); - configMap.put("imageHost", MirGlobal.getConfigProperty("Producer.Image.Host")); - configMap.put("imagePath", MirGlobal.getConfigProperty("Producer.Image.Path")); - configMap.put("mirVersion", MirGlobal.getConfigProperty("Mir.Version")); - configMap.put("defEncoding", MirGlobal.getConfigProperty("Mir.DefaultEncoding")); + configMap.put("producerDocRoot", MirGlobal.config().getString("Producer.DocRoot")); + configMap.put("storageRoot", MirGlobal.config().getString("Producer.StorageRoot")); + configMap.put("productionHost", MirGlobal.config().getString("Producer.ProductionHost")); + configMap.put("openAction", MirGlobal.config().getString("Producer.OpenAction")); + configMap.put("docRoot", MirGlobal.config().getString("RootUri")); + configMap.put("actionRoot", MirGlobal.config().getString("RootUri") + "/servlet/Mir"); + configMap.put("now", new GeneratorFormatAdapters.DateFormatAdapter(new GregorianCalendar().getTime(), MirGlobal.config().getString("Mir.DefaultTimezone"))); + configMap.put("videoHost", MirGlobal.config().getString("Producer.Video.Host")); + configMap.put("audioHost", MirGlobal.config().getString("Producer.Audio.Host")); + configMap.put("imageHost", MirGlobal.config().getString("Producer.Image.Host")); + configMap.put("imagePath", MirGlobal.config().getString("Producer.Image.Path")); + configMap.put("mirVersion", MirGlobal.config().getString("Mir.Version")); + configMap.put("defEncoding", MirGlobal.config().getString("Mir.DefaultEncoding")); // "new": - configMap.putAll( MirConfig.allSettings() ); + configMap.putAll(MirPropertiesConfiguration.instance().allSettings()); - utilityMap.put("compressWhitespace", new freemarker.template.utility.CompressWhitespace() ); - utilityMap.put("encodeHTML", new GeneratorHTMLFunctions.encodeHTMLGeneratorFunction()); - utilityMap.put("encodeURI", new GeneratorHTMLFunctions.encodeURIGeneratorFunction()); + aValueSet.put("config", configMap); - aValueSet.put("config", configMap); - aValueSet.put("utility", utilityMap); + aValueSet.put("utility", new Utility()); - EntityList topicList=null; - EntityList entityList=null; - EntityList parentList=null; - EntityList languageList=null; + aValueSet.put("languages", + new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language")); - try { - ModuleTopics topicsModule = new ModuleTopics(DatabaseTopics.getInstance()); - ModuleLanguage languageModule = new ModuleLanguage(DatabaseLanguage.getInstance()); + aValueSet.put("topics", + new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "topic")); - topicList = topicsModule.getTopicsList(); - languageList = languageModule.getByWhereClause("", "id", -1); + Map articleTypeMap = new HashMap(); + articleTypeMap.put("openposting", "0"); + articleTypeMap.put("newswire", "1"); + articleTypeMap.put("feature", "2"); + articleTypeMap.put("topicspecial", "3"); + articleTypeMap.put("startspecial", "4"); + + i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "articleType"); + while (i.hasNext()) { + EntityAdapter articleType = (EntityAdapter) i.next(); + + articleTypeMap.put(articleType.get("name"), articleType.get("id")); + } + aValueSet.put("articletype", articleTypeMap); + + Map commentStatusMap = new HashMap(); + i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "commentStatus"); + while (i.hasNext()) { + EntityAdapter commentStatus = (EntityAdapter) i.next(); + + commentStatusMap.put(commentStatus.get("name"), commentStatus.get("id")); + } + aValueSet.put("commentstatus", commentStatusMap); + aValueSet.put("languageCodeToId", new getLanguageIdFunction()); } catch (Throwable t) { - logger.printError("initializeGenerationValueSet: Exception "+t.getMessage()); + logger.error("initializeGenerationValueSet: Exception while collecting comment statuses" + t.getMessage()); + + throw new MirLocalizerFailure(t); + } + + } + + public static class getLanguageIdFunction implements Generator.Function { + private Map languageCodeToId; + private String otherLanguageId; + private LoggerWrapper logger = new LoggerWrapper("Localizer.Earth.getLanguageIdFunction"); + + public getLanguageIdFunction() throws MirLocalizerFailure { + try { + otherLanguageId = ""; + languageCodeToId = new HashMap(); + + Iterator i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language"); + while (i.hasNext()) { + EntityAdapter language = (EntityAdapter) i.next(); + if (language.get("code").equals("ot")) { + otherLanguageId = (String) language.get("id"); + } + + languageCodeToId.put(language.get("code"), language.get("id")); + } + } + catch (Throwable t) { + logger.error(t.toString()); + + throw new MirLocalizerFailure(t); + } + } + + public Object perform(List aParameters) throws GeneratorExc, GeneratorFailure { + try { + if (aParameters.size() != 1) { + throw new GeneratorExc("getLanguageIdFunction: 1 parameter expected: language-code"); + } + + String result = (String) languageCodeToId.get(aParameters.get(0)); + if (result == null) { + result = otherLanguageId; + } + + return result; + } + catch (GeneratorExc e) { + throw e; + } + catch (Throwable t) { + throw new GeneratorFailure("getLanguageIdFunction: " + t.getMessage(), t); + } } + } - aValueSet.put("topics", topicList); - aValueSet.put("imclist", entityList); - aValueSet.put("parentlist", parentList); - Map articleTypeMap = new HashMap(); - articleTypeMap.put("openposting", "0"); - articleTypeMap.put("newswire", "1"); - articleTypeMap.put("feature", "2"); - articleTypeMap.put("topicspecial", "3"); - articleTypeMap.put("startspecial", "4"); + public String filterNonHTMLText(String aText) { + logger.debug("about to filter non HTML Text of length " + aText.length()); try { - i = new EntityIteratorAdapter( "", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "articleType" ); + String result = + stripper.createHTML( + stripper.removeHTMLTags(aText), + MirGlobal.config().getString("Producer.ImageRoot"), + MirGlobal.config().getString("Producer.MailLinkName"), + MirGlobal.config().getString("Producer.ExtLinkName"), + MirGlobal.config().getString("Producer.IntLinkName")); + logger.debug("done filtering non-HTML text "); + return result; + } + catch (Throwable t) { + logger.error("error while filtering non-HTML text: " + t.toString()); - while (i.hasNext()) { - EntityAdapter articleType = (EntityAdapter) i.next(); + throw new RuntimeException(t.toString()); + } + } - articleTypeMap.put(articleType.get("name"), articleType.get("id")); + public Generator.Interceptor createGenerationInterceptor() throws MirLocalizerExc, MirLocalizerFailure { + + if (MirGlobal.config().getBoolean("Mir.Producer.UseInterceptor", true)) { + return new Generator.Interceptor() { + + public Object intercept(Object anObject) { + if (anObject instanceof EntityAdapter) { + return new InterceptedEntityAdapter((EntityAdapter) anObject); + } + + return anObject; + } + }; + } + else { + return null; + } + } + + public class InterceptedEntityAdapter { + private EntityAdapter adapter; + + InterceptedEntityAdapter(EntityAdapter anEntityAdapter) { + adapter = anEntityAdapter; + } + + public Object get(String aField) { + Object result = adapter.get(aField); + if (result instanceof String) { + return filterHTMLText((String) result); + } + else { + return result; + } + } + + public Object getRaw() { + return new RawEntityAdapter(adapter); + } + } + + public class RawEntityAdapter { + private EntityAdapter adapter; + + RawEntityAdapter(EntityAdapter anEntityAdapter) { + adapter = anEntityAdapter; + } + + public Object get(String aField) { + return adapter.get(aField); + } + } + + public String filterHTMLText(String aText) { + try { + StringWriter out = new StringWriter(); + Tidy tidy = new Tidy(); + ByteArrayInputStream in = new ByteArrayInputStream(aText.getBytes("UTF8")); + tidy.setMakeClean(true); + tidy.setCharEncoding(Configuration.UTF8); + tidy.setErrout(logger.asPrintWriter(LoggerWrapper.DEBUG_MESSAGE)); + print(tidy.parseDOM(in, null), out); + + return out.toString(); + } + catch (IOException e) { + return e.getMessage(); + } + } + + + private String[] badAttributeValuePrefixes = { + "javascript", "vbscript", "about", "wysiwyg", "data", "view-source", + "ms-its", "mhtml", "shell", "lynxexec", "lynxcgi", "hcp", "ms-help", + "help", "disk", "vnd.ms.radio", "opera", "res", "resource", "chrome", + "mocha", "livescript"}; + + + private String[] badAttributes = { + "onabort", "onblur", "onchange", "onclick", "ondblclick", "onerror", + "onfocus", "onkeydown", "onKeypress", "onkeyup", "onload", "onmousedown", + "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onreset", + "onselect", "onsubmit", "onunload", "onload", "onclick", "onfocus", + "onblur", "FSCommand", "onAbort", "onActivate", "onAfterPrint", + "onAfterUpdate", "onBeforeActivate", "onBeforeCopy", "onBeforeCut", + "onBeforeDeactivate", "onBeforeEditFocus", "onBeforePaste", + "onBeforePrint", "onBeforeUnload", "onBegin", "onBlur", "onBounce", + "onCellChange", "onChange", "onClick", "onContextMenu", "onControlSelect", + "onCopy", "onCut", "onDataAvailible", "onDataSetChanged", "onDataSetComplete", + "onDblClick", "onDeactivate", "onDrag", "onDragEnd", "onDragLeave", "onDragEnter", + "onDragOver", "onDragDrop", "onDrop", "onEnd", "onError", "onErrorUpdate", "onExit", + "onFilterChange", "onFinish", "onFocus", "onFocusIn", "onFocusOut", "onHelp", + "onKeyDown", "onKeyPress", "onKeyUp", "onLayoutComplete", "onLoad", "onLoseCapture", + "onMediaComplete", "onMediaError", "onMouseDown", "onMouseEnter", "onMouseLeave", + "onMouseMove", "onMouseOut", "onMouseOver", "onMouseUp", "onMouseWheel", "onMove", + "onMoveEnd", "onMoveStart", "onOutOfSync", "onPaste", "onPause", "onProgress", + "onPropertyChange", "onReadyStateChange", "onRepeat", "onReset", "onResize", + "onResizeEnd", "onResizeStart", "onResume", "onReverse", "onRowEnter", "onRowExit", + "onRowDelete", "onRowInserted", "onScroll", "onSeek", "onSelect", "onSelectionChange", + "onSelectStart", "onStart", "onStop", "onSynchRestored", "onSubmit", "onTimeError", + "onTrackChange", "onUnload", "onURLFlip", "seekSegmentTime", "style", "height", "width"}; + + private boolean isBadAttr(String attrName) { + for (int i = 0; i < badAttributes.length; i++) { + if (badAttributes[i].toLowerCase().equals(attrName.toLowerCase())) { + return true; } } + return false; + } + + private String stripWhitespace(String aString) { + try { + return regularExpressionWhitespace.substituteAll(aString, ""); + } catch (Throwable t) { - logger.printError("initializeGenerationValueSet: Exception "+t.getMessage()); + return ""; + } + } + + private boolean checkAttr(String attrName) { + if (isBadAttr(attrName)) { + return false; + } + return true; + + } + + private boolean checkAttrValue(String attrValue) { + for (int i = 0; i < badAttributeValuePrefixes.length; i++) { + if ((stripWhitespace(attrValue.toLowerCase())).startsWith(badAttributeValuePrefixes[i].toLowerCase() + ":")) { + return false; + } + } + return true; + } + + + private boolean checkNode(String nodeName) { + List languages = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";"); + + Iterator i = languages.iterator(); + while (i.hasNext()) { + if (nodeName.equals(i.next())) { + return true; + } } + return false; + } + + private void print(Node node, StringWriter out) throws IOException { + if (node == null) { + return; + } + int type = node.getNodeType(); + boolean canOutput = checkNode(node.getNodeName()); + + switch (type) { + + case Node.DOCUMENT_NODE: + + print(((Document) node).getDocumentElement(), out); + out.flush(); + break; + + case Node.ELEMENT_NODE: + if (canOutput) { + out.write('<'); + + out.write(node.getNodeName()); + NamedNodeMap attrs = node.getAttributes(); - aValueSet.put("articletype", articleTypeMap); - }; + for (int i = 0; i < attrs.getLength(); i++) { + String attrName = attrs.item(i).getNodeName(); + String attrValue = attrs.item(i).getNodeValue(); + if (checkAttr(attrName) && checkAttrValue(attrValue)) { + out.write(' '); + out.write(attrs.item(i).getNodeName()); + out.write("=\""); - public String filterText(String aText) { - return StringUtil.createHTML( - StringUtil.deleteForbiddenTags(aText), - MirGlobal.getConfigProperty("Producer.ImageRoot"), - MirGlobal.getConfigProperty("Producer.MailLinkName"), - MirGlobal.getConfigProperty("Producer.ExtLinkName"), - MirGlobal.getConfigProperty("Producer.IntLinkName") - ); + out.write(attrs.item(i).getNodeValue()); + out.write('"'); + } + } + + if (node.getChildNodes() == null || node.getChildNodes().getLength() == 0) { + out.write("/"); + } + out.write('>'); + } + NodeList children = node.getChildNodes(); + if (children != null) { + int len = children.getLength(); + for (int i = 0; i < len; i++) { + print(children.item(i), out); + } + } + break; + + case Node.TEXT_NODE: + String value = node.getNodeValue(); + try { + value = regularExpressionLT.substituteAll(value, "<"); + value = regularExpressionGT.substituteAll(value, ">"); + } + catch (Throwable t) { + value = ""; + } + out.write(value); + + break; + + } + + if (type == Node.ELEMENT_NODE && canOutput && node.getChildNodes() != null && node.getChildNodes().getLength() > 0) { + out.write("'); + } + + out.flush(); + } + + public static class Utility extends ReflectionGeneratorFunctionsAdapter { + public Utility() { + super(new MirBasicUtilityFunctions()); + } + + public Object getDatetime() { + return new GeneratorDateTimeFunctions.DateTimeFunctions( + MirPropertiesConfiguration.instance().getString("Mir.DefaultTimezone")); + } + + public Object getCompressWhitespace() { + return new freemarker.template.utility.CompressWhitespace(); + } } }