2 * Copyright (C) 2001, 2002 The Mir-coders group
4 * This file is part of Mir.
6 * Mir is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * Mir is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with Mir; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * In addition, as a special exception, The Mir-coders gives permission to link
21 * the code of this program with any library licensed under the Apache Software License,
22 * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library
23 * (or with modified versions of the above that use the same license as the above),
24 * and distribute linked combinations including the two. You must obey the
25 * GNU General Public License in all respects for all of the code used other than
26 * the above mentioned libraries. If you modify this file, you may extend this
27 * exception to your version of the file, but you are not obligated to do so.
28 * If you do not wish to do so, delete this exception statement from your version.
30 package mircoders.localizer.basic;
33 import mir.config.MirPropertiesConfiguration;
34 import mir.entity.adapter.EntityAdapter;
35 import mir.entity.adapter.EntityIteratorAdapter;
36 import mir.generator.Generator;
37 import mir.generator.GeneratorExc;
38 import mir.generator.GeneratorFailure;
39 import mir.log.LoggerWrapper;
40 import mir.util.GeneratorDateTimeFunctions;
41 import mir.util.GeneratorFormatAdapters;
42 import mir.util.HTMLStripper;
43 import mir.util.StringRoutines;
44 import mir.util.generator.ReflectionGeneratorFunctionsAdapter;
45 import mircoders.global.MirGlobal;
46 import mircoders.localizer.MirLocalizerExc;
47 import mircoders.localizer.MirLocalizerFailure;
48 import mircoders.localizer.MirProducerAssistantLocalizer;
49 import org.w3c.dom.Document;
50 import org.w3c.dom.NamedNodeMap;
51 import org.w3c.dom.Node;
52 import org.w3c.dom.NodeList;
53 import org.w3c.tidy.Configuration;
54 import org.w3c.tidy.Tidy;
56 import java.io.ByteArrayInputStream;
57 import java.io.IOException;
58 import java.io.StringWriter;
59 import java.util.GregorianCalendar;
60 import java.util.HashMap;
61 import java.util.Iterator;
62 import java.util.List;
65 public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantLocalizer {
66 protected LoggerWrapper logger;
68 private HTMLStripper stripper;
69 private RE regularExpressionLT;
70 private RE regularExpressionGT;
71 private RE regularExpressionWhitespace;
/**
 * Creates the localizer: instantiates the HTML stripper and compiles the three
 * regular expressions ("<", ">" and runs of whitespace) that are stored in the
 * fields of this class.
 *
 * NOTE(review): a throwable named t is rethrown below, but the "try" and
 * "catch" lines that would introduce it are not present in this listing --
 * confirm against the original file.
 *
 * @throws MirLocalizerFailure wrapping the throwable t
 */
74 public MirBasicProducerAssistantLocalizer() throws MirLocalizerFailure {
76 stripper = new HTMLStripper();
// patterns used later for text nodes (LT/GT) and attribute values (whitespace)
78 regularExpressionLT = new RE("<");
79 regularExpressionGT = new RE(">");
80 regularExpressionWhitespace = new RE("\\s+");
// any set-up problem is reported as a localizer failure with t as its cause
83 throw new MirLocalizerFailure(t);
87 public void initializeGenerationValueSet(Map aValueSet) throws MirLocalizerExc, MirLocalizerFailure {
91 Map configMap = new HashMap();
93 logger = new LoggerWrapper("Localizer.ProducerAssistant");
96 configMap.put("producerDocRoot", MirGlobal.config().getString("Producer.DocRoot"));
97 configMap.put("storageRoot", MirGlobal.config().getString("Producer.StorageRoot"));
98 configMap.put("productionHost", MirGlobal.config().getString("Producer.ProductionHost"));
99 configMap.put("openAction", MirGlobal.config().getString("Producer.OpenAction"));
100 configMap.put("docRoot", MirGlobal.config().getString("RootUri"));
101 configMap.put("actionRoot", MirGlobal.config().getString("RootUri") + "/servlet/Mir");
102 configMap.put("now", new GeneratorFormatAdapters.DateFormatAdapter(new GregorianCalendar().getTime(), MirGlobal.config().getString("Mir.DefaultTimezone")));
103 configMap.put("videoHost", MirGlobal.config().getString("Producer.Video.Host"));
104 configMap.put("audioHost", MirGlobal.config().getString("Producer.Audio.Host"));
105 configMap.put("imageHost", MirGlobal.config().getString("Producer.Image.Host"));
106 configMap.put("imagePath", MirGlobal.config().getString("Producer.Image.Path"));
107 configMap.put("mirVersion", MirGlobal.config().getString("Mir.Version"));
108 configMap.put("defEncoding", MirGlobal.config().getString("Mir.DefaultEncoding"));
111 configMap.putAll(MirPropertiesConfiguration.instance().allSettings());
113 aValueSet.put("config", configMap);
115 aValueSet.put("utility", new Utility());
117 aValueSet.put("languages",
118 new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language"));
120 aValueSet.put("topics",
121 new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "topic"));
123 Map articleTypeMap = new HashMap();
124 articleTypeMap.put("openposting", "0");
125 articleTypeMap.put("newswire", "1");
126 articleTypeMap.put("feature", "2");
127 articleTypeMap.put("topicspecial", "3");
128 articleTypeMap.put("startspecial", "4");
130 i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "articleType");
131 while (i.hasNext()) {
132 EntityAdapter articleType = (EntityAdapter) i.next();
134 articleTypeMap.put(articleType.get("name"), articleType.get("id"));
136 aValueSet.put("articletype", articleTypeMap);
138 Map commentStatusMap = new HashMap();
139 i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "commentStatus");
140 while (i.hasNext()) {
141 EntityAdapter commentStatus = (EntityAdapter) i.next();
143 commentStatusMap.put(commentStatus.get("name"), commentStatus.get("id"));
145 aValueSet.put("commentstatus", commentStatusMap);
146 aValueSet.put("languageCodeToId", new getLanguageIdFunction());
148 catch (Throwable t) {
149 logger.error("initializeGenerationValueSet: Exception while collecting comment statuses" + t.getMessage());
151 throw new MirLocalizerFailure(t);
156 public static class getLanguageIdFunction implements Generator.Function {
157 private Map languageCodeToId;
158 private String otherLanguageId;
159 private LoggerWrapper logger = new LoggerWrapper("Localizer.Earth.getLanguageIdFunction");
161 public getLanguageIdFunction() throws MirLocalizerFailure {
163 otherLanguageId = "";
164 languageCodeToId = new HashMap();
166 Iterator i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language");
167 while (i.hasNext()) {
168 EntityAdapter language = (EntityAdapter) i.next();
169 if (language.get("code").equals("ot")) {
170 otherLanguageId = (String) language.get("id");
173 languageCodeToId.put(language.get("code"), language.get("id"));
176 catch (Throwable t) {
177 logger.error(t.toString());
179 throw new MirLocalizerFailure(t);
183 public Object perform(List aParameters) throws GeneratorExc, GeneratorFailure {
185 if (aParameters.size() != 1) {
186 throw new GeneratorExc("getLanguageIdFunction: 1 parameter expected: language-code");
189 String result = (String) languageCodeToId.get(aParameters.get(0));
190 if (result == null) {
191 result = otherLanguageId;
196 catch (GeneratorExc e) {
199 catch (Throwable t) {
200 throw new GeneratorFailure("getLanguageIdFunction: " + t.getMessage(), t);
/**
 * Filters text that is not supposed to contain HTML.
 *
 * The HTML tags are removed from aText with the HTMLStripper; the stripped text
 * is then passed, together with the configuration values Producer.ImageRoot,
 * Producer.MailLinkName, Producer.ExtLinkName and Producer.IntLinkName, as the
 * argument list of a call.
 *
 * NOTE(review): the line naming the method that receives these arguments, the
 * "try {" opener and the return statement are not present in this listing --
 * confirm against the original file.
 *
 * @param aText the text to filter; its length is logged, so it must not be null
 * @throws RuntimeException carrying only t.toString() when anything fails
 *         (the original throwable is not attached as the cause)
 */
206 public String filterNonHTMLText(String aText) {
208 logger.debug("about to filter non HTML Text of length " + aText.length());
// argument list of the conversion call: stripped text first, then four settings
212 stripper.removeHTMLTags(aText),
213 MirGlobal.config().getString("Producer.ImageRoot"),
214 MirGlobal.config().getString("Producer.MailLinkName"),
215 MirGlobal.config().getString("Producer.ExtLinkName"),
216 MirGlobal.config().getString("Producer.IntLinkName"));
217 logger.debug("done filtering non-HTML text ");
220 catch (Throwable t) {
221 logger.error("error while filtering non-HTML text: " + t.toString());
// rethrown unchecked; only the textual form of t survives
223 throw new RuntimeException(t.toString());
/**
 * Creates the interceptor applied to objects during generation.
 *
 * When the boolean setting Mir.Producer.UseInterceptor is on (it is read with
 * true as second argument, presumably the default -- confirm), an anonymous
 * Generator.Interceptor is returned that wraps every EntityAdapter it is given
 * in an InterceptedEntityAdapter.
 *
 * NOTE(review): what the interceptor returns for objects that are not an
 * EntityAdapter, and what this method returns when the setting is off, is on
 * lines that are not present in this listing.
 *
 * @throws MirLocalizerExc declared; no throw is visible in this listing
 * @throws MirLocalizerFailure declared; no throw is visible in this listing
 */
227 public Generator.Interceptor createGenerationInterceptor() throws MirLocalizerExc, MirLocalizerFailure {
229 if (MirGlobal.config().getBoolean("Mir.Producer.UseInterceptor", true)) {
230 return new Generator.Interceptor() {
232 public Object intercept(Object anObject) {
// entity adapters are wrapped so that their String fields get HTML-filtered
233 if (anObject instanceof EntityAdapter) {
234 return new InterceptedEntityAdapter((EntityAdapter) anObject);
246 public class InterceptedEntityAdapter {
247 private EntityAdapter adapter;
249 InterceptedEntityAdapter(EntityAdapter anEntityAdapter) {
250 adapter = anEntityAdapter;
253 public Object get(String aField) {
254 Object result = adapter.get(aField);
255 if (result instanceof String) {
256 return filterHTMLText((String) result);
263 public Object getRaw() {
264 return new RawEntityAdapter(adapter);
268 public class RawEntityAdapter {
269 private EntityAdapter adapter;
271 RawEntityAdapter(EntityAdapter anEntityAdapter) {
272 adapter = anEntityAdapter;
275 public Object get(String aField) {
276 return adapter.get(aField);
280 public String filterHTMLText(String aText) {
282 StringWriter out = new StringWriter();
283 Tidy tidy = new Tidy();
284 ByteArrayInputStream in = new ByteArrayInputStream(aText.getBytes("UTF8"));
285 tidy.setMakeClean(true);
286 tidy.setCharEncoding(Configuration.UTF8);
287 tidy.setErrout(logger.asPrintWriter(LoggerWrapper.DEBUG_MESSAGE));
288 print(tidy.parseDOM(in, null), out);
290 return out.toString();
292 catch (IOException e) {
293 return e.getMessage();
298 private String[] badAttributeValuePrefixes = {
299 "javascript", "vbscript", "about", "wysiwyg", "data", "view-source",
300 "ms-its", "mhtml", "shell", "lynxexec", "lynxcgi", "hcp", "ms-help",
301 "help", "disk", "vnd.ms.radio", "opera", "res", "resource", "chrome",
302 "mocha", "livescript"};
305 private String[] badAttributes = {
306 "onabort", "onblur", "onchange", "onclick", "ondblclick", "onerror",
307 "onfocus", "onkeydown", "onKeypress", "onkeyup", "onload", "onmousedown",
308 "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onreset",
309 "onselect", "onsubmit", "onunload", "onload", "onclick", "onfocus",
310 "onblur", "FSCommand", "onAbort", "onActivate", "onAfterPrint",
311 "onAfterUpdate", "onBeforeActivate", "onBeforeCopy", "onBeforeCut",
312 "onBeforeDeactivate", "onBeforeEditFocus", "onBeforePaste",
313 "onBeforePrint", "onBeforeUnload", "onBegin", "onBlur", "onBounce",
314 "onCellChange", "onChange", "onClick", "onContextMenu", "onControlSelect",
315 "onCopy", "onCut", "onDataAvailible", "onDataSetChanged", "onDataSetComplete",
316 "onDblClick", "onDeactivate", "onDrag", "onDragEnd", "onDragLeave", "onDragEnter",
317 "onDragOver", "onDragDrop", "onDrop", "onEnd", "onError", "onErrorUpdate", "onExit",
318 "onFilterChange", "onFinish", "onFocus", "onFocusIn", "onFocusOut", "onHelp",
319 "onKeyDown", "onKeyPress", "onKeyUp", "onLayoutComplete", "onLoad", "onLoseCapture",
320 "onMediaComplete", "onMediaError", "onMouseDown", "onMouseEnter", "onMouseLeave",
321 "onMouseMove", "onMouseOut", "onMouseOver", "onMouseUp", "onMouseWheel", "onMove",
322 "onMoveEnd", "onMoveStart", "onOutOfSync", "onPaste", "onPause", "onProgress",
323 "onPropertyChange", "onReadyStateChange", "onRepeat", "onReset", "onResize",
324 "onResizeEnd", "onResizeStart", "onResume", "onReverse", "onRowEnter", "onRowExit",
325 "onRowDelete", "onRowInserted", "onScroll", "onSeek", "onSelect", "onSelectionChange",
326 "onSelectStart", "onStart", "onStop", "onSynchRestored", "onSubmit", "onTimeError",
327 "onTrackChange", "onUnload", "onURLFlip", "seekSegmentTime", "style", "height", "width"};
329 private boolean isBadAttr(String attrName) {
330 for (int i = 0; i < badAttributes.length; i++) {
331 if (badAttributes[i].toLowerCase().equals(attrName.toLowerCase())) {
/**
 * Removes every run of whitespace from aString by substituting all matches of
 * regularExpressionWhitespace with the empty string.
 *
 * NOTE(review): the "try {" opener and the body of the catch block are not
 * present in this listing, so what is returned when the substitution throws
 * cannot be told from here -- confirm against the original file.
 *
 * @param aString the text to strip
 * @return aString without whitespace when the substitution succeeds
 */
338 private String stripWhitespace(String aString) {
340 return regularExpressionWhitespace.substituteAll(aString, "");
342 catch (Throwable t) {
347 private boolean checkAttr(String attrName) {
348 if (isBadAttr(attrName)) {
355 private boolean checkAttrValue(String attrValue) {
356 for (int i = 0; i < badAttributeValuePrefixes.length; i++) {
357 if ((stripWhitespace(attrValue.toLowerCase())).startsWith(badAttributeValuePrefixes[i].toLowerCase() + ":")) {
365 private boolean checkNode(String nodeName) {
366 List languages = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";");
368 Iterator i = languages.iterator();
369 while (i.hasNext()) {
370 if (nodeName.equals(i.next())) {
/**
 * Serializes a DOM node and, recursively, its children to the given writer.
 * Element names are checked with checkNode; attributes are written only when
 * both checkAttr (name) and checkAttrValue (value) accept them.
 *
 * NOTE(review): several statements of this method (the switch header, the
 * writes of markup punctuation, break statements, the text-node case label and
 * all closing braces) are not present in this listing. The comments below
 * describe the visible lines only -- confirm details against the original file.
 *
 * @param node the node to serialize
 * @param out the writer that receives the markup
 * @throws IOException declared for the write operations
 */
377 private void print(Node node, StringWriter out) throws IOException {
381 int type = node.getNodeType();
// whether the node's name is whitelisted; consulted again for the end tag below
382 boolean canOutput = checkNode(node.getNodeName());
386 case Node.DOCUMENT_NODE:
// a document is serialized through its root element
388 print(((Document) node).getDocumentElement(), out);
392 case Node.ELEMENT_NODE:
396 out.write(node.getNodeName());
397 NamedNodeMap attrs = node.getAttributes();
399 for (int i = 0; i < attrs.getLength(); i++) {
400 String attrName = attrs.item(i).getNodeName();
401 String attrValue = attrs.item(i).getNodeValue();
// only attributes that pass both the name and the value check are written
402 if (checkAttr(attrName) && checkAttrValue(attrValue)) {
404 out.write(attrs.item(i).getNodeName());
// NOTE(review): the value is written as-is; no escaping of quote characters
// is visible here -- confirm that the value cannot break out of the attribute
407 out.write(attrs.item(i).getNodeValue());
// elements without children are treated specially (statement not visible here)
412 if (node.getChildNodes() == null || node.getChildNodes().getLength() == 0) {
// children are visited regardless of the surrounding attribute handling
417 NodeList children = node.getChildNodes();
418 if (children != null) {
419 int len = children.getLength();
420 for (int i = 0; i < len; i++) {
421 print(children.item(i), out);
427 String value = node.getNodeValue();
// NOTE(review): as listed, these two calls replace "<" with "<" and ">" with
// ">", which changes nothing. The replacements were presumably the entities
// for these characters and got decoded when the listing was produced -- verify
// against the original file, otherwise text nodes are written unescaped.
429 value = regularExpressionLT.substituteAll(value, "<");
430 value = regularExpressionGT.substituteAll(value, ">");
432 catch (Throwable t) {
// end-tag handling: only for whitelisted elements that have at least one child
441 if (type == Node.ELEMENT_NODE && canOutput && node.getChildNodes() != null && node.getChildNodes().getLength() > 0) {
443 out.write(node.getNodeName());
/**
 * Utility object published in the generation value set under the key "utility".
 * It extends ReflectionGeneratorFunctionsAdapter and hands a new
 * MirBasicUtilityFunctions instance to the superclass constructor.
 *
 * NOTE(review): the constructor header and the closing braces are not present
 * in this listing -- confirm against the original file.
 */
450 public static class Utility extends ReflectionGeneratorFunctionsAdapter {
452 super(new MirBasicUtilityFunctions());
/**
 * @return new date/time functions created with the configured Mir.DefaultTimezone
 */
455 public Object getDatetime() {
456 return new GeneratorDateTimeFunctions.DateTimeFunctions(
457 MirPropertiesConfiguration.instance().getString("Mir.DefaultTimezone"));
/**
 * @return a new freemarker CompressWhitespace instance
 */
460 public Object getCompressWhitespace() {
461 return new freemarker.template.utility.CompressWhitespace();