2 * Copyright (C) 2001, 2002 The Mir-coders group
4 * This file is part of Mir.
6 * Mir is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * Mir is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with Mir; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * In addition, as a special exception, The Mir-coders gives permission to link
21 * the code of this program with any library licensed under the Apache Software License,
22 * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library
23 * (or with modified versions of the above that use the same license as the above),
24 * and distribute linked combinations including the two. You must obey the
25 * GNU General Public License in all respects for all of the code used other than
26 * the above mentioned libraries. If you modify this file, you may extend this
27 * exception to your version of the file, but you are not obligated to do so.
28 * If you do not wish to do so, delete this exception statement from your version.
30 package mircoders.localizer.basic;
32 import java.io.ByteArrayInputStream;
33 import java.io.IOException;
34 import java.io.StringWriter;
35 import java.util.GregorianCalendar;
36 import java.util.HashMap;
37 import java.util.Iterator;
38 import java.util.List;
43 import mir.config.MirPropertiesConfiguration;
44 import mir.entity.adapter.EntityAdapter;
45 import mir.entity.adapter.EntityIteratorAdapter;
46 import mir.generator.Generator;
47 import mir.generator.GeneratorExc;
48 import mir.generator.GeneratorFailure;
49 import mir.log.LoggerWrapper;
50 import mir.misc.StringUtil;
51 import mir.util.GeneratorDateTimeFunctions;
52 import mir.util.GeneratorFormatAdapters;
53 import mir.util.generator.ReflectionGeneratorFunctionsAdapter;
54 import mir.util.StringRoutines;
55 import mircoders.global.MirGlobal;
56 import mircoders.localizer.MirLocalizerExc;
57 import mircoders.localizer.MirLocalizerFailure;
58 import mircoders.localizer.MirProducerAssistantLocalizer;
60 import org.w3c.dom.Document;
61 import org.w3c.dom.NamedNodeMap;
62 import org.w3c.dom.Node;
63 import org.w3c.dom.NodeList;
64 import org.w3c.tidy.Configuration;
65 import org.w3c.tidy.Tidy;
/**
 * Basic implementation of {@link MirProducerAssistantLocalizer}: it fills the value set used
 * during generation and offers the text/HTML filter methods defined further down in this class.
 */
67 public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantLocalizer {
// Logger used by the filter methods. It is assigned in initializeGenerationValueSet, not in the constructor.
68 protected LoggerWrapper logger;
// Regular expressions compiled once in the constructor: "<", ">" and runs of whitespace ("\\s+").
70 private RE regularExpressionLT;
71 private RE regularExpressionGT;
72 private RE regularExpressionWhitespace;
/**
 * Creates the localizer and compiles the regular expressions used by the HTML filter
 * (a literal "<", a literal ">" and runs of whitespace).
 *
 * @throws MirLocalizerFailure wrapping whatever went wrong while compiling the expressions
 */
public MirBasicProducerAssistantLocalizer() throws MirLocalizerFailure {
  try {
    this.regularExpressionWhitespace = new RE("\\s+");
    this.regularExpressionGT = new RE(">");
    this.regularExpressionLT = new RE("<");
  }
  catch (Throwable failure) {
    throw new MirLocalizerFailure(failure);
  }
}
85 public void initializeGenerationValueSet(Map aValueSet) throws MirLocalizerExc, MirLocalizerFailure {
89 Map configMap = new HashMap();
91 logger = new LoggerWrapper("Localizer.ProducerAssistant");
94 configMap.put("producerDocRoot", MirGlobal.config().getString("Producer.DocRoot"));
95 configMap.put("storageRoot", MirGlobal.config().getString("Producer.StorageRoot"));
96 configMap.put("productionHost", MirGlobal.config().getString("Producer.ProductionHost"));
97 configMap.put("openAction", MirGlobal.config().getString("Producer.OpenAction"));
98 configMap.put("docRoot", MirGlobal.config().getString("RootUri"));
99 configMap.put("actionRoot", MirGlobal.config().getString("RootUri") + "/servlet/Mir");
100 configMap.put("now", new GeneratorFormatAdapters.DateFormatAdapter(new GregorianCalendar().getTime(), MirGlobal.config().getString("Mir.DefaultTimezone")));
101 configMap.put("videoHost", MirGlobal.config().getString("Producer.Video.Host"));
102 configMap.put("audioHost", MirGlobal.config().getString("Producer.Audio.Host"));
103 configMap.put("imageHost", MirGlobal.config().getString("Producer.Image.Host"));
104 configMap.put("imagePath", MirGlobal.config().getString("Producer.Image.Path"));
105 configMap.put("mirVersion", MirGlobal.config().getString("Mir.Version"));
106 configMap.put("defEncoding", MirGlobal.config().getString("Mir.DefaultEncoding"));
109 configMap.putAll(MirPropertiesConfiguration.instance().allSettings());
111 aValueSet.put("config", configMap);
113 aValueSet.put("utility", new Utility());
115 aValueSet.put("languages",
116 new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language"));
118 aValueSet.put("topics",
119 new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "topic"));
121 Map articleTypeMap = new HashMap();
122 articleTypeMap.put("openposting", "0");
123 articleTypeMap.put("newswire", "1");
124 articleTypeMap.put("feature", "2");
125 articleTypeMap.put("topicspecial", "3");
126 articleTypeMap.put("startspecial", "4");
128 i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "articleType");
129 while (i.hasNext()) {
130 EntityAdapter articleType = (EntityAdapter) i.next();
132 articleTypeMap.put(articleType.get("name"), articleType.get("id"));
134 aValueSet.put("articletype", articleTypeMap);
136 Map commentStatusMap = new HashMap();
137 i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "commentStatus");
138 while (i.hasNext()) {
139 EntityAdapter commentStatus = (EntityAdapter) i.next();
141 commentStatusMap.put(commentStatus.get("name"), commentStatus.get("id"));
143 aValueSet.put("commentstatus", commentStatusMap);
144 aValueSet.put("languageCodeToId", new getLanguageIdFunction());
146 catch (Throwable t) {
147 logger.error("initializeGenerationValueSet: Exception while collecting comment statuses" + t.getMessage());
149 throw new MirLocalizerFailure(t);
154 public static class getLanguageIdFunction implements Generator.Function {
155 private Map languageCodeToId;
156 private String otherLanguageId;
157 private LoggerWrapper logger = new LoggerWrapper("Localizer.Earth.getLanguageIdFunction");
159 public getLanguageIdFunction() throws MirLocalizerFailure {
161 otherLanguageId = "";
162 languageCodeToId = new HashMap();
164 Iterator i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language");
165 while (i.hasNext()) {
166 EntityAdapter language = (EntityAdapter) i.next();
167 if (language.get("code").equals("ot"))
168 otherLanguageId = (String) language.get("id");
170 languageCodeToId.put(language.get("code"), language.get("id"));
173 catch (Throwable t) {
174 logger.error(t.toString());
176 throw new MirLocalizerFailure(t);
180 public Object perform(List aParameters) throws GeneratorExc, GeneratorFailure {
182 if (aParameters.size() != 1)
183 throw new GeneratorExc("getLanguageIdFunction: 1 parameter expected: language-code");
185 String result = (String) languageCodeToId.get(aParameters.get(0));
187 result = otherLanguageId;
191 catch (GeneratorExc e) {
194 catch (Throwable t) {
195 throw new GeneratorFailure("getLanguageIdFunction: " + t.getMessage(), t);
201 public String filterNonHTMLText(String aText) {
203 logger.debug("about to filter non HTML Text of length " + aText.length());
206 StringUtil.createHTML(
207 StringUtil.removeHTMLTags(aText),
208 MirGlobal.config().getString("Producer.ImageRoot"),
209 MirGlobal.config().getString("Producer.MailLinkName"),
210 MirGlobal.config().getString("Producer.ExtLinkName"),
211 MirGlobal.config().getString("Producer.IntLinkName")
213 logger.debug("done filtering non-HTML text ");
216 catch (Throwable t) {
217 logger.error("error while filtering non-HTML text: " + t.toString());
219 throw new RuntimeException(t.toString());
222 public String filterHTMLText(String aText) {
224 StringWriter out = new StringWriter();
225 Tidy tidy = new Tidy();
226 ByteArrayInputStream in = new ByteArrayInputStream(aText.getBytes("UTF8"));
227 tidy.setMakeClean(true);
228 tidy.setCharEncoding(Configuration.UTF8);
229 tidy.setErrout(logger.asPrintWriter(LoggerWrapper.DEBUG_MESSAGE));
230 print(tidy.parseDOM(in, null), out);
232 return out.toString();
234 catch (IOException e) {
235 return e.getMessage();
240 private String[] badAttributeValuePrefixes= {"javascript","vbscript","about","wysiwyg","data","view-source","ms-its","mhtml","shell","lynxexec","lynxcgi","hcp","ms-help","help","disk","vnd.ms.radio","opera","res","resource","chrome","mocha","livescript"};
243 private String[] badAttributes = {"onabort", "onblur", "onchange", "onclick", "ondblclick", "onerror", "onfocus", "onkeydown", "onKeypress", "onkeyup", "onload", "onmousedown", "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onreset", "onselect", "onsubmit", "onunload","onload","onclick","onfocus","onblur","style","height","width"};
245 private boolean isBadAttr(String attrName){
246 for (int i=0;i<badAttributes.length;i++){
247 if (badAttributes[i].toLowerCase().equals(attrName.toLowerCase()))
253 private String stripWhitespace(String aString){
255 return regularExpressionWhitespace.substituteAll(aString, "");
262 private boolean checkAttr(String attrName) {
263 if (isBadAttr(attrName)){
270 private boolean checkAttrValue(String attrValue) {
271 for (int i=0;i<badAttributeValuePrefixes.length;i++){
272 if ((stripWhitespace(attrValue.toLowerCase())).startsWith(badAttributeValuePrefixes[i].toLowerCase()+":")){
280 private boolean checkNode(String nodeName) {
281 List languages = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";");
283 Iterator i = languages.iterator();
284 while (i.hasNext()) {
285 if (nodeName.equals(i.next()))
291 private void print(Node node, StringWriter out) throws IOException {
295 int type = node.getNodeType();
296 boolean canOutput = checkNode(node.getNodeName());
300 case Node.DOCUMENT_NODE:
302 print(((Document) node).getDocumentElement(), out);
306 case Node.ELEMENT_NODE:
310 out.write(node.getNodeName());
311 NamedNodeMap attrs = node.getAttributes();
313 for (int i = 0; i < attrs.getLength(); i++) {
314 String attrName = attrs.item(i).getNodeName();
315 String attrValue = attrs.item(i).getNodeValue();
316 if (checkAttr(attrName) && checkAttrValue(attrValue)) {
318 out.write(attrs.item(i).getNodeName());
321 out.write(attrs.item(i).getNodeValue());
326 if (node.getChildNodes()==null || node.getChildNodes().getLength()==0) {
331 NodeList children = node.getChildNodes();
332 if (children != null) {
333 int len = children.getLength();
334 for (int i = 0; i < len; i++) {
335 print(children.item(i), out);
341 String value=node.getNodeValue();
343 value=regularExpressionLT.substituteAll(value, "<");
344 value=regularExpressionGT.substituteAll(value, ">");
355 if (type == Node.ELEMENT_NODE && canOutput && node.getChildNodes()!=null && node.getChildNodes().getLength()>0) {
357 out.write(node.getNodeName());
/**
 * Utility object that initializeGenerationValueSet stores in the value set under "utility".
 * It extends ReflectionGeneratorFunctionsAdapter and adds the two getters below.
 */
364 public static class Utility extends ReflectionGeneratorFunctionsAdapter {
// Passes a new MirBasicUtilityFunctions instance to the superclass constructor.
366 super(new MirBasicUtilityFunctions());
/** Returns a DateTimeFunctions object created with the "Mir.DefaultTimezone" setting. */
369 public Object getDatetime() {
370 return new GeneratorDateTimeFunctions.DateTimeFunctions(
371 MirPropertiesConfiguration.instance().getString("Mir.DefaultTimezone"));
/** Returns a new freemarker CompressWhitespace instance on every call. */
374 public Object getCompressWhitespace() {
375 return new freemarker.template.utility.CompressWhitespace();