2 * Copyright (C) 2001, 2002 The Mir-coders group
4 * This file is part of Mir.
6 * Mir is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * Mir is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with Mir; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * In addition, as a special exception, The Mir-coders gives permission to link
21 * the code of this program with any library licensed under the Apache Software License,
22 * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library
23 * (or with modified versions of the above that use the same license as the above),
24 * and distribute linked combinations including the two. You must obey the
25 * GNU General Public License in all respects for all of the code used other than
26 * the above mentioned libraries. If you modify this file, you may extend this
27 * exception to your version of the file, but you are not obligated to do so.
28 * If you do not wish to do so, delete this exception statement from your version.
30 package mircoders.localizer.basic;
32 import mir.config.MirPropertiesConfiguration;
33 import mir.entity.adapter.EntityAdapter;
34 import mir.entity.adapter.EntityIteratorAdapter;
35 import mir.generator.Generator;
36 import mir.generator.GeneratorExc;
37 import mir.generator.GeneratorFailure;
38 import mir.log.LoggerWrapper;
40 import mir.util.generator.ReflectionGeneratorFunctionsAdapter;
41 import mircoders.global.MirGlobal;
42 import mircoders.localizer.MirLocalizerExc;
43 import mircoders.localizer.MirLocalizerFailure;
44 import mircoders.localizer.MirProducerAssistantLocalizer;
45 import org.w3c.dom.Document;
46 import org.w3c.dom.NamedNodeMap;
47 import org.w3c.dom.Node;
48 import org.w3c.dom.NodeList;
49 import org.w3c.tidy.Configuration;
50 import org.w3c.tidy.Tidy;
51 import org.apache.oro.text.regex.*;
53 import java.io.ByteArrayInputStream;
54 import java.io.IOException;
55 import java.io.StringWriter;
58 public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantLocalizer {
// Logger used by the filter methods; it is assigned in initializeGenerationValueSet.
59 protected LoggerWrapper logger;
// Removes HTML tags from text; used by filterNonHTMLText.
61 private HTMLStripper stripper;
// Compiled in the constructor; stripWhitespace deletes every match of this pattern.
62 private Pattern regularExpressionWhitespace;
// Compiled in the constructor from "^//+"; print applies it to attribute values starting with "//".
63 private Pattern regularExpressionLeadingSlashes;
// Lower-cased entries of the Localizer.HTML.BadAttributes setting.
64 private Set disallowedAttributes = new HashSet();
// Lower-cased entries of the Localizer.HTML.BadAttributeValuePrefixes setting.
65 private Set disallowedPrefixes = new HashSet();
// Lower-cased entries of the Localizer.HTML.Whitelist setting; checkNode tests node names against it.
66 private Set allowedNodes = new HashSet();
// Lower-cased entries of the Localizer.HTML.ExternalLocationAttributeValuePrefixes setting.
67 private Set externalPrefixes = new HashSet();
// Lower-cased entries of the Localizer.HTML.WhitelistedExternalLocationAttributeValuePrefixes setting.
68 private Set allowedExternalPrefixes = new HashSet();
73 public MirBasicProducerAssistantLocalizer() throws MirLocalizerFailure {
75 stripper = new HTMLStripper();
76 Perl5Compiler compiler = new Perl5Compiler();
78 regularExpressionWhitespace = compiler.compile("\\s+|
|
", Perl5Compiler.READ_ONLY_MASK);
79 regularExpressionLeadingSlashes = compiler.compile("^//+", Perl5Compiler.READ_ONLY_MASK);
81 Iterator i = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributes"), ";").iterator();
83 disallowedAttributes.add(((String) i.next()).toLowerCase());
86 i = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributeValuePrefixes"), ";").iterator();
88 disallowedPrefixes.add(((String) i.next()).toLowerCase());
91 i = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";").iterator();
93 allowedNodes.add(((String) i.next()).toLowerCase());
96 i = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.ExternalLocationAttributeValuePrefixes"), ";").iterator();
98 externalPrefixes.add(((String) i.next()).toLowerCase());
101 i = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.WhitelistedExternalLocationAttributeValuePrefixes"), ";").iterator();
102 while (i.hasNext()) {
103 allowedExternalPrefixes.add(((String) i.next()).toLowerCase());
106 catch (Throwable t) {
107 throw new MirLocalizerFailure(t);
111 public void initializeGenerationValueSet(Map aValueSet) throws MirLocalizerExc, MirLocalizerFailure {
115 Map configMap = new HashMap();
117 logger = new LoggerWrapper("Localizer.ProducerAssistant");
120 configMap.put("producerDocRoot", MirGlobal.config().getString("Producer.DocRoot"));
121 configMap.put("storageRoot", MirGlobal.config().getString("Producer.StorageRoot"));
122 configMap.put("productionHost", MirGlobal.config().getString("Producer.ProductionHost"));
123 configMap.put("openAction", MirGlobal.config().getString("Producer.OpenAction"));
124 configMap.put("docRoot", MirGlobal.config().getString("RootUri"));
125 configMap.put("actionRoot", MirGlobal.config().getString("RootUri") + "/servlet/Mir");
126 configMap.put("now", new GeneratorFormatAdapters.DateFormatAdapter(new GregorianCalendar().getTime(), MirGlobal.config().getString("Mir.DefaultTimezone")));
127 configMap.put("videoHost", MirGlobal.config().getString("Producer.Video.Host"));
128 configMap.put("audioHost", MirGlobal.config().getString("Producer.Audio.Host"));
129 configMap.put("imageHost", MirGlobal.config().getString("Producer.Image.Host"));
130 configMap.put("imagePath", MirGlobal.config().getString("Producer.Image.Path"));
131 configMap.put("mirVersion", MirGlobal.config().getString("Mir.Version"));
132 configMap.put("defEncoding", MirGlobal.config().getString("Mir.DefaultEncoding"));
135 configMap.putAll(MirPropertiesConfiguration.instance().allSettings());
137 aValueSet.put("config", configMap);
139 aValueSet.put("utility", new Utility());
141 aValueSet.put("languages",
142 new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language"));
144 aValueSet.put("topics",
145 new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "topic"));
147 Map articleTypeMap = new HashMap();
148 articleTypeMap.put("openposting", "0");
149 articleTypeMap.put("newswire", "1");
150 articleTypeMap.put("feature", "2");
151 articleTypeMap.put("topicspecial", "3");
152 articleTypeMap.put("startspecial", "4");
154 i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "articleType");
155 while (i.hasNext()) {
156 EntityAdapter articleType = (EntityAdapter) i.next();
158 articleTypeMap.put(articleType.get("name"), articleType.get("id"));
160 aValueSet.put("articletype", articleTypeMap);
162 Map commentStatusMap = new HashMap();
163 i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "commentStatus");
164 while (i.hasNext()) {
165 EntityAdapter commentStatus = (EntityAdapter) i.next();
167 commentStatusMap.put(commentStatus.get("name"), commentStatus.get("id"));
169 aValueSet.put("commentstatus", commentStatusMap);
170 aValueSet.put("languageCodeToId", new getLanguageIdFunction());
172 catch (Throwable t) {
173 logger.error("initializeGenerationValueSet: Exception while collecting comment statuses" + t.getMessage());
175 throw new MirLocalizerFailure(t);
180 public static class getLanguageIdFunction implements Generator.Function {
181 private Map languageCodeToId;
182 private String otherLanguageId;
183 private LoggerWrapper logger = new LoggerWrapper("Localizer.Earth.getLanguageIdFunction");
185 public getLanguageIdFunction() throws MirLocalizerFailure {
187 otherLanguageId = "";
188 languageCodeToId = new HashMap();
190 Iterator i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language");
191 while (i.hasNext()) {
192 EntityAdapter language = (EntityAdapter) i.next();
193 if (language.get("code").equals("ot")) {
194 otherLanguageId = (String) language.get("id");
197 languageCodeToId.put(language.get("code"), language.get("id"));
200 catch (Throwable t) {
201 logger.error(t.toString());
203 throw new MirLocalizerFailure(t);
207 public Object perform(List aParameters) throws GeneratorExc, GeneratorFailure {
209 if (aParameters.size() != 1) {
210 throw new GeneratorExc("getLanguageIdFunction: 1 parameter expected: language-code");
213 String result = (String) languageCodeToId.get(aParameters.get(0));
214 if (result == null) {
215 result = otherLanguageId;
220 catch (GeneratorExc e) {
223 catch (Throwable t) {
224 throw new GeneratorFailure("getLanguageIdFunction: " + t.getMessage(), t);
// Filters text that is not supposed to contain HTML. The input is first run through
// stripper.removeHTMLTags; that result is passed on together with the Producer.ImageRoot,
// Producer.MailLinkName, Producer.ExtLinkName and Producer.IntLinkName settings.
// NOTE(review): the call that receives these arguments and the return statement are not
// visible in this excerpt - presumably the converted text is returned; confirm against the full file.
230 public String filterNonHTMLText(String aText) {
232 logger.debug("about to filter non HTML Text of length " + aText.length());
236 stripper.removeHTMLTags(aText),
237 MirGlobal.config().getString("Producer.ImageRoot"),
238 MirGlobal.config().getString("Producer.MailLinkName"),
239 MirGlobal.config().getString("Producer.ExtLinkName"),
240 MirGlobal.config().getString("Producer.IntLinkName"));
241 logger.debug("done filtering non-HTML text ");
// Any failure is logged and rethrown as an unchecked exception.
// NOTE(review): only t.toString() is handed to the RuntimeException, so the original
// exception is not kept as its cause.
244 catch (Throwable t) {
245 logger.error("error while filtering non-HTML text: " + t.toString());
247 throw new RuntimeException(t.toString());
// Creates the interceptor to be used during generation. An interceptor is built when the
// Mir.Producer.UseInterceptor setting is true (the setting is read with a default of true).
// NOTE(review): what is returned when the setting is false is not visible in this excerpt - confirm.
251 public Generator.Interceptor createGenerationInterceptor() throws MirLocalizerExc, MirLocalizerFailure {
253 if (MirGlobal.config().getBoolean("Mir.Producer.UseInterceptor", true)) {
254 return new Generator.Interceptor() {
// Entity adapters are wrapped in an InterceptedEntityAdapter.
// NOTE(review): the handling of all other objects is not visible here - presumably they are
// passed through unchanged; confirm.
256 public Object intercept(Object anObject) {
257 if (anObject instanceof EntityAdapter) {
258 return new InterceptedEntityAdapter((EntityAdapter) anObject);
270 public class InterceptedEntityAdapter {
271 private EntityAdapter adapter;
273 InterceptedEntityAdapter(EntityAdapter anEntityAdapter) {
274 adapter = anEntityAdapter;
277 public Object get(String aField) {
278 Object result = adapter.get(aField);
279 if (result instanceof String) {
280 return filterHTMLText((String) result);
287 public Object getRaw() {
288 return new RawEntityAdapter(adapter);
292 public class RawEntityAdapter {
293 private EntityAdapter adapter;
295 RawEntityAdapter(EntityAdapter anEntityAdapter) {
296 adapter = anEntityAdapter;
299 public Object get(String aField) {
300 return adapter.get(aField);
304 public String filterHTMLText(String aText) {
306 StringWriter out = new StringWriter();
307 Tidy tidy = new Tidy();
308 ByteArrayInputStream in = new ByteArrayInputStream(aText.getBytes("UTF8"));
309 tidy.setMakeClean(true);
310 tidy.setCharEncoding(Configuration.UTF8);
311 tidy.setErrout(logger.asPrintWriter(LoggerWrapper.DEBUG_MESSAGE));
312 print(tidy.parseDOM(in, null), out);
314 return out.toString();
316 catch (IOException e) {
317 return e.getMessage();
323 * Test whether attributes of the given type are acceptable
325 * @param anAttibuteName
326 * @return <code>true</code> if the attribute is acceptable
328 private boolean testAttribueName(String anAttibuteName) {
329 return !disallowedAttributes.contains(anAttibuteName.toLowerCase());
332 private String stripWhitespace(String aString) {
333 return Util.substitute(
334 new Perl5Matcher(), regularExpressionWhitespace, new Perl5Substitution(""), aString, Util.SUBSTITUTE_ALL);
337 private boolean testAttibuteValue(String anAttributeValue) {
338 Iterator i = disallowedPrefixes.iterator();
340 while (i.hasNext()) {
341 // todo: split the attribute value on : and use contains
342 if ((stripWhitespace(anAttributeValue.toLowerCase())).startsWith(((String) i.next()).toLowerCase() + ":")) {
351 private boolean checkNode(String nodeName) {
352 return allowedNodes.contains(nodeName.toLowerCase());
355 private boolean testAttributeInContext(String aTag, String anAttibute, String aValue){
356 /* The intent here is to prevent external content from being loaded by the user's browser.
357 It's extra paranoid, so will strip some legitimate stuff like an alt="http://www.indymedia.org"
359 if (! MirGlobal.config().getBoolean("Localizer.HTML.KillWebBugs")) {
363 if (("a".equalsIgnoreCase(aTag) && "href".equalsIgnoreCase(anAttibute)) ||
364 ("form".equalsIgnoreCase(aTag) && "action".equalsIgnoreCase(anAttibute))) {
365 // because we still love the web, even if it doesn't return the favor
370 String value = stripWhitespace(aValue.toLowerCase());
372 Iterator i = externalPrefixes.iterator();
373 while (i.hasNext()) {
374 if (value.startsWith((String) i.next())) {
375 // we have hit a bad prefix, but we need to check the whitelist
376 Iterator wl = allowedExternalPrefixes.iterator();
377 while (wl.hasNext()) {
378 if (value.startsWith((String) wl.next())) {
384 return false; //don't let this attribute through
387 return true; //didn't seem to be an external prefix, so it's fine
// Writes the given DOM node, and recursively its children, to the writer as markup.
// Node names are checked with checkNode and attributes with the three test* methods.
// NOTE(review): several statements of this method (the switch header, some case labels and
// most of the literal out.write calls) are not visible in this excerpt; the comments below
// describe only what the visible lines do.
392 private void print(Node node, StringWriter out) throws IOException {
396 int type = node.getNodeType();
398 // will this node be present in the output?
399 boolean keepNode = checkNode(node.getNodeName());
// A document is printed by printing its document element.
403 case Node.DOCUMENT_NODE:
405 print(((Document) node).getDocumentElement(), out);
// Elements: the node name is written, followed by the attributes that pass all checks.
// NOTE(review): presumably the tag itself is only written when keepNode is true - confirm.
409 case Node.ELEMENT_NODE:
413 out.write(node.getNodeName());
414 NamedNodeMap attrs = node.getAttributes();
416 for (int i = 0; i < attrs.getLength(); i++) {
417 String attrName = attrs.item(i).getNodeName();
418 String attrValue = attrs.item(i).getNodeValue();
420 // todo: what is this?
// Values starting with "//" get their leading run of slashes replaced by a single "/"
// before they are tested.
421 if (attrValue.startsWith("//")){
422 attrValue = Util.substitute(
423 new Perl5Matcher(), regularExpressionLeadingSlashes, new Perl5Substitution("/"), attrValue);
426 if (testAttribueName(attrName) && testAttibuteValue(attrValue) && testAttributeInContext(node.getNodeName(), attrName, attrValue)) {
// NOTE(review): name and value are re-read from the node here, so the slash replacement
// above affects only the tests and not the written output - confirm this is intended.
// NOTE(review): no escaping of the attribute value is visible here, whereas node values
// further down go through HTMLRoutines.encodeHTML - verify values cannot break out of the quotes.
428 out.write(attrs.item(i).getNodeName());
431 out.write(attrs.item(i).getNodeValue());
436 // nodes without children will use the shorthand form <br/>. Some browsers
437 // treat <br></br> as a double linebreak
438 if (node.getChildNodes() == null || node.getChildNodes().getLength() == 0) {
// Children are printed recursively, in document order.
447 NodeList children = node.getChildNodes();
448 if (children != null) {
449 int len = children.getLength();
450 for (int i = 0; i < len; i++) {
451 print(children.item(i), out);
// The node name is written again after the children (presumably as part of the closing tag).
457 out.write(node.getNodeName());
// The node value is written HTML-encoded (presumably the text node case - the case label is not visible).
464 out.write(HTMLRoutines.encodeHTML(node.getNodeValue()));
472 public static class Utility extends ReflectionGeneratorFunctionsAdapter {
474 super(new MirBasicUtilityFunctions());
477 public Object getDatetime() {
478 return new GeneratorDateTimeFunctions.DateTimeFunctions(
479 MirPropertiesConfiguration.instance().getString("Mir.DefaultTimezone"));
482 public Object getCompressWhitespace() {
483 return new freemarker.template.utility.CompressWhitespace();