84afe0d1f870c35d4e44c97a1a991549c0ad3a60
[mir.git] / source / mircoders / localizer / basic / MirBasicProducerAssistantLocalizer.java
1 /*
2  * Copyright (C) 2001, 2002 The Mir-coders group
3  *
4  * This file is part of Mir.
5  *
6  * Mir is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * Mir is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with Mir; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19  *
20  * In addition, as a special exception, The Mir-coders gives permission to link
21  * the code of this program with  any library licensed under the Apache Software License,
22  * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library
23  * (or with modified versions of the above that use the same license as the above),
24  * and distribute linked combinations including the two.  You must obey the
25  * GNU General Public License in all respects for all of the code used other than
26  * the above mentioned libraries.  If you modify this file, you may extend this
27  * exception to your version of the file, but you are not obligated to do so.
28  * If you do not wish to do so, delete this exception statement from your version.
29  */
30 package mircoders.localizer.basic;
31
32 import java.io.ByteArrayInputStream;
33 import java.io.IOException;
34 import java.io.StringWriter;
35 import java.util.GregorianCalendar;
36 import java.util.HashMap;
37 import java.util.Iterator;
38 import java.util.List;
39 import java.util.Map;
40
41 import gnu.regexp.RE;
42
43 import mir.config.MirPropertiesConfiguration;
44 import mir.entity.adapter.EntityAdapter;
45 import mir.entity.adapter.EntityIteratorAdapter;
46 import mir.generator.Generator;
47 import mir.generator.GeneratorExc;
48 import mir.generator.GeneratorFailure;
49 import mir.log.LoggerWrapper;
50 import mir.misc.StringUtil;
51 import mir.util.GeneratorDateTimeFunctions;
52 import mir.util.GeneratorFormatAdapters;
53 import mir.util.generator.ReflectionGeneratorFunctionsAdapter;
54 import mir.util.StringRoutines;
55 import mircoders.global.MirGlobal;
56 import mircoders.localizer.MirLocalizerExc;
57 import mircoders.localizer.MirLocalizerFailure;
58 import mircoders.localizer.MirProducerAssistantLocalizer;
59
60 import org.w3c.dom.Document;
61 import org.w3c.dom.NamedNodeMap;
62 import org.w3c.dom.Node;
63 import org.w3c.dom.NodeList;
64 import org.w3c.tidy.Configuration;
65 import org.w3c.tidy.Tidy;
66
67 public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantLocalizer {
68   protected LoggerWrapper logger;
69
70   private RE regularExpressionLT;
71   private RE regularExpressionGT;
72   private RE regularExpressionWhitespace;
73
74   public MirBasicProducerAssistantLocalizer() throws MirLocalizerFailure {
75     try{
76       regularExpressionLT = new RE("<");
77       regularExpressionGT = new RE(">");
78       regularExpressionWhitespace = new RE("\\s+");
79     }
80     catch (Throwable t) {
81       throw new MirLocalizerFailure(t);
82     }
83   }
84
85   public void initializeGenerationValueSet(Map aValueSet) throws MirLocalizerExc, MirLocalizerFailure  {
86     try {
87       Iterator i;
88
89       Map configMap = new HashMap();
90
91       logger = new LoggerWrapper("Localizer.ProducerAssistant");
92
93 // obsolete:
94       configMap.put("producerDocRoot", MirGlobal.config().getString("Producer.DocRoot"));
95       configMap.put("storageRoot", MirGlobal.config().getString("Producer.StorageRoot"));
96       configMap.put("productionHost", MirGlobal.config().getString("Producer.ProductionHost"));
97       configMap.put("openAction", MirGlobal.config().getString("Producer.OpenAction"));
98       configMap.put("docRoot", MirGlobal.config().getString("RootUri"));
99       configMap.put("actionRoot", MirGlobal.config().getString("RootUri") + "/servlet/Mir");
100       configMap.put("now", new GeneratorFormatAdapters.DateFormatAdapter(new GregorianCalendar().getTime(), MirGlobal.config().getString("Mir.DefaultTimezone")));
101       configMap.put("videoHost", MirGlobal.config().getString("Producer.Video.Host"));
102       configMap.put("audioHost", MirGlobal.config().getString("Producer.Audio.Host"));
103       configMap.put("imageHost", MirGlobal.config().getString("Producer.Image.Host"));
104       configMap.put("imagePath", MirGlobal.config().getString("Producer.Image.Path"));
105       configMap.put("mirVersion", MirGlobal.config().getString("Mir.Version"));
106       configMap.put("defEncoding", MirGlobal.config().getString("Mir.DefaultEncoding"));
107
108 // "new":
109       configMap.putAll(MirPropertiesConfiguration.instance().allSettings());
110
111       aValueSet.put("config", configMap);
112
113       aValueSet.put("utility", new Utility()); 
114
115       aValueSet.put("languages",
116         new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language"));
117
118       aValueSet.put("topics",
119         new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "topic"));
120
121       Map articleTypeMap = new HashMap();
122       articleTypeMap.put("openposting", "0");
123       articleTypeMap.put("newswire", "1");
124       articleTypeMap.put("feature", "2");
125       articleTypeMap.put("topicspecial", "3");
126       articleTypeMap.put("startspecial", "4");
127
128       i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "articleType");
129       while (i.hasNext()) {
130         EntityAdapter articleType = (EntityAdapter) i.next();
131
132         articleTypeMap.put(articleType.get("name"), articleType.get("id"));
133       }
134       aValueSet.put("articletype", articleTypeMap);
135
136       Map commentStatusMap = new HashMap();
137       i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "commentStatus");
138       while (i.hasNext()) {
139         EntityAdapter commentStatus = (EntityAdapter) i.next();
140
141         commentStatusMap.put(commentStatus.get("name"), commentStatus.get("id"));
142       }
143       aValueSet.put("commentstatus", commentStatusMap);
144       aValueSet.put("languageCodeToId", new getLanguageIdFunction());
145     }
146     catch (Throwable t) {
147       logger.error("initializeGenerationValueSet: Exception while collecting comment statuses" + t.getMessage());
148
149       throw new MirLocalizerFailure(t);
150     }
151
152   }
153   
154   public static class getLanguageIdFunction implements Generator.Function {
155     private Map languageCodeToId;
156     private String otherLanguageId;
157     private LoggerWrapper logger = new LoggerWrapper("Localizer.Earth.getLanguageIdFunction");
158
159     public getLanguageIdFunction() throws MirLocalizerFailure {
160       try {
161         otherLanguageId = "";
162         languageCodeToId = new HashMap();
163
164         Iterator i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language");
165         while (i.hasNext()) {
166           EntityAdapter language = (EntityAdapter) i.next();
167           if (language.get("code").equals("ot"))
168             otherLanguageId = (String) language.get("id");
169
170           languageCodeToId.put(language.get("code"), language.get("id"));
171         }
172       }
173       catch (Throwable t) {
174         logger.error(t.toString());
175
176         throw new MirLocalizerFailure(t);
177       }
178     }
179
180     public Object perform(List aParameters) throws GeneratorExc, GeneratorFailure {
181       try {
182         if (aParameters.size() != 1)
183           throw new GeneratorExc("getLanguageIdFunction: 1 parameter expected: language-code");
184
185         String result = (String) languageCodeToId.get(aParameters.get(0));
186         if (result == null)
187           result = otherLanguageId;
188
189         return result;
190       }
191       catch (GeneratorExc e) {
192         throw e;
193       }
194       catch (Throwable t) {
195         throw new GeneratorFailure("getLanguageIdFunction: " + t.getMessage(), t);
196       }
197     }
198   }
199
200
201   public String filterNonHTMLText(String aText) {
202
203     logger.debug("about to filter non HTML Text of length " + aText.length());
204     try {
205       String result =
206           StringUtil.createHTML(
207           StringUtil.removeHTMLTags(aText),
208           MirGlobal.config().getString("Producer.ImageRoot"),
209           MirGlobal.config().getString("Producer.MailLinkName"),
210           MirGlobal.config().getString("Producer.ExtLinkName"),
211           MirGlobal.config().getString("Producer.IntLinkName")
212           );
213       logger.debug("done filtering non-HTML text ");
214       return result;
215     }
216     catch (Throwable t) {
217       logger.error("error while filtering non-HTML text: " + t.toString());
218
219       throw new RuntimeException(t.toString());
220     }
221   }
222   public String filterHTMLText(String aText) {
223     try {
224       StringWriter out = new StringWriter();
225       Tidy tidy = new Tidy();
226       ByteArrayInputStream in = new ByteArrayInputStream(aText.getBytes("UTF8"));
227       tidy.setMakeClean(true);
228       tidy.setCharEncoding(Configuration.UTF8);
229       tidy.setErrout(logger.asPrintWriter(LoggerWrapper.DEBUG_MESSAGE));
230       print(tidy.parseDOM(in, null), out);
231       
232       return out.toString();
233     }
234     catch (IOException e) {
235       return e.getMessage();
236     }
237   }
238
239
240   private String[] badAttributeValuePrefixes= {"javascript","vbscript","about","wysiwyg","data","view-source","ms-its","mhtml","shell","lynxexec","lynxcgi","hcp","ms-help","help","disk","vnd.ms.radio","opera","res","resource","chrome","mocha","livescript"};
241  
242
243   private String[] badAttributes = {"onabort", "onblur",  "onchange", "onclick", "ondblclick", "onerror", "onfocus", "onkeydown", "onKeypress", "onkeyup", "onload", "onmousedown", "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onreset", "onselect", "onsubmit", "onunload","onload","onclick","onfocus","onblur","FSCommand","onAbort","onActivate","onAfterPrint","onAfterUpdate","onBeforeActivate","onBeforeCopy","onBeforeCut","onBeforeDeactivate","onBeforeEditFocus","onBeforePaste","onBeforePrint","onBeforeUnload","onBegin","onBlur","onBounce","onCellChange","onChange","onClick","onContextMenu","onControlSelect","onCopy","onCut","onDataAvailible","onDataSetChanged","onDataSetComplete","onDblClick","onDeactivate","onDrag","onDragEnd","onDragLeave","onDragEnter","onDragOver","onDragDrop","onDrop","onEnd","onError","onErrorUpdate","onExit","onFilterChange","onFinish","onFocus","onFocusIn","onFocusOut","onHelp","onKeyDown","onKeyPress","onKeyUp","onLayoutComplete","onLoad","onLoseCapture","onMediaComplete","onMediaError","onMouseDown","onMouseEnter","onMouseLeave","onMouseMove","onMouseOut","onMouseOver","onMouseUp","onMouseWheel","onMove","onMoveEnd","onMoveStart","onOutOfSync","onPaste","onPause","onProgress","onPropertyChange","onReadyStateChange","onRepeat","onReset","onResize","onResizeEnd","onResizeStart","onResume","onReverse","onRowEnter","onRowExit","onRowDelete","onRowInserted","onScroll","onSeek","onSelect","onSelectionChange","onSelectStart","onStart","onStop","onSynchRestored","onSubmit","onTimeError","onTrackChange","onUnload","onURLFlip","seekSegmentTime","style","height","width"};
244   
245   private boolean isBadAttr(String attrName){
246     for (int i=0;i<badAttributes.length;i++){
247       if (badAttributes[i].toLowerCase().equals(attrName.toLowerCase()))
248         return true;
249       }
250     return false;
251   }
252
253   private String stripWhitespace(String aString){
254     try{
255       return regularExpressionWhitespace.substituteAll(aString, "");
256      }
257     catch (Throwable t){
258       return "";
259     }
260   }
261
262   private boolean checkAttr(String attrName) {
263     if (isBadAttr(attrName)){
264         return false;
265     }
266     return true;
267
268   }
269
270   private boolean checkAttrValue(String attrValue) {
271     for (int i=0;i<badAttributeValuePrefixes.length;i++){
272       if ((stripWhitespace(attrValue.toLowerCase())).startsWith(badAttributeValuePrefixes[i].toLowerCase()+":")){
273         return false;
274       } 
275     }
276     return true;
277   }
278
279
280   private boolean checkNode(String nodeName) {
281     List languages =  StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";");
282     
283     Iterator i = languages.iterator();
284     while (i.hasNext()) {
285       if (nodeName.equals(i.next()))
286         return true;
287     }
288     return false;
289   }
290
291   private void print(Node node, StringWriter out) throws IOException {
292     if (node == null) {
293       return;
294     }
295     int type = node.getNodeType();
296     boolean canOutput = checkNode(node.getNodeName());
297
298     switch (type) {
299
300       case Node.DOCUMENT_NODE:
301
302         print(((Document) node).getDocumentElement(), out);
303         out.flush();
304         break;
305
306       case Node.ELEMENT_NODE:
307         if (canOutput) {
308           out.write('<');
309
310           out.write(node.getNodeName());
311           NamedNodeMap attrs = node.getAttributes();
312
313           for (int i = 0; i < attrs.getLength(); i++) {
314             String attrName = attrs.item(i).getNodeName();
315             String attrValue = attrs.item(i).getNodeValue();
316             if (checkAttr(attrName) && checkAttrValue(attrValue)) {
317               out.write(' ');
318               out.write(attrs.item(i).getNodeName());
319               out.write("=\"");
320               
321               out.write(attrs.item(i).getNodeValue());
322               out.write('"');
323             }
324           }
325
326           if (node.getChildNodes()==null || node.getChildNodes().getLength()==0) {
327             out.write("/");
328           }
329           out.write('>');
330         }
331         NodeList children = node.getChildNodes();
332         if (children != null) {
333           int len = children.getLength();
334           for (int i = 0; i < len; i++) {
335             print(children.item(i), out);
336           }
337         }
338         break;
339
340       case Node.TEXT_NODE:
341         String value=node.getNodeValue();
342         try{
343           value=regularExpressionLT.substituteAll(value, "&lt;");
344           value=regularExpressionGT.substituteAll(value, "&gt;");
345         }
346         catch (Throwable t){
347           value="";
348         }
349         out.write(value);
350
351         break;
352
353     }
354
355     if (type == Node.ELEMENT_NODE && canOutput && node.getChildNodes()!=null && node.getChildNodes().getLength()>0) {
356       out.write("</");
357       out.write(node.getNodeName());
358       out.write('>');
359     }
360
361     out.flush();
362   }
363
364   public static class Utility extends ReflectionGeneratorFunctionsAdapter {
365     public Utility () {
366       super(new MirBasicUtilityFunctions());
367     }
368
369     public Object getDatetime() {
370       return new GeneratorDateTimeFunctions.DateTimeFunctions(
371         MirPropertiesConfiguration.instance().getString("Mir.DefaultTimezone"));
372     }
373
374     public Object getCompressWhitespace() {
375       return new freemarker.template.utility.CompressWhitespace();
376     }
377   }
378 }