*/
package mircoders.localizer.basic;
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.StringWriter;
-import java.util.GregorianCalendar;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
import gnu.regexp.RE;
-
import mir.config.MirPropertiesConfiguration;
import mir.entity.adapter.EntityAdapter;
import mir.entity.adapter.EntityIteratorAdapter;
import mir.generator.GeneratorExc;
import mir.generator.GeneratorFailure;
import mir.log.LoggerWrapper;
-import mir.misc.StringUtil;
import mir.util.GeneratorDateTimeFunctions;
import mir.util.GeneratorFormatAdapters;
-import mir.util.generator.ReflectionGeneratorFunctionsAdapter;
+import mir.util.HTMLStripper;
import mir.util.StringRoutines;
+import mir.util.generator.ReflectionGeneratorFunctionsAdapter;
import mircoders.global.MirGlobal;
import mircoders.localizer.MirLocalizerExc;
import mircoders.localizer.MirLocalizerFailure;
import mircoders.localizer.MirProducerAssistantLocalizer;
-
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.tidy.Configuration;
import org.w3c.tidy.Tidy;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.StringWriter;
+import java.util.GregorianCalendar;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantLocalizer {
protected LoggerWrapper logger;
+ private HTMLStripper stripper;
private RE regularExpressionLT;
private RE regularExpressionGT;
+ private RE regularExpressionWhitespace;
- public MirBasicProducerAssistantLocalizer() throws MirLocalizerExc, MirLocalizerFailure {
- try{
- regularExpressionLT = new RE("<");
- regularExpressionGT = new RE(">");
- }
- catch (Throwable t) {
- throw new MirLocalizerFailure(t);
- }
+
+ public MirBasicProducerAssistantLocalizer() throws MirLocalizerFailure {
+ try {
+ stripper = new HTMLStripper();
+
+ regularExpressionLT = new RE("<");
+ regularExpressionGT = new RE(">");
+ regularExpressionWhitespace = new RE("\\s+");
+ }
+ catch (Throwable t) {
+ throw new MirLocalizerFailure(t);
+ }
}
- public void initializeGenerationValueSet(Map aValueSet) throws MirLocalizerExc, MirLocalizerFailure {
+ public void initializeGenerationValueSet(Map aValueSet) throws MirLocalizerExc, MirLocalizerFailure {
try {
Iterator i;
aValueSet.put("config", configMap);
- aValueSet.put("utility", new Utility());
+ aValueSet.put("utility", new Utility());
aValueSet.put("languages",
- new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language"));
+ new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language"));
aValueSet.put("topics",
- new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "topic"));
+ new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "topic"));
Map articleTypeMap = new HashMap();
articleTypeMap.put("openposting", "0");
}
}
-
+
public static class getLanguageIdFunction implements Generator.Function {
private Map languageCodeToId;
private String otherLanguageId;
Iterator i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language");
while (i.hasNext()) {
EntityAdapter language = (EntityAdapter) i.next();
- if (language.get("code").equals("ot"))
+ if (language.get("code").equals("ot")) {
otherLanguageId = (String) language.get("id");
+ }
languageCodeToId.put(language.get("code"), language.get("id"));
}
public Object perform(List aParameters) throws GeneratorExc, GeneratorFailure {
try {
- if (aParameters.size() != 1)
+ if (aParameters.size() != 1) {
throw new GeneratorExc("getLanguageIdFunction: 1 parameter expected: language-code");
+ }
String result = (String) languageCodeToId.get(aParameters.get(0));
- if (result == null)
+ if (result == null) {
result = otherLanguageId;
+ }
return result;
}
logger.debug("about to filter non HTML Text of length " + aText.length());
try {
String result =
- StringUtil.createHTML(
- StringUtil.removeHTMLTags(aText),
- MirGlobal.config().getString("Producer.ImageRoot"),
- MirGlobal.config().getString("Producer.MailLinkName"),
- MirGlobal.config().getString("Producer.ExtLinkName"),
- MirGlobal.config().getString("Producer.IntLinkName")
- );
+ stripper.createHTML(
+ stripper.removeHTMLTags(aText),
+ MirGlobal.config().getString("Producer.ImageRoot"),
+ MirGlobal.config().getString("Producer.MailLinkName"),
+ MirGlobal.config().getString("Producer.ExtLinkName"),
+ MirGlobal.config().getString("Producer.IntLinkName"));
logger.debug("done filtering non-HTML text ");
return result;
}
throw new RuntimeException(t.toString());
}
}
+
+ public Generator.Interceptor createGenerationInterceptor() throws MirLocalizerExc, MirLocalizerFailure {
+
+ if (MirGlobal.config().getBoolean("Mir.Producer.UseInterceptor", true)) {
+ return new Generator.Interceptor() {
+
+ public Object intercept(Object anObject) {
+ if (anObject instanceof EntityAdapter) {
+ return new InterceptedEntityAdapter((EntityAdapter) anObject);
+ }
+
+ return anObject;
+ }
+ };
+ }
+ else {
+ return null;
+ }
+ }
+
+ /**
+  * Wrapper around an {@link EntityAdapter} that runs every String-valued field through
+  * the enclosing localizer's <code>filterHTMLText</code> before returning it.
+  * The unfiltered values stay reachable through {@link #getRaw()}.
+  */
+ public class InterceptedEntityAdapter {
+   private EntityAdapter adapter;
+
+   InterceptedEntityAdapter(EntityAdapter anEntityAdapter) {
+     this.adapter = anEntityAdapter;
+   }
+
+   /**
+    * Looks up a field on the wrapped adapter.
+    *
+    * @param aField name of the field to fetch
+    * @return the filtered text for String values, the original value otherwise
+    */
+   public Object get(String aField) {
+     Object value = adapter.get(aField);
+     if (!(value instanceof String)) {
+       return value;
+     }
+
+     return filterHTMLText((String) value);
+   }
+
+   /**
+    * @return a {@link RawEntityAdapter} over the same wrapped adapter
+    */
+   public Object getRaw() {
+     RawEntityAdapter unfiltered = new RawEntityAdapter(adapter);
+     return unfiltered;
+   }
+ }
+
+ public class RawEntityAdapter {
+ private EntityAdapter adapter;
+
+ RawEntityAdapter(EntityAdapter anEntityAdapter) {
+ adapter = anEntityAdapter;
+ }
+
+ public Object get(String aField) {
+ return adapter.get(aField);
+ }
+ }
+
public String filterHTMLText(String aText) {
try {
StringWriter out = new StringWriter();
tidy.setCharEncoding(Configuration.UTF8);
tidy.setErrout(logger.asPrintWriter(LoggerWrapper.DEBUG_MESSAGE));
print(tidy.parseDOM(in, null), out);
-
+
return out.toString();
}
catch (IOException e) {
}
}
+
+ /**
+  * Scheme names that an attribute value must not start with; checkAttrValue appends
+  * the ':' and rejects any value beginning with one of them.
+  */
+ private String[] badAttributeValuePrefixes = {
+   "javascript", "vbscript", "about", "wysiwyg",
+   "data", "view-source", "ms-its", "mhtml",
+   "shell", "lynxexec", "lynxcgi", "hcp",
+   "ms-help", "help", "disk", "vnd.ms.radio",
+   "opera", "res", "resource", "chrome",
+   "mocha", "livescript"
+ };
+
+
+ /**
+  * Attribute names that are never written to the filtered output: script event
+  * handlers plus the presentational <code>style</code>, <code>height</code> and
+  * <code>width</code> attributes. isBadAttr compares names without regard to case,
+  * so the mixed-case duplicates below are harmless.
+  *
+  * The misspelt "onDataAvailible" has been corrected to "onDataAvailable", and the
+  * plural "onRowsDelete" / "onRowsInserted" handler names have been added next to
+  * the singular forms that were already listed.
+  */
+ private String[] badAttributes = {
+   "onabort", "onblur", "onchange", "onclick", "ondblclick", "onerror",
+   "onfocus", "onkeydown", "onKeypress", "onkeyup", "onload", "onmousedown",
+   "onmousemove", "onmouseout", "onmouseover", "onmouseup", "onreset",
+   "onselect", "onsubmit", "onunload", "onload", "onclick", "onfocus",
+   "onblur", "FSCommand", "onAbort", "onActivate", "onAfterPrint",
+   "onAfterUpdate", "onBeforeActivate", "onBeforeCopy", "onBeforeCut",
+   "onBeforeDeactivate", "onBeforeEditFocus", "onBeforePaste",
+   "onBeforePrint", "onBeforeUnload", "onBegin", "onBlur", "onBounce",
+   "onCellChange", "onChange", "onClick", "onContextMenu", "onControlSelect",
+   "onCopy", "onCut", "onDataAvailable", "onDataSetChanged", "onDataSetComplete",
+   "onDblClick", "onDeactivate", "onDrag", "onDragEnd", "onDragLeave", "onDragEnter",
+   "onDragOver", "onDragDrop", "onDrop", "onEnd", "onError", "onErrorUpdate", "onExit",
+   "onFilterChange", "onFinish", "onFocus", "onFocusIn", "onFocusOut", "onHelp",
+   "onKeyDown", "onKeyPress", "onKeyUp", "onLayoutComplete", "onLoad", "onLoseCapture",
+   "onMediaComplete", "onMediaError", "onMouseDown", "onMouseEnter", "onMouseLeave",
+   "onMouseMove", "onMouseOut", "onMouseOver", "onMouseUp", "onMouseWheel", "onMove",
+   "onMoveEnd", "onMoveStart", "onOutOfSync", "onPaste", "onPause", "onProgress",
+   "onPropertyChange", "onReadyStateChange", "onRepeat", "onReset", "onResize",
+   "onResizeEnd", "onResizeStart", "onResume", "onReverse", "onRowEnter", "onRowExit",
+   "onRowDelete", "onRowsDelete", "onRowInserted", "onRowsInserted", "onScroll", "onSeek",
+   "onSelect", "onSelectionChange", "onSelectStart", "onStart", "onStop", "onSynchRestored",
+   "onSubmit", "onTimeError", "onTrackChange", "onUnload", "onURLFlip", "seekSegmentTime",
+   "style", "height", "width"};
+
+ /**
+  * Tells whether an attribute name appears in {@link #badAttributes}, ignoring case.
+  *
+  * @param attrName attribute name as found in the parsed document
+  * @return <code>true</code> if the name is blacklisted, <code>false</code> otherwise
+  *         (including when <code>attrName</code> is <code>null</code>)
+  */
+ private boolean isBadAttr(String attrName) {
+   for (int i = 0; i < badAttributes.length; i++) {
+     // equalsIgnoreCase compares character by character and never consults the default
+     // locale. The previous toLowerCase() comparison turned 'I' into a dotless i under
+     // a Turkish locale, so an upper-case "ONCLICK" no longer matched "onclick".
+     if (badAttributes[i].equalsIgnoreCase(attrName)) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+ /**
+  * Deletes every match of the whitespace expression from the given text.
+  *
+  * @param aString text to strip
+  * @return the text without whitespace, or an empty string if the substitution throws
+  */
+ private String stripWhitespace(String aString) {
+   String stripped;
+   try {
+     stripped = regularExpressionWhitespace.substituteAll(aString, "");
+   }
+   catch (Throwable t) {
+     stripped = "";
+   }
+   return stripped;
+ }
+
private boolean checkAttr(String attrName) {
- if (attrName.equals("onLoad") || attrName.equals("onClick") || attrName.equals("onFocus") || attrName.equals("onBlur") || attrName.equals("onMouseOver") || attrName.equals("onMouseOut") || attrName.equals("style") || attrName.equals("STYLE") || attrName.equals("height") || attrName.equals("width") || attrName.equals("HEIGHT") || attrName.equals("WIDTH"))
+ if (isBadAttr(attrName)) {
return false;
- return true;
+ }
+ return true;
+
+ }
+ /**
+  * Checks an attribute value against {@link #badAttributeValuePrefixes}.
+  *
+  * Whitespace is removed before the comparison, and the prefix match ignores case
+  * without depending on the JVM's default locale.
+  *
+  * @param attrValue attribute value as found in the parsed document
+  * @return <code>false</code> if the value starts with a blacklisted prefix followed
+  *         by ':', <code>true</code> otherwise
+  */
+ private boolean checkAttrValue(String attrValue) {
+   // The stripped value does not change between iterations, so compute it once.
+   String compactValue = stripWhitespace(attrValue);
+
+   for (int i = 0; i < badAttributeValuePrefixes.length; i++) {
+     String scheme = badAttributeValuePrefixes[i] + ":";
+     // regionMatches(ignoreCase = true, ...) is locale independent. The previous
+     // toLowerCase() call mapped 'I' to a dotless i under a Turkish locale, so
+     // "JAVASCRIPT:" was not recognised as "javascript:".
+     if (compactValue.regionMatches(true, 0, scheme, 0, scheme.length())) {
+       return false;
+     }
+   }
+   return true;
}
+
private boolean checkNode(String nodeName) {
- List languages = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";");
-
+ List languages = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";");
+
Iterator i = languages.iterator();
while (i.hasNext()) {
- if (nodeName.equals(i.next()))
+ if (nodeName.equals(i.next())) {
return true;
+ }
}
return false;
}
for (int i = 0; i < attrs.getLength(); i++) {
String attrName = attrs.item(i).getNodeName();
- if (checkAttr(attrName)) {
+ String attrValue = attrs.item(i).getNodeValue();
+ if (checkAttr(attrName) && checkAttrValue(attrValue)) {
out.write(' ');
out.write(attrs.item(i).getNodeName());
out.write("=\"");
}
}
- if (node.getChildNodes()==null || node.getChildNodes().getLength()==0) {
+ if (node.getChildNodes() == null || node.getChildNodes().getLength() == 0) {
out.write("/");
}
out.write('>');
break;
case Node.TEXT_NODE:
- String value=node.getNodeValue();
- try{
- value=regularExpressionLT.substituteAll(value, "<");
- value=regularExpressionGT.substituteAll(value, ">");
- }
- catch (Throwable t){
- value="";
- }
- out.write(value);
+ String value = node.getNodeValue();
+ try {
+ value = regularExpressionLT.substituteAll(value, "<");
+ value = regularExpressionGT.substituteAll(value, ">");
+ }
+ catch (Throwable t) {
+ value = "";
+ }
+ out.write(value);
break;
}
- if (type == Node.ELEMENT_NODE && canOutput && node.getChildNodes()!=null && node.getChildNodes().getLength()>0) {
+ if (type == Node.ELEMENT_NODE && canOutput && node.getChildNodes() != null && node.getChildNodes().getLength() > 0) {
out.write("</");
out.write(node.getNodeName());
out.write('>');
}
public static class Utility extends ReflectionGeneratorFunctionsAdapter {
- public Utility () {
+ public Utility() {
super(new MirBasicUtilityFunctions());
}
public Object getDatetime() {
return new GeneratorDateTimeFunctions.DateTimeFunctions(
- MirPropertiesConfiguration.instance().getString("Mir.DefaultTimezone"));
+ MirPropertiesConfiguration.instance().getString("Mir.DefaultTimezone"));
}
public Object getCompressWhitespace() {