*/
package mircoders.localizer.basic;
+import gnu.regexp.RE;
import mir.config.MirPropertiesConfiguration;
import mir.entity.adapter.EntityAdapter;
import mir.entity.adapter.EntityIteratorAdapter;
import mir.generator.GeneratorExc;
import mir.generator.GeneratorFailure;
import mir.log.LoggerWrapper;
-import mir.misc.StringUtil;
-import mir.util.*;
+import mir.util.GeneratorDateTimeFunctions;
+import mir.util.GeneratorFormatAdapters;
+import mir.util.HTMLStripper;
+import mir.util.StringRoutines;
import mir.util.generator.ReflectionGeneratorFunctionsAdapter;
import mircoders.global.MirGlobal;
import mircoders.localizer.MirLocalizerExc;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
-import org.w3c.tidy.Tidy;
import org.w3c.tidy.Configuration;
+import org.w3c.tidy.Tidy;
import java.io.ByteArrayInputStream;
import java.io.IOException;
public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantLocalizer {
protected LoggerWrapper logger;
- public void initializeGenerationValueSet(Map aValueSet) throws MirLocalizerExc, MirLocalizerFailure {
+ private HTMLStripper stripper;
+ private RE regularExpressionLT;
+ private RE regularExpressionGT;
+ private RE regularExpressionWhitespace;
+ private RE regularExpressionLeadingSlashes;
+
+
+  /**
+   * Creates the localizer and prepares the helpers used by the HTML
+   * filtering code: an HTMLStripper and four precompiled regular
+   * expressions (less-than, greater-than, whitespace, leading slashes).
+   *
+   * @throws MirLocalizerFailure wrapping anything thrown while the helpers
+   *         are being created
+   */
+  public MirBasicProducerAssistantLocalizer() throws MirLocalizerFailure {
+    try {
+      stripper = new HTMLStripper();
+
+      regularExpressionLT = new RE("<");
+      regularExpressionGT = new RE(">");
+      // Matches runs of whitespace as well as the textual numeric character
+      // references for line feed and carriage return.
+      // NOTE(review): the two alternatives after \\s+ had been turned into raw
+      // line breaks inside the literal (not valid Java); restored as character
+      // references - confirm against version control.
+      regularExpressionWhitespace = new RE("\\s+|&#10;|&#13;");
+      regularExpressionLeadingSlashes = new RE("^//+");
+    }
+    catch (Throwable t) {
+      throw new MirLocalizerFailure(t);
+    }
+  }
+
/**
 * Populates the value set handed to generators with this localizer's
 * standard entries: "config", "utility" (a new Utility instance),
 * "languages" and "topics" (entity iterators created for the "language"
 * and "topic" definitions, with a batch argument of 20).
 *
 * @param aValueSet the map that receives the entries
 * @throws MirLocalizerFailure wrapping any Throwable raised while the
 *         entries are collected; the message is logged first
 */
+ public void initializeGenerationValueSet(Map aValueSet) throws MirLocalizerExc, MirLocalizerFailure {
try {
Iterator i;
aValueSet.put("config", configMap);
- aValueSet.put("utility", new Utility());
+ aValueSet.put("utility", new Utility());
aValueSet.put("languages",
- new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language"));
+ new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language"));
aValueSet.put("topics",
- new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "topic"));
+ new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "topic"));
// NOTE(review): what happens to articleTypeMap after this point is not
// visible in this excerpt.
Map articleTypeMap = new HashMap();
articleTypeMap.put("openposting", "0");
}
catch (Throwable t) {
// Only the message is logged here; the Throwable itself travels on as the
// cause of the MirLocalizerFailure thrown below.
logger.error("initializeGenerationValueSet: Exception while collecting comment statuses" + t.getMessage());
- throw new RuntimeException(t.getMessage());
+
+ throw new MirLocalizerFailure(t);
}
- };
+ }
+
/**
 * Generator function that translates a language code into the id of the
 * matching language entity. Codes without a mapping resolve to the id of
 * the language whose code is "ot".
 */
public static class getLanguageIdFunction implements Generator.Function {
// language code -> language id, filled from the language entities
private Map languageCodeToId;
// id of the language with code "ot"; used when a code has no mapping
private String otherLanguageId;
// NOTE(review): the enclosing constructor/initialiser header for the loop
// below is not visible in this excerpt.
Iterator i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language");
while (i.hasNext()) {
EntityAdapter language = (EntityAdapter) i.next();
- if (language.get("code").equals("ot"))
+ if (language.get("code").equals("ot")) {
otherLanguageId = (String) language.get("id");
+ }
languageCodeToId.put(language.get("code"), language.get("id"));
}
/**
 * Looks up the language id for the single language-code parameter.
 *
 * @param aParameters must contain exactly one element, the language code
 * @return the mapped id, or otherLanguageId when the code is unknown
 * @throws GeneratorFailure for any Throwable raised inside; this includes
 *         the GeneratorExc for a wrong parameter count, because that is
 *         thrown inside the try block and caught by the handler below
 */
public Object perform(List aParameters) throws GeneratorExc, GeneratorFailure {
try {
- if (aParameters.size() != 1)
+ if (aParameters.size() != 1) {
throw new GeneratorExc("getLanguageIdFunction: 1 parameter expected: language-code");
+ }
String result = (String) languageCodeToId.get(aParameters.get(0));
- if (result == null)
+ if (result == null) {
result = otherLanguageId;
+ }
return result;
}
catch (Throwable t) {
throw new GeneratorFailure("getLanguageIdFunction: " + t.getMessage(), t);
}
- };
+ }
}
logger.debug("about to filter non HTML Text of length " + aText.length());
try {
String result =
- StringUtil.createHTML(
- StringUtil.removeHTMLTags(aText),
- MirGlobal.config().getString("Producer.ImageRoot"),
- MirGlobal.config().getString("Producer.MailLinkName"),
- MirGlobal.config().getString("Producer.ExtLinkName"),
- MirGlobal.config().getString("Producer.IntLinkName")
- );
+ stripper.createHTML(
+ stripper.removeHTMLTags(aText),
+ MirGlobal.config().getString("Producer.ImageRoot"),
+ MirGlobal.config().getString("Producer.MailLinkName"),
+ MirGlobal.config().getString("Producer.ExtLinkName"),
+ MirGlobal.config().getString("Producer.IntLinkName"));
logger.debug("done filtering non-HTML text ");
return result;
}
throw new RuntimeException(t.toString());
}
}
+
+  /**
+   * Creates the interceptor applied to objects during generation.
+   *
+   * When the boolean setting "Mir.Producer.UseInterceptor" (default true)
+   * is on, the returned interceptor wraps every EntityAdapter in an
+   * InterceptedEntityAdapter and passes every other object through
+   * unchanged.
+   *
+   * @return the interceptor, or null when the setting is off
+   */
+  public Generator.Interceptor createGenerationInterceptor() throws MirLocalizerExc, MirLocalizerFailure {
+    boolean interceptorEnabled = MirGlobal.config().getBoolean("Mir.Producer.UseInterceptor", true);
+    if (!interceptorEnabled) {
+      return null;
+    }
+
+    return new Generator.Interceptor() {
+      public Object intercept(Object anObject) {
+        return (anObject instanceof EntityAdapter)
+            ? new InterceptedEntityAdapter((EntityAdapter) anObject)
+            : anObject;
+      }
+    };
+  }
+
+  /**
+   * Wrapper around an EntityAdapter whose String field values are passed
+   * through filterHTMLText before being returned. The unfiltered values
+   * stay reachable through getRaw().
+   */
+  public class InterceptedEntityAdapter {
+    private EntityAdapter wrapped;
+
+    InterceptedEntityAdapter(EntityAdapter anEntityAdapter) {
+      this.wrapped = anEntityAdapter;
+    }
+
+    /**
+     * Returns the field's value; String values are filtered, anything else
+     * is returned as delivered by the wrapped adapter.
+     */
+    public Object get(String aField) {
+      Object fieldValue = wrapped.get(aField);
+      if (!(fieldValue instanceof String)) {
+        return fieldValue;
+      }
+      return filterHTMLText((String) fieldValue);
+    }
+
+    /** Returns a RawEntityAdapter over the same underlying adapter. */
+    public Object getRaw() {
+      return new RawEntityAdapter(wrapped);
+    }
+  }
+
+  /**
+   * Thin view on an EntityAdapter that hands out field values exactly as
+   * the underlying adapter returns them, without any filtering.
+   */
+  public class RawEntityAdapter {
+    private EntityAdapter wrapped;
+
+    RawEntityAdapter(EntityAdapter anEntityAdapter) {
+      this.wrapped = anEntityAdapter;
+    }
+
+    /** Delegates straight to the underlying adapter. */
+    public Object get(String aField) {
+      Object fieldValue = wrapped.get(aField);
+      return fieldValue;
+    }
+  }
+
/**
 * Parses the given text with Tidy and re-serialises the resulting DOM
 * through print(), which applies the node/attribute checks.
 *
 * @param aText the text to filter; it is fed to Tidy as UTF-8 bytes
 * @return the serialised result, or - when an IOException occurs - the
 *         exception's message instead of markup
 */
public String filterHTMLText(String aText) {
try {
StringWriter out = new StringWriter();
ByteArrayInputStream in = new ByteArrayInputStream(aText.getBytes("UTF8"));
// NOTE(review): the declaration of tidy is not visible in this excerpt.
tidy.setMakeClean(true);
tidy.setCharEncoding(Configuration.UTF8);
// Tidy's diagnostics go to the logger at debug level.
+ tidy.setErrout(logger.asPrintWriter(LoggerWrapper.DEBUG_MESSAGE));
print(tidy.parseDOM(in, null), out);
+
return out.toString();
- } catch (IOException e) {
+ }
+ catch (IOException e) {
// The caller receives the error text as the method's result.
return e.getMessage();
}
}
+
+
+  /**
+   * Tells whether an attribute name appears in the ';'-separated
+   * "Localizer.HTML.BadAttributes" configuration entry.
+   *
+   * The comparison ignores case and does not depend on the JVM's default
+   * locale, so a name such as "ONCLICK" is recognised under any locale.
+   *
+   * @param attrName the attribute name to look up
+   * @return true if the name is configured as a bad attribute
+   */
+  private boolean isBadAttr(String attrName) {
+    List badAttributes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributes"), ";");
+    Iterator i = badAttributes.iterator();
+    while (i.hasNext()) {
+      // equalsIgnoreCase compares character by character without locale
+      // rules; lower-casing both sides with the default locale would map
+      // 'I' to a dotless i under a Turkish locale and miss the match.
+      if (attrName.equalsIgnoreCase((String) i.next())) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Removes every match of the precompiled whitespace expression from the
+   * given string.
+   *
+   * @param aString the text to strip
+   * @return the stripped text, or the empty string if the substitution
+   *         throws anything
+   */
+  private String stripWhitespace(String aString) {
+    String stripped;
+    try {
+      stripped = regularExpressionWhitespace.substituteAll(aString, "");
+    }
+    catch (Throwable t) {
+      // any failure yields the empty string
+      stripped = "";
+    }
+    return stripped;
+  }
+
/**
 * Decides whether an attribute may be written to the filtered output.
 *
 * @param attrName the attribute name
 * @return false if isBadAttr reports the name as bad, true otherwise
 */
private boolean checkAttr(String attrName) {
- if (attrName.equals("onLoad") || attrName.equals("onClick") || attrName.equals("onFocus") || attrName.equals("onBlur") || attrName.equals("onMouseOver") || attrName.equals("onMouseOut") || attrName.equals("style") || attrName.equals("STYLE") || attrName.equals("height") || attrName.equals("width") || attrName.equals("HEIGHT") || attrName.equals("WIDTH"))
+ if (isBadAttr(attrName)) {
return false;
- else
- return true;
+ }
+ return true;
}
+  /**
+   * Decides whether an attribute value may be written to the filtered
+   * output. A value is rejected when, after lower-casing and whitespace
+   * stripping, it starts with one of the prefixes from the ';'-separated
+   * "Localizer.HTML.BadAttributeValuePrefixes" configuration entry followed
+   * by a colon.
+   *
+   * Lower-casing uses a fixed locale so the result does not depend on the
+   * JVM's default locale (under a Turkish default locale "JAVASCRIPT:"
+   * would otherwise not lower-case to "javascript:").
+   *
+   * @param attrValue the attribute value to inspect
+   * @return false if the value starts with a configured bad prefix
+   */
+  private boolean checkAttrValue(String attrValue) {
+    List badPrefixes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributeValuePrefixes"), ";");
+    // The normalised value is the same for every prefix, so compute it once.
+    String normalizedValue = stripWhitespace(attrValue.toLowerCase(java.util.Locale.ENGLISH));
+    Iterator i = badPrefixes.iterator();
+    while (i.hasNext()) {
+      String badPrefix = ((String) i.next()).toLowerCase(java.util.Locale.ENGLISH) + ":";
+      if (normalizedValue.startsWith(badPrefix)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+
/**
 * Decides whether an element may be written to the filtered output.
 * The new version reads the accepted names from the ';'-separated
 * "Localizer.HTML.Whitelist" configuration entry; the removed version
 * used a fixed list of element names.
 *
 * @param nodeName the element name; compared with equals, so the match is
 *        exact and case-sensitive
 * @return true if the name is in the whitelist, false otherwise
 */
private boolean checkNode(String nodeName) {
- if (nodeName.equals("a") ||
- nodeName.equals("img") ||
- nodeName.equals("h1") ||
- nodeName.equals("h2") ||
- nodeName.equals("h3") ||
- nodeName.equals("h4") ||
- nodeName.equals("h5") ||
- nodeName.equals("h6") ||
- nodeName.equals("br") ||
- nodeName.equals("form") ||
- nodeName.equals("input") ||
- nodeName.equals("hr") ||
- nodeName.equals("strong") ||
- nodeName.equals("font") ||
- nodeName.equals("b") ||
- nodeName.equals("i") ||
- nodeName.equals("em") ||
- nodeName.equals("p") ||
- nodeName.equals("table") ||
- nodeName.equals("tr") ||
- nodeName.equals("td") ||
- nodeName.equals("th") ||
- nodeName.equals("ul") ||
- nodeName.equals("ol") ||
- nodeName.equals("li")
- ) {
- return true;
- } else {
+ List acceptableNodes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";");
- return false;
+ Iterator i = acceptableNodes.iterator();
+ while (i.hasNext()) {
+ if (nodeName.equals(i.next())) {
+ return true;
+ }
}
+ return false;
}
private void print(Node node, StringWriter out) throws IOException {
for (int i = 0; i < attrs.getLength(); i++) {
String attrName = attrs.item(i).getNodeName();
- if (checkAttr(attrName)) {
+ String attrValue = attrs.item(i).getNodeValue();
+ if (attrValue.startsWith("//")){
+ attrValue=regularExpressionLeadingSlashes.substitute(attrValue, "/");
+ }
+
+ if (checkAttr(attrName) && checkAttrValue(attrValue)) {
out.write(' ');
out.write(attrs.item(i).getNodeName());
out.write("=\"");
}
}
- if (node.getChildNodes()==null || node.getChildNodes().getLength()==0) {
+ if (node.getChildNodes() == null || node.getChildNodes().getLength() == 0) {
out.write("/");
}
out.write('>');
break;
case Node.TEXT_NODE:
- out.write(node.getNodeValue());
+        String value = node.getNodeValue();
+        try {
+          // Escape angle brackets so text content cannot be read back as
+          // markup. The replacement strings are the entity forms; replacing
+          // "<" with "<" would leave the text unchanged.
+          value = regularExpressionLT.substituteAll(value, "&lt;");
+          value = regularExpressionGT.substituteAll(value, "&gt;");
+        }
+        catch (Throwable t) {
+          // If escaping fails, write nothing rather than unescaped text.
+          value = "";
+        }
+        out.write(value);
+
break;
}
- if (type == Node.ELEMENT_NODE && canOutput && node.getChildNodes()!=null && node.getChildNodes().getLength()>0) {
+ if (type == Node.ELEMENT_NODE && canOutput && node.getChildNodes() != null && node.getChildNodes().getLength() > 0) {
out.write("</");
out.write(node.getNodeName());
out.write('>');
}
public static class Utility extends ReflectionGeneratorFunctionsAdapter {
- public Utility () {
+ public Utility() {
super(new MirBasicUtilityFunctions());
}
+
public Object getDatetime() {
return new GeneratorDateTimeFunctions.DateTimeFunctions(
- MirPropertiesConfiguration.instance().getString("Mir.DefaultTimezone"));
+ MirPropertiesConfiguration.instance().getString("Mir.DefaultTimezone"));
}
public Object getCompressWhitespace() {