*/
package mircoders.localizer.basic;
-import java.util.GregorianCalendar;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-
import mir.config.MirPropertiesConfiguration;
import mir.entity.adapter.EntityAdapter;
import mir.entity.adapter.EntityIteratorAdapter;
+import mir.generator.Generator;
+import mir.generator.GeneratorExc;
+import mir.generator.GeneratorFailure;
import mir.log.LoggerWrapper;
-import mir.misc.StringUtil;
-import mir.util.GeneratorDateTimeFunctions;
-import mir.util.GeneratorExpressionFunctions;
-import mir.util.GeneratorFormatAdapters;
-import mir.util.GeneratorHTMLFunctions;
-import mir.util.GeneratorIntegerFunctions;
-import mir.util.GeneratorListFunctions;
-import mir.util.GeneratorRegularExpressionFunctions;
-import mir.util.GeneratorStringFunctions;
+import mir.util.*;
+import mir.util.generator.ReflectionGeneratorFunctionsAdapter;
import mircoders.global.MirGlobal;
import mircoders.localizer.MirLocalizerExc;
import mircoders.localizer.MirLocalizerFailure;
import mircoders.localizer.MirProducerAssistantLocalizer;
+import org.w3c.dom.Document;
+import org.w3c.dom.NamedNodeMap;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.w3c.tidy.Configuration;
+import org.w3c.tidy.Tidy;
+import org.apache.oro.text.regex.*;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.StringWriter;
+import java.util.*;
public class MirBasicProducerAssistantLocalizer implements MirProducerAssistantLocalizer {
protected LoggerWrapper logger;
- public void initializeGenerationValueSet(Map aValueSet) throws MirLocalizerExc, MirLocalizerFailure {
+ private HTMLStripper stripper;
+ private Pattern regularExpressionWhitespace;
+ private Pattern regularExpressionLeadingSlashes;
+ private Set disallowedAttributes = new HashSet();
+ private Set disallowedPrefixes = new HashSet();
+ private Set allowedNodes = new HashSet();
+ private Set externalPrefixes = new HashSet();
+ private Set allowedExternalPrefixes = new HashSet();
+
+
+
+
+ /**
+  * Creates the localizer: instantiates the HTML stripper, compiles the regular
+  * expressions used while checking attribute values and reads the
+  * semicolon-separated <code>Localizer.HTML.*</code> lists from the
+  * configuration. Every list entry is stored lower-cased.
+  *
+  * @throws MirLocalizerFailure wrapping any problem raised during set-up
+  */
+ public MirBasicProducerAssistantLocalizer() throws MirLocalizerFailure {
+   try {
+     stripper = new HTMLStripper();
+     Perl5Compiler compiler = new Perl5Compiler();
+
+     // Matches real whitespace as well as the textual character references for
+     // line feed and carriage return. The references must stay spelled out:
+     // a raw line break inside the string literal does not compile.
+     regularExpressionWhitespace = compiler.compile("\\s+|&#10;|&#13;", Perl5Compiler.READ_ONLY_MASK);
+     regularExpressionLeadingSlashes = compiler.compile("^//+", Perl5Compiler.READ_ONLY_MASK);
+
+     // attribute names that are never written to the output
+     Iterator i = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributes"), ";").iterator();
+     while (i.hasNext()) {
+       disallowedAttributes.add(((String) i.next()).toLowerCase());
+     }
+
+     // attribute value prefixes (compared with a trailing ':') that are rejected
+     i = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributeValuePrefixes"), ";").iterator();
+     while (i.hasNext()) {
+       disallowedPrefixes.add(((String) i.next()).toLowerCase());
+     }
+
+     // node names that are kept in the output
+     i = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";").iterator();
+     while (i.hasNext()) {
+       allowedNodes.add(((String) i.next()).toLowerCase());
+     }
+
+     // value prefixes that mark an attribute as pointing to an external location
+     i = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.ExternalLocationAttributeValuePrefixes"), ";").iterator();
+     while (i.hasNext()) {
+       externalPrefixes.add(((String) i.next()).toLowerCase());
+     }
+
+     // external value prefixes that are allowed nevertheless
+     i = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.WhitelistedExternalLocationAttributeValuePrefixes"), ";").iterator();
+     while (i.hasNext()) {
+       allowedExternalPrefixes.add(((String) i.next()).toLowerCase());
+     }
+   }
+   catch (Throwable t) {
+     throw new MirLocalizerFailure(t);
+   }
+ }
+
+ public void initializeGenerationValueSet(Map aValueSet) throws MirLocalizerExc, MirLocalizerFailure {
try {
Iterator i;
Map configMap = new HashMap();
- Map utilityMap = new HashMap();
logger = new LoggerWrapper("Localizer.ProducerAssistant");
// "new":
configMap.putAll(MirPropertiesConfiguration.instance().allSettings());
- utilityMap.put("compressWhitespace", new freemarker.template.utility.CompressWhitespace());
- utilityMap.put("encodeHTML", new GeneratorHTMLFunctions.encodeHTMLGeneratorFunction());
- utilityMap.put("prettyEncodeHTML", new GeneratorHTMLFunctions.prettyEncodeHTMLGeneratorFunction());
- utilityMap.put("encodeXML", new GeneratorHTMLFunctions.encodeXMLGeneratorFunction());
- utilityMap.put("encodeURI", new GeneratorHTMLFunctions.encodeURIGeneratorFunction());
- utilityMap.put("subString", new GeneratorStringFunctions.subStringFunction());
- utilityMap.put("subList", new GeneratorListFunctions.subListFunction());
- utilityMap.put("isOdd", new GeneratorIntegerFunctions.isOddFunction());
- utilityMap.put("increment", new GeneratorIntegerFunctions.incrementFunction());
- utilityMap.put("evaluate", new GeneratorExpressionFunctions.evaluateExpressionFunction());
- utilityMap.put("constructString", new GeneratorStringFunctions.constructStructuredStringFunction());
- utilityMap.put("parseStructuredString", new GeneratorStringFunctions.structuredStringParserFunction());
- utilityMap.put("escapeJDBCString", new GeneratorStringFunctions.jdbcStringEscapeFunction());
- utilityMap.put("regexpreplace", new GeneratorRegularExpressionFunctions.regularExpressionReplaceFunction());
- utilityMap.put("regexpmatch", new GeneratorRegularExpressionFunctions.regularExpressionMatchFunction());
- utilityMap.put("datetime", new GeneratorDateTimeFunctions.DateTimeFunctions(
- MirPropertiesConfiguration.instance().getString("Mir.DefaultTimezone")));
-
aValueSet.put("config", configMap);
- aValueSet.put("utility", utilityMap);
+
+ aValueSet.put("utility", new Utility());
aValueSet.put("languages",
- new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language"));
+ new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language"));
aValueSet.put("topics",
- new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "topic"));
+ new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "topic"));
Map articleTypeMap = new HashMap();
articleTypeMap.put("openposting", "0");
commentStatusMap.put(commentStatus.get("name"), commentStatus.get("id"));
}
aValueSet.put("commentstatus", commentStatusMap);
+ aValueSet.put("languageCodeToId", new getLanguageIdFunction());
}
catch (Throwable t) {
logger.error("initializeGenerationValueSet: Exception while collecting comment statuses" + t.getMessage());
- throw new RuntimeException(t.getMessage());
+
+ throw new MirLocalizerFailure(t);
+ }
+
+ }
+
+ public static class getLanguageIdFunction implements Generator.Function {
+ private Map languageCodeToId;
+ private String otherLanguageId;
+ private LoggerWrapper logger = new LoggerWrapper("Localizer.Earth.getLanguageIdFunction");
+
+ public getLanguageIdFunction() throws MirLocalizerFailure {
+ try {
+ otherLanguageId = "";
+ languageCodeToId = new HashMap();
+
+ Iterator i = new EntityIteratorAdapter("", "", 20, MirGlobal.localizer().dataModel().adapterModel(), "language");
+ while (i.hasNext()) {
+ EntityAdapter language = (EntityAdapter) i.next();
+ if (language.get("code").equals("ot")) {
+ otherLanguageId = (String) language.get("id");
+ }
+
+ languageCodeToId.put(language.get("code"), language.get("id"));
+ }
+ }
+ catch (Throwable t) {
+ logger.error(t.toString());
+
+ throw new MirLocalizerFailure(t);
+ }
+ }
+
+ public Object perform(List aParameters) throws GeneratorExc, GeneratorFailure {
+ try {
+ if (aParameters.size() != 1) {
+ throw new GeneratorExc("getLanguageIdFunction: 1 parameter expected: language-code");
+ }
+
+ String result = (String) languageCodeToId.get(aParameters.get(0));
+ if (result == null) {
+ result = otherLanguageId;
+ }
+
+ return result;
+ }
+ catch (GeneratorExc e) {
+ throw e;
+ }
+ catch (Throwable t) {
+ throw new GeneratorFailure("getLanguageIdFunction: " + t.getMessage(), t);
+ }
}
+ }
- };
public String filterNonHTMLText(String aText) {
logger.debug("about to filter non HTML Text of length " + aText.length());
try {
String result =
- StringUtil.createHTML(
- StringUtil.removeHTMLTags(aText),
- MirGlobal.config().getString("Producer.ImageRoot"),
- MirGlobal.config().getString("Producer.MailLinkName"),
- MirGlobal.config().getString("Producer.ExtLinkName"),
- MirGlobal.config().getString("Producer.IntLinkName")
- );
+ stripper.createHTML(
+ stripper.removeHTMLTags(aText),
+ MirGlobal.config().getString("Producer.ImageRoot"),
+ MirGlobal.config().getString("Producer.MailLinkName"),
+ MirGlobal.config().getString("Producer.ExtLinkName"),
+ MirGlobal.config().getString("Producer.IntLinkName"));
logger.debug("done filtering non-HTML text ");
return result;
}
}
}
+ public Generator.Interceptor createGenerationInterceptor() throws MirLocalizerExc, MirLocalizerFailure {
+
+ if (MirGlobal.config().getBoolean("Mir.Producer.UseInterceptor", true)) {
+ return new Generator.Interceptor() {
+
+ public Object intercept(Object anObject) {
+ if (anObject instanceof EntityAdapter) {
+ return new InterceptedEntityAdapter((EntityAdapter) anObject);
+ }
+
+ return anObject;
+ }
+ };
+ }
+ else {
+ return null;
+ }
+ }
+
+ /**
+  * Wrapper around an {@link EntityAdapter} that passes every String field
+  * value through {@link #filterHTMLText(String)} before returning it.
+  * The unfiltered values stay reachable through {@link #getRaw()}.
+  */
+ public class InterceptedEntityAdapter {
+   private EntityAdapter adapter;
+
+   InterceptedEntityAdapter(EntityAdapter anEntityAdapter) {
+     adapter = anEntityAdapter;
+   }
+
+   /**
+    * @param aField name of the field to read from the wrapped adapter
+    * @return the filtered text for String values, the untouched value otherwise
+    */
+   public Object get(String aField) {
+     Object value = adapter.get(aField);
+
+     return (value instanceof String) ? filterHTMLText((String) value) : value;
+   }
+
+   /**
+    * @return a view on the same adapter that hands out unfiltered values
+    */
+   public Object getRaw() {
+     return new RawEntityAdapter(adapter);
+   }
+ }
+
+ public class RawEntityAdapter {
+ private EntityAdapter adapter;
+
+ RawEntityAdapter(EntityAdapter anEntityAdapter) {
+ adapter = anEntityAdapter;
+ }
+
+ public Object get(String aField) {
+ return adapter.get(aField);
+ }
+ }
+
public String filterHTMLText(String aText) {
- return StringUtil.deleteForbiddenTags(aText);
+ try {
+ StringWriter out = new StringWriter();
+ Tidy tidy = new Tidy();
+ ByteArrayInputStream in = new ByteArrayInputStream(aText.getBytes("UTF8"));
+ tidy.setMakeClean(true);
+ tidy.setCharEncoding(Configuration.UTF8);
+ tidy.setErrout(logger.asPrintWriter(LoggerWrapper.DEBUG_MESSAGE));
+ print(tidy.parseDOM(in, null), out);
+
+ return out.toString();
+ }
+ catch (IOException e) {
+ return e.getMessage();
+ }
+ }
+
+
+ /**
+  * Checks an attribute name against the set of disallowed attribute names.
+  * The name is lower-cased before the lookup.
+  *
+  * @param anAttibuteName the attribute name to check
+  * @return <code>true</code> if the name is not in the disallowed set
+  */
+ private boolean testAttribueName(String anAttibuteName) {
+   String normalizedName = anAttibuteName.toLowerCase();
+   boolean disallowed = disallowedAttributes.contains(normalizedName);
+
+   return !disallowed;
+ }
+
+ /**
+  * Deletes every match of the whitespace expression from the given string.
+  *
+  * @param aString the string to clean
+  * @return the string with all matches replaced by the empty string
+  */
+ private String stripWhitespace(String aString) {
+   Perl5Matcher matcher = new Perl5Matcher();
+   Perl5Substitution nothing = new Perl5Substitution("");
+
+   return Util.substitute(matcher, regularExpressionWhitespace, nothing, aString, Util.SUBSTITUTE_ALL);
+ }
+
+ /**
+  * Checks an attribute value against the disallowed value prefixes. The
+  * value is lower-cased and stripped of whitespace, then compared with each
+  * prefix followed by a colon.
+  *
+  * @param anAttributeValue the attribute value to check
+  * @return <code>false</code> if the cleaned value starts with a disallowed
+  *         prefix plus ':', <code>true</code> otherwise
+  */
+ private boolean testAttibuteValue(String anAttributeValue) {
+   if (disallowedPrefixes.isEmpty()) {
+     // nothing to compare against, so the value is not inspected at all
+     return true;
+   }
+
+   // the cleaned value is the same for every prefix: compute it only once
+   String normalizedValue = stripWhitespace(anAttributeValue.toLowerCase());
+
+   Iterator i = disallowedPrefixes.iterator();
+   while (i.hasNext()) {
+     // todo: split the attribute value on : and use contains
+     if (normalizedValue.startsWith(((String) i.next()).toLowerCase() + ":")) {
+       return false;
+     }
+   }
+
+   return true;
+ }
+
+
+
+ /**
+  * Tells whether a node name is in the set of allowed nodes. The name is
+  * lower-cased before the lookup.
+  *
+  * @param nodeName the node name to check
+  * @return <code>true</code> if the lower-cased name is in the allowed set
+  */
+ private boolean checkNode(String nodeName) {
+   String normalizedName = nodeName.toLowerCase();
+
+   return allowedNodes.contains(normalizedName);
+ }
+
+ /**
+  * Decides whether an attribute may be kept, taking the tag it belongs to
+  * into account. The check is only active when the configuration setting
+  * <code>Localizer.HTML.KillWebBugs</code> is true.
+  *
+  * @param aTag the name of the tag carrying the attribute
+  * @param anAttibute the attribute name
+  * @param aValue the attribute value
+  * @return <code>false</code> if the cleaned value starts with an external
+  *         prefix and with none of the whitelisted external prefixes;
+  *         <code>true</code> otherwise, and always for <code>a/href</code>
+  *         and <code>form/action</code>
+  */
+ private boolean testAttributeInContext(String aTag, String anAttibute, String aValue){
+   /* The intent here is to prevent external content from being loaded by the user's browser.
+      It's extra paranoid, so will strip some legitimate stuff like an alt="http://www.indymedia.org"
+   */
+   if (! MirGlobal.config().getBoolean("Localizer.HTML.KillWebBugs")) {
+     return true;
+   }
+
+   if (("a".equalsIgnoreCase(aTag) && "href".equalsIgnoreCase(anAttibute)) ||
+       ("form".equalsIgnoreCase(aTag) && "action".equalsIgnoreCase(anAttibute))) {
+     // because we still love the web, even if it doesn't return the favor
+
+     return true;
+   }
+
+   String value = stripWhitespace(aValue.toLowerCase());
+
+   Iterator i = externalPrefixes.iterator();
+   while (i.hasNext()) {
+     if (value.startsWith((String) i.next())) {
+       // we have hit a bad prefix, but we need to check the whitelist
+       Iterator wl = allowedExternalPrefixes.iterator();
+       while (wl.hasNext()) {
+         if (value.startsWith((String) wl.next())) {
+           return true;
+         }
+       }
+
+       // Reject only once an external prefix has actually matched. Rejecting
+       // after the first loop pass regardless of a match would block every
+       // value that is not whitelisted.
+       return false; //don't let this attribute through
+     }
+   }
+
+   return true; //didn't seem to be an external prefix, so it's fine
+ }
+
+ /**
+  * Writes the given DOM node and its descendants to <code>out</code>.
+  * <ul>
+  * <li>Document nodes: the document element is written.</li>
+  * <li>Element nodes: the tag is written only if {@link #checkNode(String)}
+  *     accepts its name; its children are visited in either case. Attributes
+  *     are written only if they pass the name, value and context tests.</li>
+  * <li>Text nodes: the text is written through
+  *     <code>HTMLRoutines.encodeHTML</code>.</li>
+  * <li>All other node types produce no output.</li>
+  * </ul>
+  *
+  * @param node the node to write; <code>null</code> is ignored
+  * @param out the writer receiving the output
+  * @throws IOException declared for callers; see the individual write calls
+  */
+ private void print(Node node, StringWriter out) throws IOException {
+   if (node == null) {
+     return;
+   }
+   int type = node.getNodeType();
+
+   // will this node be present in the output?
+   boolean keepNode = checkNode(node.getNodeName());
+
+   switch (type) {
+
+     case Node.DOCUMENT_NODE:
+
+       print(((Document) node).getDocumentElement(), out);
+       out.flush();
+       break;
+
+     case Node.ELEMENT_NODE:
+       if (keepNode) {
+         out.write('<');
+
+         out.write(node.getNodeName());
+         NamedNodeMap attrs = node.getAttributes();
+
+         for (int i = 0; i < attrs.getLength(); i++) {
+           String attrName = attrs.item(i).getNodeName();
+           String attrValue = attrs.item(i).getNodeValue();
+
+           // NOTE(review): presumably a value-less attribute can come back as
+           // null from the parser -- confirm; treat it as empty instead of failing
+           if (attrValue == null) {
+             attrValue = "";
+           }
+
+           // todo: what is this?
+           if (attrValue.startsWith("//")){
+             attrValue = Util.substitute(
+                 new Perl5Matcher(), regularExpressionLeadingSlashes, new Perl5Substitution("/"), attrValue);
+           }
+
+           if (testAttribueName(attrName) && testAttibuteValue(attrValue) && testAttributeInContext(node.getNodeName(), attrName, attrValue)) {
+             out.write(' ');
+             out.write(attrName);
+             out.write("=\"");
+
+             // write the value that was actually tested above, not the raw one
+             writeAttributeValue(attrValue, out);
+             out.write('"');
+           }
+         }
+
+         // nodes without children will use the shorthand form <br/>. Some browsers
+         // treat <br></br> as a double linebreak
+         if (node.getChildNodes() == null || node.getChildNodes().getLength() == 0) {
+           out.write("/");
+           out.write('>');
+           break;
+         }
+         out.write('>');
+       }
+
+
+       NodeList children = node.getChildNodes();
+       if (children != null) {
+         int len = children.getLength();
+         for (int i = 0; i < len; i++) {
+           print(children.item(i), out);
+         }
+       }
+
+       if (keepNode) {
+         out.write("</");
+         out.write(node.getNodeName());
+         out.write('>');
+       }
+
+       break;
+
+     case Node.TEXT_NODE:
+       out.write(HTMLRoutines.encodeHTML(node.getNodeValue()));
+
+       break;
+   }
+
+   out.flush();
+ }
+
+ /**
+  * Writes an attribute value for use inside double quotes: every double
+  * quote in the value is written as <code>&amp;quot;</code> so that the value
+  * cannot end the attribute early. All other characters are written as is.
+  */
+ private static void writeAttributeValue(String aValue, StringWriter out) {
+   for (int i = 0; i < aValue.length(); i++) {
+     char c = aValue.charAt(i);
+
+     if (c == '"') {
+       out.write("&quot;");
+     }
+     else {
+       out.write(c);
+     }
+   }
+ }
+
+ public static class Utility extends ReflectionGeneratorFunctionsAdapter {
+ public Utility() {
+ super(new MirBasicUtilityFunctions());
+ }
+
+ public Object getDatetime() {
+ return new GeneratorDateTimeFunctions.DateTimeFunctions(
+ MirPropertiesConfiguration.instance().getString("Mir.DefaultTimezone"));
+ }
+
+ public Object getCompressWhitespace() {
+ return new freemarker.template.utility.CompressWhitespace();
+ }
}
}