- logger.printError("initializeGenerationValueSet: Exception "+t.getMessage());
- }
-
- aValueSet.put("topics", topicList);
- aValueSet.put("imclist", entityList);
- aValueSet.put("parentlist", parentList);
-
- Map articleTypeMap = new HashMap();
- articleTypeMap.put("openposting", "0");
- articleTypeMap.put("newswire", "1");
- articleTypeMap.put("feature", "2");
- articleTypeMap.put("topicspecial", "3");
- articleTypeMap.put("startspecial", "4");
- aValueSet.put("articletype", articleTypeMap);
- };
-
- public String filterText(String aText) {
- return StringUtil.createHTML(
- StringUtil.deleteForbiddenTags(aText),
- MirGlobal.getConfigProperty("Producer.ImageRoot"),
- MirGlobal.getConfigProperty("Producer.MailLinkName"),
- MirGlobal.getConfigProperty("Producer.ExtLinkName"),
- MirGlobal.getConfigProperty("Producer.IntLinkName")
- );
+ logger.error("error while filtering non-HTML text: " + t.toString());
+
+ throw new RuntimeException(t.toString());
+ }
+ }
+
+ public Generator.Interceptor createGenerationInterceptor() throws MirLocalizerExc, MirLocalizerFailure {
+
+ if (MirGlobal.config().getBoolean("Mir.Producer.UseInterceptor", true)) {
+ return new Generator.Interceptor() {
+
+ public Object intercept(Object anObject) {
+ if (anObject instanceof EntityAdapter) {
+ return new InterceptedEntityAdapter((EntityAdapter) anObject);
+ }
+
+ return anObject;
+ }
+ };
+ }
+ else {
+ return null;
+ }
+ }
+
+ /**
+  * Wrapper around an {@link EntityAdapter} whose String field values are
+  * passed through {@code filterHTMLText} before being returned. The
+  * unfiltered values stay reachable through {@link #getRaw()}.
+  */
+ public class InterceptedEntityAdapter {
+   private EntityAdapter wrappedAdapter;
+
+   InterceptedEntityAdapter(EntityAdapter anEntityAdapter) {
+     this.wrappedAdapter = anEntityAdapter;
+   }
+
+   /**
+    * Looks up a field on the wrapped adapter.
+    *
+    * @param aField name of the field to read
+    * @return the filtered text for String values, the value itself otherwise
+    */
+   public Object get(String aField) {
+     Object value = wrappedAdapter.get(aField);
+     if (!(value instanceof String)) {
+       return value;
+     }
+     return filterHTMLText((String) value);
+   }
+
+   /**
+    * @return a {@link RawEntityAdapter} over the same wrapped adapter
+    */
+   public Object getRaw() {
+     return new RawEntityAdapter(wrappedAdapter);
+   }
+ }
+
+ public class RawEntityAdapter {
+ private EntityAdapter adapter;
+
+ RawEntityAdapter(EntityAdapter anEntityAdapter) {
+ adapter = anEntityAdapter;
+ }
+
+ public Object get(String aField) {
+ return adapter.get(aField);
+ }
+ }
+
+ /**
+  * Parses the given text with Tidy and re-serializes the resulting DOM
+  * through {@code print}, which only emits the nodes and attributes that
+  * pass the configured checks.
+  *
+  * @param aText the text to filter; encoded as UTF-8 before parsing
+  * @return the re-serialized markup; if an IOException occurs, the
+  *         exception's message is returned instead (kept for backward
+  *         compatibility) and the failure is logged
+  */
+ public String filterHTMLText(String aText) {
+   try {
+     StringWriter out = new StringWriter();
+     Tidy tidy = new Tidy();
+     ByteArrayInputStream in = new ByteArrayInputStream(aText.getBytes("UTF8"));
+     tidy.setMakeClean(true);
+     tidy.setCharEncoding(Configuration.UTF8);
+     // Tidy's diagnostics go to the logger at debug level
+     tidy.setErrout(logger.asPrintWriter(LoggerWrapper.DEBUG_MESSAGE));
+     print(tidy.parseDOM(in, null), out);
+
+     return out.toString();
+   }
+   catch (IOException e) {
+     // Previously the failure was swallowed silently; record it so that an
+     // exception message showing up in generated output can be traced.
+     logger.error("error while filtering HTML text: " + e.toString());
+
+     return e.getMessage();
+   }
+ }
+
+
+
+ /**
+  * Tells whether an attribute name is listed in the ";"-separated
+  * "Localizer.HTML.BadAttributes" setting. The comparison ignores case.
+  *
+  * @param attrName the attribute name to look up
+  * @return true if the name matches one of the configured entries
+  */
+ private boolean isBadAttr(String attrName) {
+   List badAttributes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributes"), ";");
+   Iterator i = badAttributes.iterator();
+   while (i.hasNext()) {
+     // equalsIgnoreCase does not depend on the default locale, unlike
+     // comparing toLowerCase() results (under a Turkish locale "ONCLICK"
+     // would lowercase to a dotless i and slip past an "onclick" entry).
+     if (((String) i.next()).equalsIgnoreCase(attrName)) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+ /**
+  * Replaces every match of {@code regularExpressionWhitespace} in the given
+  * string with the empty string.
+  *
+  * @param aString the string to process
+  * @return the processed string, or "" if the substitution throws
+  */
+ private String stripWhitespace(String aString) {
+   String stripped;
+   try {
+     stripped = regularExpressionWhitespace.substituteAll(aString, "");
+   }
+   catch (Throwable t) {
+     stripped = "";
+   }
+   return stripped;
+ }
+
+ /**
+  * Tells whether an attribute name is acceptable, i.e. not reported as bad
+  * by {@code isBadAttr}.
+  *
+  * @param attrName the attribute name to check
+  * @return false if {@code isBadAttr} flags the name, true otherwise
+  */
+ private boolean checkAttr(String attrName) {
+   return !isBadAttr(attrName);
+ }
+
+ /**
+  * Tells whether an attribute value is acceptable. The value is lowercased,
+  * passed through {@code stripWhitespace}, and rejected when it starts with
+  * one of the ";"-separated entries of
+  * "Localizer.HTML.BadAttributeValuePrefixes" followed by a colon.
+  *
+  * @param attrValue the attribute value to check; must not be null
+  * @return false if the value starts with a configured prefix, true otherwise
+  */
+ private boolean checkAttrValue(String attrValue) {
+   List badPrefixes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.BadAttributeValuePrefixes"), ";");
+
+   // Lowercase with a fixed locale: with the default locale a value such as
+   // "JAVASCRIPT:" would not lowercase to "javascript:" under Turkish rules
+   // and would pass the check. Normalizing once also avoids repeating the
+   // work for every configured prefix.
+   String normalizedValue = stripWhitespace(attrValue.toLowerCase(java.util.Locale.ENGLISH));
+   // NOTE(review): stripWhitespace returns "" when its substitution fails,
+   // which makes this check accept the value - confirm that is intended.
+
+   Iterator i = badPrefixes.iterator();
+   while (i.hasNext()) {
+     String badPrefix = ((String) i.next()).toLowerCase(java.util.Locale.ENGLISH) + ":";
+     if (normalizedValue.startsWith(badPrefix)) {
+       return false;
+     }
+   }
+   return true;
+ }
+
+
+ /**
+  * Tells whether a node name appears in the ";"-separated
+  * "Localizer.HTML.Whitelist" setting. The comparison is case sensitive.
+  *
+  * @param nodeName the node name to look up
+  * @return true if the name equals one of the configured entries
+  */
+ private boolean checkNode(String nodeName) {
+   List acceptableNodes = StringRoutines.splitString(MirGlobal.config().getString("Localizer.HTML.Whitelist"), ";");
+
+   for (Iterator entries = acceptableNodes.iterator(); entries.hasNext();) {
+     Object allowedName = entries.next();
+     if (nodeName.equals(allowedName)) {
+       return true;
+     }
+   }
+   return false;
+ }
+
+ /**
+  * Recursively serializes a DOM node to the given writer.
+  *
+  * Document nodes delegate to their document element. Element nodes are
+  * written only when {@code checkNode} accepts their name; their children
+  * are visited either way. An attribute is written only when both
+  * {@code checkAttr} and {@code checkAttrValue} accept it. Text nodes are
+  * written with angle brackets replaced by entities. Other node types
+  * produce no output.
+  *
+  * @param node the node to serialize; null is ignored
+  * @param out the writer receiving the markup
+  * @throws IOException declared for the write operations
+  */
+ private void print(Node node, StringWriter out) throws IOException {
+   if (node == null) {
+     return;
+   }
+   int type = node.getNodeType();
+   boolean canOutput = checkNode(node.getNodeName());
+
+   switch (type) {
+
+     case Node.DOCUMENT_NODE:
+
+       print(((Document) node).getDocumentElement(), out);
+       out.flush();
+       break;
+
+     case Node.ELEMENT_NODE:
+       if (canOutput) {
+         out.write('<');
+
+         out.write(node.getNodeName());
+         NamedNodeMap attrs = node.getAttributes();
+
+         for (int i = 0; i < attrs.getLength(); i++) {
+           String attrName = attrs.item(i).getNodeName();
+           String attrValue = attrs.item(i).getNodeValue();
+           if (attrValue.startsWith("//")) {
+             attrValue = regularExpressionLeadingSlashes.substitute(attrValue, "/");
+           }
+
+           if (checkAttr(attrName) && checkAttrValue(attrValue)) {
+             out.write(' ');
+             out.write(attrName);
+             out.write("=\"");
+
+             // Write the value that was normalized and validated above.
+             // Writing the raw node value again would discard the
+             // leading-slash substitution.
+             // NOTE(review): double quotes inside attrValue are not escaped
+             // here - confirm whether the DOM can deliver them.
+             out.write(attrValue);
+             out.write('"');
+           }
+         }
+
+         // An element without children is written as a self-closing tag
+         if (node.getChildNodes() == null || node.getChildNodes().getLength() == 0) {
+           out.write("/");
+         }
+         out.write('>');
+       }
+       // Children are visited even when this element itself is suppressed
+       NodeList children = node.getChildNodes();
+       if (children != null) {
+         int len = children.getLength();
+         for (int i = 0; i < len; i++) {
+           print(children.item(i), out);
+         }
+       }
+       break;
+
+     case Node.TEXT_NODE:
+       String value = node.getNodeValue();
+       try {
+         // Replace angle brackets by entities so that text content cannot
+         // reintroduce markup; substituting "<" and ">" by themselves left
+         // the text unescaped.
+         // NOTE(review): assumes regularExpressionLT / regularExpressionGT
+         // match the literal "<" and ">" characters - confirm against
+         // their definitions.
+         value = regularExpressionLT.substituteAll(value, "&lt;");
+         value = regularExpressionGT.substituteAll(value, "&gt;");
+       }
+       catch (Throwable t) {
+         // If the substitution fails, emit nothing rather than raw text
+         value = "";
+       }
+       out.write(value);
+
+       break;
+
+   }
+
+   // Closing tag, only for an emitted element that was not self-closed
+   if (type == Node.ELEMENT_NODE && canOutput && node.getChildNodes() != null && node.getChildNodes().getLength() > 0) {
+     out.write("</");
+     out.write(node.getNodeName());
+     out.write('>');
+   }
+
+   out.flush();
+ }
+
+ public static class Utility extends ReflectionGeneratorFunctionsAdapter {
+ public Utility() {
+ super(new MirBasicUtilityFunctions());
+ }
+
+ public Object getDatetime() {
+ return new GeneratorDateTimeFunctions.DateTimeFunctions(
+ MirPropertiesConfiguration.instance().getString("Mir.DefaultTimezone"));
+ }
+
+ public Object getCompressWhitespace() {
+ return new freemarker.template.utility.CompressWhitespace();
+ }