+/*\r
+ * Copyright (C) 2006 The Mir-coders group\r
+ *\r
+ * This file is part of Mir.\r
+ *\r
+ * Mir is free software; you can redistribute it and/or modify\r
+ * it under the terms of the GNU General Public License as published by\r
+ * the Free Software Foundation; either version 2 of the License, or\r
+ * (at your option) any later version.\r
+ *\r
+ * Mir is distributed in the hope that it will be useful,\r
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of\r
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\r
+ * GNU General Public License for more details.\r
+ *\r
+ * You should have received a copy of the GNU General Public License\r
+ * along with Mir; if not, write to the Free Software\r
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA\r
+ *\r
+ * In addition, as a special exception, The Mir-coders gives permission to link\r
+ * the code of this program with any library licensed under the Apache Software License,\r
+ * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library\r
+ * (or with modified versions of the above that use the same license as the above),\r
+ * and distribute linked combinations including the two. You must obey the\r
+ * GNU General Public License in all respects for all of the code used other than\r
+ * the above mentioned libraries. If you modify this file, you may extend this\r
+ * exception to your version of the file, but you are not obligated to do so.\r
+ * If you do not wish to do so, delete this exception statement from your version.\r
+ */\r
+package mir.util;\r
+\r
+import org.apache.oro.text.regex.Pattern;\r
+import org.apache.oro.text.regex.Perl5Compiler;\r
+import org.apache.oro.text.regex.Perl5Matcher;\r
+import org.apache.oro.text.regex.Perl5Substitution;\r
+import org.apache.oro.text.regex.Util;\r
+\r
+/**\r
+ * Class used to enrich text-based content with HTML links\r
+ * according to a set of rules\r
+ */\r
+public class HTMLStripper {\r
+\r
+ private Pattern newLineExpression;\r
+ private Pattern doubleBRExpression;\r
+ private Pattern emailAddressExpression;\r
+ private Pattern urlExpression;\r
+ private Pattern htmlTagExpression;\r
+\r
+ public HTMLStripper() {\r
+ Perl5Compiler compiler = new Perl5Compiler();\r
+\r
+ try {\r
+ newLineExpression =\r
+ compiler.compile("(\r?\n){1}", Perl5Compiler.READ_ONLY_MASK);\r
+ doubleBRExpression =\r
+ compiler.compile("(<br>\r?\n<br>){1,}", Perl5Compiler.READ_ONLY_MASK);\r
+ emailAddressExpression =\r
+ compiler.compile("\\b([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+)\\.([a-zA-Z0-9_.-]+)\\b", Perl5Compiler.READ_ONLY_MASK);\r
+ urlExpression =\r
+ compiler.compile("((https://)|(http://)|(ftp://)){1}([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/?([^ \t\r\n<>\\)\\]]+[^ \t\r\n.,<>\\)\\]])", Perl5Compiler.READ_ONLY_MASK);\r
+ htmlTagExpression =\r
+ compiler.compile("<[^>]*>", Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK);\r
+ }\r
+ catch (Exception e) {\r
+ throw new RuntimeException(e.getMessage());\r
+ }\r
+ }\r
+\r
+ /**\r
+ * this routine takes text in url format and makes\r
+ * a clickaeble "<href>" link removing any "illegal" html tags\r
+ * @param haystack the url\r
+ * @param title the href link text\r
+ * @param imageRoot the place to find icons\r
+ * @param extImage the url of the icon to show next to the link\r
+ * @return a String containing the url\r
+ */\r
+ private String createURLLinks(String haystack, String title, String imageRoot, String extImage) {\r
+ if (title == null) {\r
+ return substituteAll(haystack, urlExpression,\r
+ "<img src=\"" + imageRoot + "/" + extImage + "\" border=\"0\"/> <a href=\"$0\">$0</a>");\r
+ }\r
+\r
+ title = removeHTMLTags(title);\r
+\r
+ return substituteAll(haystack, urlExpression,\r
+ "<img src=\"" + imageRoot + "/" + extImage + "\" border=\"0\"/> <a href=\"$0\">" + title + "</a>");\r
+ }\r
+\r
+ private String substituteAll(String anInput, Pattern anExpression, String aReplacement) {\r
+ Perl5Matcher matcher = new Perl5Matcher();\r
+\r
+ return Util.substitute(\r
+ matcher, anExpression,\r
+ new Perl5Substitution(aReplacement), anInput,\r
+ Util.SUBSTITUTE_ALL);\r
+ }\r
+\r
+ /**\r
+ * Remove all HTML tags\r
+ */\r
+ public String removeHTMLTags(String haystack){\r
+ return substituteAll(haystack, htmlTagExpression, "");\r
+ }\r
+\r
+\r
+ /**\r
+ * convertNewline2P ist eine regex-routine zum umwandeln von 2 oder mehr newlines (\n)\r
+ * in den html-tag <p>\r
+ * nur sinnvoll, wenn text nicht im html-format eingegeben\r
+ */\r
+ private String convertNewline2P(String haystack) {\r
+ return substituteAll(haystack, doubleBRExpression, "\n</p><p>");\r
+ }\r
+\r
+ /**\r
+ * convertNewline2Break ist eine regex-routine zum umwandeln von 1 newline (\n)\r
+ * in den html-tag <br>\r
+ * nur sinnvoll, wenn text nicht im html-format eingegeben\r
+ */\r
+ private String convertNewline2Break(String haystack) {\r
+ return substituteAll(haystack, newLineExpression, "$0<br />");\r
+ }\r
+\r
+\r
+ /**\r
+ * createMailLinks wandelt text im email-adressenformat\r
+ * in einen klickbaren link um\r
+ * nur sinnvoll, wenn text nicht im html-format eingegeben\r
+ */\r
+ private String createMailLinks(String haystack, String imageRoot, String mailImage) {\r
+ return substituteAll(haystack, emailAddressExpression,\r
+ "<img src=\"" + imageRoot + "/" + mailImage + "\" border=\"0\"/> <a href=\"mailto:$0\">$0</a>");\r
+ }\r
+\r
+\r
+ /**\r
+ * this routine takes text in url format and makes\r
+ * a clickaeble "<href>" link removing any "illegal" html tags\r
+ * @param haystack the url\r
+ * @param imageRoot the place to find icons\r
+ * @param extImage the url of the icon to show next to the link\r
+ * @param intImage unused\r
+ * @return a String containing the url\r
+ */\r
+ private String createURLLinks(String haystack, String title, String imageRoot,String extImage, String intImage) {\r
+ return createURLLinks(haystack, title, imageRoot, extImage);\r
+ }\r
+\r
+ /**\r
+ */\r
+ public String createHTML(String content, String producerDocRoot, String mailImage, String extImage, String intImage){\r
+ content = convertNewline2Break(content);\r
+ content = convertNewline2P(content);\r
+ content = createMailLinks(content, producerDocRoot, mailImage);\r
+ content = createURLLinks(content, null, producerDocRoot,\r
+ extImage, intImage);\r
+\r
+ return content;\r
+ }\r
+\r
+\r
+}\r