X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=source%2Fmir%2Futil%2FHTMLStripper.java;fp=source%2Fmir%2Futil%2FHTMLStripper.java;h=92fca3351ae4cc580c3dca60d8fd001051dfe598;hb=c9ac8fa71b679f8d967aac901bbef945c13b94c9;hp=0000000000000000000000000000000000000000;hpb=d63595f89aaa4b6a524dc0b4af9e0eef888f4c6b;p=mir.git diff --git a/source/mir/util/HTMLStripper.java b/source/mir/util/HTMLStripper.java new file mode 100644 index 00000000..92fca335 --- /dev/null +++ b/source/mir/util/HTMLStripper.java @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2006 The Mir-coders group + * + * This file is part of Mir. + * + * Mir is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Mir is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with Mir; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * In addition, as a special exception, The Mir-coders gives permission to link + * the code of this program with any library licensed under the Apache Software License, + * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library + * (or with modified versions of the above that use the same license as the above), + * and distribute linked combinations including the two. You must obey the + * GNU General Public License in all respects for all of the code used other than + * the above mentioned libraries. If you modify this file, you may extend this + * exception to your version of the file, but you are not obligated to do so. + * If you do not wish to do so, delete this exception statement from your version. + */ +package mir.util; + +import org.apache.oro.text.regex.Pattern; +import org.apache.oro.text.regex.Perl5Compiler; +import org.apache.oro.text.regex.Perl5Matcher; +import org.apache.oro.text.regex.Perl5Substitution; +import org.apache.oro.text.regex.Util; + +/** + * Class used to enrich text-based content with HTML links + * according to a set of rules + */ +public class HTMLStripper { + + private Pattern newLineExpression; + private Pattern doubleBRExpression; + private Pattern emailAddressExpression; + private Pattern urlExpression; + private Pattern htmlTagExpression; + + public HTMLStripper() { + Perl5Compiler compiler = new Perl5Compiler(); + + try { + newLineExpression = + compiler.compile("(\r?\n){1}", Perl5Compiler.READ_ONLY_MASK); + doubleBRExpression = + compiler.compile("(
\r?\n
){1,}", Perl5Compiler.READ_ONLY_MASK); + emailAddressExpression = + compiler.compile("\\b([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+)\\.([a-zA-Z0-9_.-]+)\\b", Perl5Compiler.READ_ONLY_MASK); + urlExpression = + compiler.compile("((https://)|(http://)|(ftp://)){1}([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/?([^ \t\r\n<>\\)\\]]+[^ \t\r\n.,<>\\)\\]])", Perl5Compiler.READ_ONLY_MASK); + htmlTagExpression = + compiler.compile("<[^>]*>", Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK); + } + catch (Exception e) { + throw new RuntimeException(e.getMessage()); + } + } + + /** + * this routine takes text in url format and makes + * a clickaeble "" link removing any "illegal" html tags + * @param haystack the url + * @param title the href link text + * @param imageRoot the place to find icons + * @param extImage the url of the icon to show next to the link + * @return a String containing the url + */ + private String createURLLinks(String haystack, String title, String imageRoot, String extImage) { + if (title == null) { + return substituteAll(haystack, urlExpression, + " $0"); + } + + title = removeHTMLTags(title); + + return substituteAll(haystack, urlExpression, + " " + title + ""); + } + + private String substituteAll(String anInput, Pattern anExpression, String aReplacement) { + Perl5Matcher matcher = new Perl5Matcher(); + + return Util.substitute( + matcher, anExpression, + new Perl5Substitution(aReplacement), anInput, + Util.SUBSTITUTE_ALL); + } + + /** + * Remove all HTML tags + */ + public String removeHTMLTags(String haystack){ + return substituteAll(haystack, htmlTagExpression, ""); + } + + + /** + * convertNewline2P ist eine regex-routine zum umwandeln von 2 oder mehr newlines (\n) + * in den html-tag

+ * nur sinnvoll, wenn text nicht im html-format eingegeben + */ + private String convertNewline2P(String haystack) { + return substituteAll(haystack, doubleBRExpression, "\n

"); + } + + /** + * convertNewline2Break ist eine regex-routine zum umwandeln von 1 newline (\n) + * in den html-tag
+ * nur sinnvoll, wenn text nicht im html-format eingegeben + */ + private String convertNewline2Break(String haystack) { + return substituteAll(haystack, newLineExpression, "$0
"); + } + + + /** + * createMailLinks wandelt text im email-adressenformat + * in einen klickbaren link um + * nur sinnvoll, wenn text nicht im html-format eingegeben + */ + private String createMailLinks(String haystack, String imageRoot, String mailImage) { + return substituteAll(haystack, emailAddressExpression, + " $0"); + } + + + /** + * this routine takes text in url format and makes + * a clickaeble "" link removing any "illegal" html tags + * @param haystack the url + * @param imageRoot the place to find icons + * @param extImage the url of the icon to show next to the link + * @param intImage unused + * @return a String containing the url + */ + private String createURLLinks(String haystack, String title, String imageRoot,String extImage, String intImage) { + return createURLLinks(haystack, title, imageRoot, extImage); + } + + /** + */ + public String createHTML(String content, String producerDocRoot, String mailImage, String extImage, String intImage){ + content = convertNewline2Break(content); + content = convertNewline2P(content); + content = createMailLinks(content, producerDocRoot, mailImage); + content = createURLLinks(content, null, producerDocRoot, + extImage, intImage); + + return content; + } + + +}