fix up the deleteForbiddenTags and table tags regexps so they handle multiline and...
authormh <mh>
Fri, 13 Dec 2002 05:55:40 +0000 (05:55 +0000)
committermh <mh>
Fri, 13 Dec 2002 05:55:40 +0000 (05:55 +0000)
source/mir/misc/StringUtil.java

index 0e96f4c..4754a90 100755 (executable)
@@ -40,14 +40,15 @@ import  gnu.regexp.*;
 /**
  * Statische Hilfsmethoden zur Stringbehandlung
  *
- * @version $Id: StringUtil.java,v 1.23.2.4 2002/12/10 09:07:50 mh Exp $
+ * @version $Id: StringUtil.java,v 1.23.2.5 2002/12/13 05:55:40 mh Exp $
  * @author rk, mir-coders group
  *
  */
 
 public final class StringUtil {
 
-       private static RE   re_newline2br, re_brbr2p, re_mail, re_url, re_tags;
+       private static RE   re_newline2br, re_brbr2p, re_mail, re_url, re_tags,
+                      re_tables, re_forbiddenTags;
 
        private StringUtil() { }  // this avoids contruction
 
@@ -59,6 +60,8 @@ public final class StringUtil {
       re_mail       = new RE("([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+)\\.([a-zA-Z0-9_.-]+)");
                        re_url        = new RE("((https://)|(http://)|(ftp://)){1}([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/?([^ \t\r\n<>\\)\\]]+[^ \t\r\n.,<>\\)\\]])");
                        re_tags       = new RE("<[^>]*>",RE.REG_ICASE);
+                       re_tables = new RE("<[ \t\r\n/]*(table|td|tr)[ \t\r\n]*>",RE.REG_ICASE);
+                       re_forbiddenTags = new RE("<[ \t\r\n/]*(body|head|script)[ \t\r\n]*>",RE.REG_ICASE);
                }
                catch (REException e){
                        System.err.println("FATAL: StringUtil: could not precompile REGEX: "+e.toString());
@@ -886,17 +889,7 @@ public final class StringUtil {
         *  this method deletes all <script>, <body> and <head>-tags
         */
        public static final String deleteForbiddenTags(String haystack) {
-               try {
-                       RE regex = new RE("<[ \t\r\n](.*?)script(.*?)/script(.*?)>",RE.REG_ICASE);
-                       haystack = regex.substituteAll(haystack,"");
-                       regex = new RE("<head>(.*?)</head>");
-                       haystack = regex.substituteAll(haystack,"");
-                       regex = new RE("<[ \t\r\n/]*body(.*?)>");
-                       haystack = regex.substituteAll(haystack,"");
-                       return haystack;
-               } catch(REException ex){
-                       return null;
-               }
+    return re_forbiddenTags.substituteAll(haystack,"");
        }
 
         /**
@@ -904,13 +897,7 @@ public final class StringUtil {
         *  this method deletes all <table>, <tr> and <td>-tags
         */
        public static final String deleteHTMLTableTags(String haystack) {
-               try {
-                       RE regex = new RE("</?(table|td|tr)>",RE.REG_ICASE);
-                       haystack = regex.substituteAll(haystack,"");
-                       return haystack;
-               } catch(REException ex){
-                       return null;
-               }
+    return re_tables.substituteAll(haystack,"");
        }
 
        /**