fix small email address filtering bug (originally the fix was from Max in the HEAD...
[mir.git] / source / mir / misc / StringUtil.java
index 76a323b..4315a45 100755 (executable)
@@ -1,23 +1,82 @@
 /*
- * put your module comment here
+ * Copyright (C) 2001, 2002  The Mir-coders group
+ *
+ * This file is part of Mir.
+ *
+ * Mir is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Mir is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Mir; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * In addition, as a special exception, The Mir-coders gives permission to link
+ * the code of this program with the com.oreilly.servlet library, any library
+ * licensed under the Apache Software License, The Sun (tm) Java Advanced
+ * Imaging library (JAI), The Sun JIMI library (or with modified versions of
+ * the above that use the same license as the above), and distribute linked
+ * combinations including the two.  You must obey the GNU General Public
+ * License in all respects for all of the code used other than the above
+ * mentioned libraries.  If you modify this file, you may extend this exception
+ * to your version of the file, but you are not obligated to do so.  If you do
+ * not wish to do so, delete this exception statement from your version.
  */
 
-
 package  mir.misc;
 
 import  java.io.*;
 import  java.lang.*;
 import  java.util.*;
+import  java.text.NumberFormat;
 import  gnu.regexp.*;
 
 /**
  * Statische Hilfsmethoden zur Stringbehandlung
  *
- * @version 29.6.99
- * @author RK
+ * @version $Id: StringUtil.java,v 1.23.2.3 2002/12/09 06:23:38 mh Exp $
+ * @author rk, mir-coders group
+ *
  */
+
 public final class StringUtil {
 
+       // Regular expressions shared by the helpers in this class; they are
+       // compiled once by the static initializer below.
+       private static RE   re_newline2br, re_brbr2p, re_mail, re_url, re_tags;
+
+       private StringUtil() { }  // private constructor prevents instantiation
+
+       static {
+               try {
+                       // precompile the regular expressions once at class-load time
+                       // a single line break (LF, optionally preceded by CR)
+                       re_newline2br = new RE("(\r?\n){1}");
+                       // one or more repetitions of <br>, line break, <br>
+                       re_brbr2p     = new RE("(<br>\r?\n<br>){1,}");
+                       // local-part @ first domain label . rest of the domain
+      re_mail       = new RE("([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+)\\.([a-zA-Z0-9_.-]+)");
+                       // NOTE(review): the '.' between the two host groups is not escaped,
+                       // so it matches any character - confirm whether a literal dot was intended
+                       re_url        = new RE("((https://)|(http://)|(ftp://)){1}([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/?([^ \t\r\n<>\\)\\]]+[^ \t\r\n.,<>\\)\\]])");
+                       // anything between '<' and the next '>'
+                       re_tags       = new RE("<[^>]*>",RE.REG_ICASE);
+               }
+               catch (REException e){
+                       // NOTE(review): the failure is only reported on stderr; the failing
+                       // field and every field assigned after it stay null
+                       System.err.println("FATAL: StringUtil: could not precompile REGEX: "+e.toString());
+               }
+       }
+
+  /**
+   * Formats a number as plain digits, left-padded with zeros.
+   *
+   * @param value     the number to format
+   * @param minDigits minimum number of integer digits; shorter values are
+   *                  padded with leading zeros
+   * @param maxDigits maximum number of integer digits; for longer values
+   *                  only the least significant digits are kept
+   * @return the formatted number, without grouping separators
+   **/
+  public static synchronized String zeroPaddingNumber(long value, int minDigits,
+                                                      int maxDigits)
+  {
+    NumberFormat numberFormat = NumberFormat.getInstance();
+    // Without this the default locale inserts grouping separators
+    // (e.g. "001,234"), which defeats the purpose of zero padding.
+    numberFormat.setGroupingUsed(false);
+    numberFormat.setMinimumIntegerDigits(minDigits);
+    numberFormat.setMaximumIntegerDigits(maxDigits);
+    return numberFormat.format(value);
+  }
+
        /**
         * Wandelt Datum in einen 8-ziffrigen String um (yyyymmdd)
         * @param theDate
@@ -33,6 +92,54 @@ public final class StringUtil {
        }
 
        /**
+        * Wandelt Calendar in einen 12-ziffrigen String um (yyyymmddhhmm)
+        * @param theDate
+        * @return 12-ziffriger String (yyyymmdd)
+        */
+
+       public static final String date2webdbDateTime (GregorianCalendar theDate) {
+               StringBuffer webdbDate = new StringBuffer();
+               webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
+               webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
+               webdbDate.append(pad2(theDate.get(Calendar.DATE)));
+               webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
+               webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
+               return  webdbDate.toString();
+       }
+
+       /**
+        * Returns a http://www.w3.org/TR/NOTE-datetime formatted date:
+        * yyyy-mm-ddThh:mm:ss followed by the offset from UTC as +hh:mm or -hh:mm.
+        * The hour is written on the 24-hour clock. The offset is the calendar's
+        * zone offset plus its daylight-saving offset, so it matches the local
+        * time fields that are printed, and it keeps the minutes part for zones
+        * that are not a whole number of hours away from UTC.
+        * @param theDate calendar to format
+        * @return w3 datetime string
+        */
+
+       public static final String date2w3DateTime (GregorianCalendar theDate) {
+               StringBuffer w3Date = new StringBuffer();
+               w3Date.append(String.valueOf(theDate.get(Calendar.YEAR)));
+               w3Date.append("-");
+               // Calendar.MONTH is zero-based
+               w3Date.append(pad2(theDate.get(Calendar.MONTH) + 1));
+               w3Date.append("-");
+               w3Date.append(pad2(theDate.get(Calendar.DATE)));
+               w3Date.append("T");
+               // HOUR_OF_DAY (0-23), not the 12-hour Calendar.HOUR field
+               w3Date.append(pad2(theDate.get(Calendar.HOUR_OF_DAY)));
+               w3Date.append(":");
+               w3Date.append(pad2(theDate.get(Calendar.MINUTE)));
+               w3Date.append(":");
+               w3Date.append(pad2(theDate.get(Calendar.SECOND)));
+               // offset of the printed local time from UTC, in minutes, including DST
+               int offsetMinutes = (theDate.get(Calendar.ZONE_OFFSET)
+                               + theDate.get(Calendar.DST_OFFSET)) / (60*1000);
+               w3Date.append(offsetMinutes < 0 ? "-" : "+");
+               int absMinutes = Math.abs(offsetMinutes);
+               w3Date.append(pad2(absMinutes / 60));
+               w3Date.append(":");
+               w3Date.append(pad2(absMinutes % 60));
+               return  w3Date.toString();
+       }
+
+       /**
         * wandelt Calendar in dd.mm.yyyy / hh.mm um
         * @param theDate
         * @return String mit (dd.mm.yyyy / hh.mm um)
@@ -62,7 +169,9 @@ public final class StringUtil {
                StringBuffer path = new StringBuffer();
                path.append("/").append(webdbDate.substring(0, 4));
                path.append("/").append(webdbDate.substring(4, 6));
-               path.append("/").append(webdbDate.substring(6, 8));
+               path.append("/");
+               //who did this?
+               //path.append("/").append(webdbDate.substring(6, 8));
                return  path.toString();
        }
 
@@ -70,21 +179,21 @@ public final class StringUtil {
         * wandelt Calendar in dd.mm.yyyy um
         *
         * @param theDate
-        * @return String mit  <code>dd.mm.yyyy</code>
+        * @return String mit  <code>yyyy-mm-dd</code>
         */
        public static final String webdbDate2readableDate (String webdbDate) {
                String date = "";
-               date += webdbDate.substring(6, 8);
-               date += "." + webdbDate.substring(4, 6);
-               date += "." + webdbDate.substring(0, 4);
+               date += webdbDate.substring(0, 4);
+               date += "-" + webdbDate.substring(5, 7);
+               date += "-"+webdbDate.substring(8, 10);
                return  date;
        }
 
 
-  /**
-   * converts string from format: yyyy-mm-dd__hh:mm:ss.d
-   * to dd.mm.yyyy hh:mm
-   */
+       /**
+        * converts string from format: yyyy-mm-dd__hh:mm:ss.d
+        * to dd.mm.yyyy hh:mm
+        */
        public static String dateToReadableDate(String date) {
                StringBuffer returnDate = new StringBuffer();
                if (date!=null) {
@@ -96,6 +205,127 @@ public final class StringUtil {
                }
                return returnDate.toString();
        }
+        
+  /**
+        * Reformats a database timestamp laid out like
+        * yyyy-mm-dd hh:mm:ss.dddddd+TZ for Dublin Core output: the first ten
+        * characters (the date), a literal "T", then characters 11 to 18 (the
+        * time). No time zone designator is appended; that part was left out
+        * until timestamps with non-whole-hour offsets can be handled.
+        * @param date timestamp string, may be null
+        * @return yyyy-mm-ddThh:mm:ss, or an empty string if date is null
+        */
+       public static String webdbdateToDCDate(String date) {
+               if (date == null) {
+                       return "";
+               }
+               String day = date.substring(0,10);
+               String time = date.substring(11,19);
+               return day + "T" + time;
+       }
+
+
+       /**
+        * Extracts the year from a string laid out like yyyy-mm-dd hh:mm:ss.d
+        * @param date date string, may be null
+        * @return the first four characters (yyyy), or an empty string if date is null
+        */
+       public static String dateToYear (String date) {
+               return (date == null) ? "" : date.substring(0,4);
+       }
+
+       /**
+        * Extracts the month from a string laid out like yyyy-mm-dd hh:mm:ss.d,
+        * without a leading zero.
+        * @param date date string, may be null
+        * @return [m]m, or an empty string if date is null
+        */
+       public static String dateToMonth (String date) {
+               if (date == null) {
+                       return "";
+               }
+               // skip the tens digit when it is a zero
+               int from = (date.charAt(5) == '0') ? 6 : 5;
+               return date.substring(from,7);
+       }
+
+       /**
+        * Extracts the day of the month from a string laid out like
+        * yyyy-mm-dd hh:mm:ss.d, without a leading zero.
+        * @param date date string, may be null
+        * @return [d]d, or an empty string if date is null
+        */
+       public static String dateToDayOfMonth (String date) {
+               if (date == null) {
+                       return "";
+               }
+               // skip the tens digit when it is a zero
+               int from = (date.charAt(8) == '0') ? 9 : 8;
+               return date.substring(from,10);
+       }
+
+       /**
+        * Extracts the time of day from a string laid out like
+        * yyyy-mm-dd hh:mm:ss.d
+        * @param date date string, may be null
+        * @return characters 11 to 15 (hh:mm), or an empty string if date is null
+        */
+       public static String dateToTime (String date) {
+               String time = "";
+               if (date != null) {
+                       time = date.substring(11,16);
+               }
+               return time;
+       }
+
+    /**
+     * Splits the provided CSV text into an array of values using a
+     * java.util.StringTokenizer: every character of separator acts as a
+     * delimiter and empty values between delimiters are not returned.
+     * Taken from Jakarta Turbine StringUtils.java. -mh
+     *
+     * @param text      The CSV list of values to split apart.
+     * @param separator The delimiter character(s).
+     * @return          The values, in order of appearance.
+     */
+    public static String[] split(String text, String separator)
+    {
+        StringTokenizer tokens = new StringTokenizer(text, separator);
+        int count = tokens.countTokens();
+        String[] values = new String[count];
+        for (int i = 0; i < count; i++)
+        {
+            values[i] = tokens.nextToken();
+        }
+        return values;
+    }
+
+    /**
+     * Concatenates the elements of the array into one string, placing
+     * separator between neighbouring elements. Taken from Jakarta
+     * Turbine StringUtils.java. -mh
+     *
+     * @param list      The values to join together.
+     * @param separator The text inserted between two values.
+     * @return          The joined text; empty for an empty array.
+     */
+    public static String join(String[] list, String separator)
+    {
+        StringBuffer joined = new StringBuffer();
+        if (list.length > 0)
+        {
+            joined.append(list[0]);
+        }
+        for (int i = 1; i < list.length; i++)
+        {
+            joined.append(separator).append(list[i]);
+        }
+        return joined.toString();
+    }
 
 
        /**
@@ -119,22 +349,21 @@ public final class StringUtil {
         */
        public static String quote(String s)
        {
-    //String s2 = quote(s, '\'');
-
-    //Quickhack
-    //Because of '?-Bug in Postgresql-JDBC-Driver
-    StringBuffer temp = new StringBuffer();
-    for(int i=0;i<s.length();i++){
-      if(s.charAt(i)=='\''){
-        temp.append("&acute;");
-      } else {
-        temp.append(s.charAt(i));
-      }
-    }
-    String s2 = temp.toString();
-    //Quickhack end
-
-    s2 = quote(s2, '\"');
+               //String s2 = quote(s, '\'');
+               //Quickhack
+               //Because of '?-Bug in Postgresql-JDBC-Driver
+               StringBuffer temp = new StringBuffer();
+               for(int i=0;i<s.length();i++){
+                       if(s.charAt(i)=='\''){
+                               temp.append("&#39;");
+                       } else {
+                               temp.append(s.charAt(i));
+                       }
+               }
+               String s2 = temp.toString();
+               //end Quickhack
+               
+               s2 = quote(s2, '\"');
                return s2;
        }
 
@@ -157,15 +386,15 @@ public final class StringUtil {
                        pos = i;
                        if (pos < s.length()) {
                                buf.append('\\');
-             buf.append(quoteChar);
-       pos++;
+                               buf.append(quoteChar);
+                               pos++;
                        }
                }
                return buf.toString();
        }
 
        /**
-        * ersetzt gefährliche zeichen in  <code>s</code>
+        * replaces dangerous characters in <code>s</code>
         *
         */
 
@@ -194,8 +423,8 @@ public final class StringUtil {
                String crlf = System.getProperty("line.separator");
                if (!crlf.equals("\n"))
                        s = replace(s, "\n", crlf);
-    // byte[] buf = new byte[s.length()];
-    byte[] buf = s.getBytes();
+               // byte[] buf = new byte[s.length()];
+               byte[] buf = s.getBytes();
                return buf;
        }
 
@@ -223,6 +452,24 @@ public final class StringUtil {
                return buf.toString();
        }
 
+       /**
+        * Replaces every match of the regular expression <code>pattern</code>
+        * in <code>haystack</code> with <code>substitute</code>.
+        * @param haystack text to search
+        * @param pattern regular expression, compiled with gnu.regexp
+        * @param substitute replacement for each match
+        * @return the string with the replacements applied, or null if
+        *         compiling or applying the pattern raises an REException
+        */
+       public static String regexpReplace(String haystack, String pattern, String substitute) {
+               String result;
+               try {
+                       result = new RE(pattern).substituteAll(haystack,substitute);
+               } catch(REException invalidPattern){
+                       result = null;
+               }
+               return result;
+       }
+
+
+
 
        /**
         * Fügt einen Separator an den Pfad an
@@ -254,6 +501,16 @@ public final class StringUtil {
        }
 
        /**
+        * Checks to see if the path is absolute by looking for a leading file
+        * separater
+        * @param path
+        * @return
+        */
+       public static boolean isAbsolutePath (String path) {
+               return  path.startsWith(File.separator);
+       }
+
+       /**
         * Löscht Slash am Anfang des Strings
         * @param path
         * @return
@@ -333,288 +590,20 @@ public final class StringUtil {
                return buf.toString();
        }
 
-       /**
-        * wandelt Sonderzeichen in Quotes um
-        *
-        * @return Kovertierter String
-        */
-       public static String encodeHtml(String s) {
-               StringBuffer buf = new StringBuffer();
-               for(int i=0;i < s.length(); i++ ) {
-
-                       /** @todo looks inefficient */
-                       if (s.charAt(i)=='&') {
-                               // convert html to xml-parsable representation
-                               if( s.indexOf( "&ouml;", i ) == i ) {
-                                       buf.append( "&#246;" ); i += 5;
-                                       continue;
-                               }
-                               if( s.indexOf( "&auml;", i ) == i ) {
-                                       buf.append( "&#228;" ); i += 5;
-                                       continue;
-                               }
-                               if( s.indexOf( "&uuml;", i ) == i ) {
-                                       buf.append( "&#252;" ); i += 5;
-                                       continue;
-                               }
-                               if( s.indexOf( "&Ouml;", i ) == i ) {
-                                       buf.append( "&#214;" ); i += 5;
-                                       continue;
-                               }
-                               if( s.indexOf( "&Auml;", i ) == i ) {
-                                       buf.append( "&#196;" ); i += 5;
-                                       continue;
-                               }
-                               if( s.indexOf( "&Uuml;", i ) == i ) {
-                                       buf.append( "&#220;" ); i += 5;
-                                       continue;
-                               }
-                               if( s.indexOf( "&szlig;", i ) == i ) {
-                                       buf.append( "&#223;" ); i += 6;
-                                       continue;
-                               }
-
-                               /** @todo should only escape outside of tags */
-
-                               if( s.indexOf( "&quot;", i ) == i ) {
-                                       buf.append( "&#223;" ); i += 5;
-                                       continue;
-        }
-                               if( s.indexOf( "&ndash;", i ) == i ) {
-                                       buf.append( "&#8211;" ); i += 6;
-                                       continue;
-        }
-        if( s.indexOf( "&mdash;", i ) == i ) {
-                                       buf.append( "&#8212;" ); i += 6;
-                                       continue;
-        }
-        if( s.indexOf( "&ldquo;", i ) == i ) {
-                                       buf.append( "&#8220;" ); i += 6;
-                                       continue;
-        }
-        if( s.indexOf( "&rdquo;", i ) == i ) {
-                                       buf.append( "&#8221;" ); i += 6;
-                                       continue;
-        }
-        if( s.indexOf( "&bdquo;", i ) == i ) {
-                                       buf.append( "&#8222;" ); i += 6;
-                                       continue;
-        }
-
-        //looks pretty stupid
-        if( s.indexOf( "&lt;", i ) == i ) {
-                                       buf.append( "&lt;" ); i += 3;
-                                       continue;
-        }
-        if( s.indexOf( "&gt;", i ) == i ) {
-                                       buf.append( "&gt;" ); i += 3;
-                                       continue;
-        }
-        if( s.indexOf( "&acute;", i ) == i ) {
-                                       buf.append( "&acute;" ); i += 6;
-                                       continue;
-        }
-        if( s.indexOf( "&nbsp;", i ) == i ) {
-                                       buf.append( "&nbsp;" ); i += 5;
-                                       continue;
-        }
-        //has to be the last
-        if( s.indexOf( "&", i ) == i ) {
-                                       buf.append( "&#38;" ); i += 0;
-                                       continue;
-        }
-                       }
-                       // convert umlauts an other special charakters
-                       switch( s.charAt(i) ) {
-                               case 'ö': buf.append( "&#246;" ); break;
-                               case 'ä': buf.append( "&#228;" ); break;
-                               case 'ü': buf.append( "&#252;" ); break;
-                               case 'Ö': buf.append( "&#214;" ); break;
-                               case 'Ä': buf.append( "&#196;" ); break;
-                               case 'Ü': buf.append( "&#220;" ); break;
-                               case 'ß': buf.append( "&#223;" ); break;
-                               case 'é': buf.append( "&#233;" ); break;
-                               case 'è': buf.append( "&#232;" ); break;
-                               case 'á': buf.append( "&#225;" ); break;
-                               case 'à': buf.append( "&#224;" ); break;
-                               case 'â': buf.append( "&#226;" ); break;
-                               case 'ã': buf.append( "&#227;" ); break;
-                               case '¬': buf.append( "&#172;" ); break;
-                               case '¹': buf.append( "&#185;" ); break;
-                               case '²': buf.append( "&#178;" ); break;
-                               case '³': buf.append( "&#179;" ); break;
-                               case '¼': buf.append( "&#188;" ); break;
-                               case '½': buf.append( "&#189;" ); break;
-                               case '¾': buf.append( "&#190;" ); break;
-                               case '¶': buf.append( "&#182;" ); break;
-                               case 'æ': buf.append( "&#230;" ); break;
-                               case 'ð': buf.append( "&#240;" ); break;
-                               case '|': buf.append( "&#166;" ); break;
-                               case '·': buf.append( "&#183;" ); break;
-                               case '°': buf.append( "&#176;" ); break;
-                               case '§': buf.append( "&#167;" ); break;
-                               case 'ø': buf.append( "&#248;" ); break;
-                               case 'ç': buf.append( "&#231;" ); break;
-                               case '¤': buf.append( "&#164;" ); break;
-                               case 'ª': buf.append( "&#170;" ); break;
-                               case 'Ç': buf.append( "&#199;" ); break;
-                               case 'Ã': buf.append( "&#195;" ); break;
-                               case 'Â': buf.append( "&#194;" ); break;
-                               case 'Æ': buf.append( "&#198;" ); break;
-                               case '©': buf.append( "&#169;" ); break;
-                               case '®': buf.append( "&#174;" ); break;
-                               case '¥': buf.append( "&#165;" ); break;
-                               case 'Þ': buf.append( "&#254;" ); break;
-                               case '¯': buf.append( "&#175;" ); break;
-                               case 'Ð': buf.append( "&#208;" ); break;
-                               case 'º': buf.append( "&#186;" ); break;
-                               case '¡': buf.append( "&#161;" ); break;
-                               case '£': buf.append( "&#163;" ); break;
-                               case '±': buf.append( "&#177;" ); break;
-                               case '¿': buf.append( "&#191;" ); break;
-                               case 'Ø': buf.append( "&#216;" ); break;
-                               case 'Á': buf.append( "&#192;" ); break;
-                               case 'À': buf.append( "&#193;" ); break;
-                               case 'É': buf.append( "&#200;" ); break;
-                               case 'È': buf.append( "&#201;" ); break;
-                               case 'ù': buf.append( "&#250;" ); break;
-                               case 'ñ': buf.append( "&#241;" ); break;
-                               case 'Ñ': buf.append( "&#209;" ); break;
-                               case 'µ': buf.append( "&#181;" ); break;
-                               case 'Í': buf.append( "&#204;" ); break;
-                               case 'Ì': buf.append( "&#205;" ); break;
-                               case 'í': buf.append( "&#236;" ); break;
-                               case 'ì': buf.append( "&#237;" ); break;
-                               case 'î': buf.append( "&#238;" ); break;
-                               case 'Î': buf.append( "&#206;" ); break;
-                               case 'ó': buf.append( "&#243;" ); break;
-                               case 'Ó': buf.append( "&#210;" ); break;
-                               case 'ò': buf.append( "&#206;" ); break;
-                               case 'Ò': buf.append( "&#211;" ); break;
-                               case 'ô': buf.append( "&#244;" ); break;
-                               case 'Ô': buf.append( "&#212;" ); break;
-                               case 'õ': buf.append( "&#245;" ); break;
-                               case 'Õ': buf.append( "&#213;" ); break;
-                               case 'ý': buf.append( "&#253;" ); break;
-                               case 'Ý': buf.append( "&#221;" ); break;
-                               case 'û': buf.append( "&#251;" ); break;
-                               case 'Û': buf.append( "&#219;" ); break;
-                               case 'ú': buf.append( "&#249;" ); break;
-                               case 'Ú': buf.append( "&#217;" ); break;
-                               case 'Ù': buf.append( "&#218;" ); break;
-                               case 'Ê': buf.append( "&#202;" ); break;
-                               case 'ê': buf.append( "&#234;" ); break;
-                               case 'å': buf.append( "&#229;" ); break;
-                               case 'Å': buf.append( "&#197;" ); break;
-                               case 'ë': buf.append( "&#235;" ); break;
-                               case 'Ë': buf.append( "&#203;" ); break;
-                               case 'ÿ': buf.append( "&#255;" ); break;
-                               case 'ï': buf.append( "&#239;" ); break;
-                               case 'Ï': buf.append( "&#207;" ); break;
-                               case '«': buf.append( "&#171;" ); break;
-        case '»': buf.append( "&#187;" ); break;
-        case '\'': buf.append( "&acute;" ); break;
-        case '\"': buf.append( "&quot;" ); break;
-        //case '\u8211': buf.append( "&#8211;" ); break;
-        //case '\u8212': buf.append( "&#8212;" ); break;
-        //case '\u8220': buf.append( "&#8220;" ); break;
-        //case '\u8221': buf.append( "&#8221;" ); break;
-        //case '\u8222': buf.append( "&#8222;" ); break;
-                               //case '\"': buf.append( "&#34;" ); break;
-                               default: buf.append( s.charAt(i) );
-                       }
-
-               }
-               return buf.toString();
-       }
-
-
-  public static String decodeHTMLinTags(String s){
-    StringBuffer buffer = new StringBuffer();
-    boolean start = false;
-    boolean stop = false;
-    int startIndex = 0;
-    int stopIndex = 0;
-    int temp = 0;
-
-    for(int i=0;i<s.length();i++){
-      if(s.charAt(i)=='<'){
-        start = true;
-        startIndex = i;
-      } else if(s.charAt(i)=='>'){
-        stop = true;
-        stopIndex = i;
-
-        if(start && stop){
-          buffer.append(s.substring(temp,startIndex));
-          buffer.append(replaceQuot(s.substring(startIndex,stopIndex+1)));
-          i= temp= stopIndex+1;
-          start= stop= false;
-        }
-      }
-    }
-    buffer.append(s.substring(stopIndex+1));
-    return buffer.toString();
-  }
-
-  public static String replaceQuot(String s) {
-    StringBuffer buffer = new StringBuffer();
-    for(int j = 0; j < s.length();j++){
-      if(s.charAt(j)=='&'){
-        if(s.indexOf( "&quot;",j) == j) {
-          buffer.append( "\"" );
-          j += 5;
-        }//if
-      } else {
-        buffer.append(s.charAt(j));
-      }//else
-    }//for
-    return buffer.toString();
-  }
-
-       /** wandelt Quotes in Sonderzeichen um
-        */
-       /**
-       public static String decodeHtml(String s) {
-               StringBuffer buf = new StringBuffer();
-               for(int i=0;i < s.length(); i++ ) {
-                       if( s.indexOf( "&ouml;", i ) == i ) {
-                               buf.append( "ö" ); i += 5;
-                               continue;
-                       }
-                       if( s.indexOf( "&auml;", i ) == i ) {
-                               buf.append( "ä" ); i += 5;
-                               continue;
-                       }
-                       if( s.indexOf( "&uuml;", i ) == i ) {
-                               buf.append( "ü" ); i += 5;
-                               continue;
-                       }
-                       if( s.indexOf( "&Ouml;", i ) == i ) {
-                               buf.append( "Ö" ); i += 5;
-                               continue;
-                       }
-                       if( s.indexOf( "&Auml;", i ) == i ) {
-                               buf.append( "Ä" ); i += 5;
-                               continue;
-                       }
-                       if( s.indexOf( "&Uuml;", i ) == i ) {
-                               buf.append( "Ü" ); i += 5;
-                               continue;
-                       }
-                       if( s.indexOf( "&szlig;", i ) == i ) {
-                               buf.append( "ß" ); i += 6;
-                               continue;
-                       }
-      if( s.indexOf( "&quot;", i ) == i ) {
-                               buf.append( "\"" ); i += 5;
-                               continue;
-                       }
-                       buf.append( s.charAt(i) );
-               }
-               return buf.toString();
+       /**
+        * Replaces every <code>&amp;quot;</code> entity in <code>s</code> with a
+        * double-quote character. All other characters, including ampersands
+        * that do not start <code>&amp;quot;</code>, are copied unchanged.
+        * @param s text to convert
+        * @return the text with the entities replaced
+        */
+       public static String replaceQuot(String s) {
+               final String entity = "&quot;";
+               StringBuffer buffer = new StringBuffer(s.length());
+               int pos = 0;
+               while (pos < s.length()) {
+                       if (s.startsWith(entity, pos)) {
+                               buffer.append('"');
+                               pos += entity.length();
+                       } else {
+                               // an '&' that does not begin "&quot;" used to be dropped
+                               // here; it is now copied like any other character
+                               buffer.append(s.charAt(pos));
+                               pos++;
+                       }
+               }
+               return buffer.toString();
        }
-        */
 
        /**
         * schnellere Variante der String.toLowerCase()-Routine
@@ -658,7 +647,7 @@ public final class StringUtil {
         * @param s
         * @return CRC-Prüfsumme
         */
-  public static int getCRC(String s) {
+       public static int getCRC(String s) {
                int h = 0;
                char val[] = s.toCharArray();
                int len = val.length;
@@ -710,20 +699,20 @@ public final class StringUtil {
         * @return index des Satzendes, oder -1
         */
        public static int findEndOfSentence(String text, int startIndex) {
-     while (true) {
-       int i = text.indexOf('.', startIndex);
-       if (i < 0) return -1;
-       if (i > 0 && !Character.isDigit(text.charAt(i-1)) &&
-          (i+1 >= text.length()
-          || text.charAt(i+1) == ' '
-          || text.charAt(i+1) == '\n'
-          || text.charAt(i+1) == '\t'))
-          return i+1;
-       startIndex = i+1;
-     }
-  }
+                while (true) {
+                        int i = text.indexOf('.', startIndex);
+                        if (i < 0) return -1;
+                        if (i > 0 && !Character.isDigit(text.charAt(i-1)) &&
+                                       (i+1 >= text.length()
+                                       || text.charAt(i+1) == ' '
+                                       || text.charAt(i+1) == '\n'
+                                       || text.charAt(i+1) == '\t'))
+                                       return i+1;
+                        startIndex = i+1;
+                }
+       }
 
-       /**
+               /**
         * Findet Wortende in String <code>text</code> ab <code>startIndex</code>
         * @param text
         * @param startIndex
@@ -737,35 +726,6 @@ public final class StringUtil {
                return Math.min(i, j);
        }
 
-       /**
-        * Turns "http://" links found in plain text into browsable (clickable) links
-        * @param text
-        * @return the converted string
-        */
-       public static String makeLinks(String text) {
-               int i = 0;
-               StringBuffer buf = new StringBuffer(text.length());
-               while (true) {
-                       int j = text.indexOf("http://", i);
-                       if (j < 0) {
-                               buf.append(text.substring(i));
-                               break;
-                       } else {
-                               buf.append(text.substring(i, j));
-                               int k = findEndOfWord(text,j+7);
-                               String url="";
-                               if (k<0)        url = text.substring(j);
-                               else            url = text.substring(j,k);
-
-                               buf.append("<a href=\042"+url+"\042>"+url+"</a>");
-                               //System.out.println("url mark: " + url);
-                               i = j+url.length();
-                       }
-               }
-               return buf.toString();
-
-       }
-
 
        /**
         *  convertNewline2P is a regex routine for converting 2 or more newlines (\n)
@@ -773,12 +733,7 @@ public final class StringUtil {
         *  only useful if the text was not entered in HTML format
         */
        public static String convertNewline2P(String haystack) {
-               try {
-                       RE regex = new RE("(<br>\r?\n<br>){1,}");
-                       return regex.substituteAll(haystack,"\n</p><p>");
-               } catch(REException ex){
-                       return null;
-               }
+                       return re_brbr2p.substituteAll(haystack,"\n</p><p>"); // re_brbr2p is declared outside this hunk; presumably the precompiled form of the pattern removed above (TODO confirm)
        }
 
        /**
@@ -787,12 +742,7 @@ public final class StringUtil {
         *  only useful if the text was not entered in HTML format
         */
        public static String convertNewline2Break(String haystack) {
-               try {
-                       RE regex = new RE("(\r?\n){1}");
-                       return regex.substituteAll(haystack,"$0<br>");
-               } catch(REException ex){
-                       return null;
-               }
+               return re_newline2br.substituteAll(haystack,"$0<br />"); // $0 re-inserts the matched text before the appended <br />; re_newline2br is declared outside this hunk
        }
 
        /**
@@ -801,12 +751,7 @@ public final class StringUtil {
         *  only useful if the text was not entered in HTML format
         */
        public static String createMailLinks(String haystack) {
-               try {
-                       RE regex = new RE("([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+).([a-zA-Z0-9_.-]+)");
-                       return regex.substituteAll(haystack,"<a href=\"mailto:$0\">$0</a>");
-               } catch(REException ex){
-                       return null;
-               }
+                       return re_mail.substituteAll(haystack,"<a href=\"mailto:$0\">$0</a>"); // wraps each match of re_mail (declared outside this hunk) in a mailto: anchor
        }
 
 
@@ -816,12 +761,7 @@ public final class StringUtil {
         *  only useful if the text was not entered in HTML format
         */
        public static String createMailLinks(String haystack, String imageRoot, String mailImage) {
-               try {
-                       RE regex = new RE("([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+).([a-zA-Z0-9_.-]+)");
-                       return regex.substituteAll(haystack,"<img src=\""+imageRoot+"/"+mailImage+"\" border=\"0\"/>&#160;<a href=\"mailto:$0\">$0</a>");
-               } catch(REException ex){
-                       return null;
-               }
+               return re_mail.substituteAll(haystack,"<img src=\""+imageRoot+"/"+mailImage+"\" border=\"0\"/>&#160;<a href=\"mailto:$0\">$0</a>"); // puts an img (src = imageRoot, a slash, mailImage) in front of each mailto: anchor; re_mail is declared outside this hunk
        }
 
 
@@ -831,35 +771,45 @@ public final class StringUtil {
         *  only useful if the text was not entered in HTML format
         */
        public static String createURLLinks(String haystack) {
-               try {
-                       //this expression urgently needs expert advice
-                       RE regex = new RE("((https://)|(http://)|(ftp://))+([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/([^ \t\r\n<>]+[^ \t\r\n.,<>])");
-                       return regex.substituteAll(haystack,"<a href=\"$0\">$0</a>");
-               } catch(REException ex){
-                       return null;
-               }
+               return re_url.substituteAll(haystack,"<a href=\"$0\">$0</a>"); // wraps each match of re_url (declared outside this hunk) in an anchor pointing at the matched text
        }
 
        /**
-        *  createURLLinks converts text in URL format
-        *  into a clickable link
-        *  only useful if the text was not entered in HTML format
-        */
-       public static String createURLLinks(String haystack,String imageRoot,String extImage,String intImage) {
-               try {
-                       //this expression urgently needs expert advice
-                       RE regex = new RE("((https://)|(http://)|(ftp://))+([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/?([^ \t\r\n<>]+[^ \t\r\n.,<>])");
-                       return regex.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/>&#160;<a href=\"$0\">$0</a>");
-               } catch(REException ex){
-                       return null;
+        * Replaces every match of re_url in haystack with an icon followed by
+        * a clickable anchor; HTML tags are stripped from title (not from haystack)
+        * @param haystack the text to search for URLs
+        * @param title the link text, or null to use the matched URL itself
+        * @param imageRoot the place to find icons (prefix of the img src)
+        * @param extImage the icon to show next to the link, appended to imageRoot after a "/"
+        * @return haystack with each URL match replaced by the img/anchor markup
+        */
+       public static String createURLLinks(String haystack, String title, String imageRoot,String extImage) {
+               if (title == null) {
+                       return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/>&#160;<a href=\"$0\">$0</a>");
+               } else {
+                       title = removeHTMLTags(title);
+                       return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/>&#160;<a href=\"$0\">"+title+"</a>");
                }
        }
 
        /**
+        * Five-argument variant that ignores intImage and delegates to
+        * createURLLinks(haystack, title, imageRoot, extImage)
+        * @param haystack the text to search for URLs
+        * @param title the link text, passed through unchanged to the four-argument variant
+        * @param imageRoot the place to find icons
+        * @param extImage the icon to show next to the link
+        * @param intImage unused
+        * @return the result of the four-argument createURLLinks
+        */
+       public static String createURLLinks(String haystack, String title, String imageRoot,String extImage,String intImage) {
+               return createURLLinks(haystack, title, imageRoot, extImage);
+       }
+
+        /**
         *  deleteForbiddenTags
         *  this method deletes all <script>, <body> and <head>-tags
         */
-       public static String deleteForbiddenTags(String haystack) {
+       public static final String deleteForbiddenTags(String haystack) {
                try {
                        RE regex = new RE("<[ \t\r\n](.*?)script(.*?)/script(.*?)>",RE.REG_ICASE);
                        haystack = regex.substituteAll(haystack,"");
@@ -874,6 +824,49 @@ public final class StringUtil {
        }
 
        /**
+        * Deletes every match of re_tags from haystack by replacing it with the
+        * empty string. re_tags is declared outside this hunk; presumably it
+        * matches any HTML tag (TODO confirm against its definition).
+        * @param haystack the text to strip
+        * @return haystack with all re_tags matches removed
+        */
+       public static final String removeHTMLTags(String haystack){
+                       return re_tags.substituteAll(haystack,"");
+       }
+
+
+       /**
+        * Intended to delete all but the approved HTML tags, plus approved tags
+        * that carry malicious-looking attributes or link to unapproved protocols.
+        * NOTE: the original author's comment states that this method "doesn't work
+        * at all", so do not rely on it for sanitizing input.
+        *
+        * Four case-insensitive substitutions run in order, each replacing its
+        * matches with the empty string: tags with attributes whose name is not in
+        * approvedTags, attributeless tags not in approvedTags, any tag whose text
+        * contains one of badAttributes, and a-tags whose href does not start with
+        * one of approvedProtocols (optionally preceded by a quote).
+        *
+        * @param haystack the text to filter
+        * @return the filtered text, or null if an REException is thrown
+        *         (its stack trace is printed first)
+        */
+       public static String approveHTMLTags(String haystack){
+               try {
+                       String approvedTags="a|img|h1|h2|h3|h4|h5|h6|br|b|i|strong|p";
+                       String badAttributes="onAbort|onBlur|onChange|onClick|onDblClick|onDragDrop|onError|onFocus|onKeyDown|onKeyPress|onKeyUp|onLoad|onMouseDown|onMouseMove|onMouseOut|onMouseOver|onMouseUp|onMove|onReset|onResize|onSelect|onSubmit|onUnload";
+                       String approvedProtocols="rtsp|http|ftp|https|freenet|mailto";
+
+                       // kill all the bad tags that have attributes
+                       String s = "<\\s*/?\\s*(?!(("+approvedTags+")\\s))\\w+\\s[^>]*>";
+                       RE regex = new RE(s,RE.REG_ICASE);
+                       haystack = regex.substituteAll(haystack,"");
+
+                       // kill all the bad tags that are attributeless
+                       regex = new RE("<\\s*/?\\s*(?!(("+approvedTags+")\\s*>))\\w+\\s*>",RE.REG_ICASE);
+                       haystack = regex.substituteAll(haystack,"");
+
+                       // kill all the tags which have a javascript attribute like onLoad
+                       regex = new RE("<[^>]*("+badAttributes+")[^>]*>",RE.REG_ICASE);
+                       haystack = regex.substituteAll(haystack,"");
+
+                       // kill all the tags which include a url to an unacceptable protocol
+                       regex = new RE("<\\s*a\\s+[^>]*href=(?!(\'|\")?("+approvedProtocols+"))[^>]*>",RE.REG_ICASE);
+                       haystack = regex.substituteAll(haystack,"");
+
+                       return haystack;
+               } catch(REException ex){
+                       ex.printStackTrace();
+                       return null;
+               }
+       }
+
+
+       /**
         *  createHTML calls all regex methods for converting a string that is not
         *  HTML-encoded and returns an HTML-encoded String
         */
@@ -885,6 +878,7 @@ public final class StringUtil {
                return content;
        }
 
+
        /**
         *  createHTML calls all regex methods for converting a string that is not
         *  HTML-encoded and returns an HTML-encoded String
@@ -893,8 +887,9 @@ public final class StringUtil {
                content=convertNewline2Break(content);
                content=convertNewline2P(content);
                content=createMailLinks(content,producerDocRoot,mailImage);
-               content=createURLLinks(content,producerDocRoot,extImage,intImage);
+               content=createURLLinks(content,null,producerDocRoot,extImage,intImage);
                return content;
        }
 
 }
+