/*
- * Copyright (C) 2001, 2002 The Mir-coders group
+ * Copyright (C) 2005 The Mir-coders group
*
* This file is part of Mir.
*
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* In addition, as a special exception, The Mir-coders gives permission to link
- * the code of this program with the com.oreilly.servlet library, any library
- * licensed under the Apache Software License, The Sun (tm) Java Advanced
- * Imaging library (JAI), The Sun JIMI library (or with modified versions of
- * the above that use the same license as the above), and distribute linked
- * combinations including the two. You must obey the GNU General Public
- * License in all respects for all of the code used other than the above
- * mentioned libraries. If you modify this file, you may extend this exception
- * to your version of the file, but you are not obligated to do so. If you do
- * not wish to do so, delete this exception statement from your version.
+ * the code of this program with any library licensed under the Apache Software License.
+ * You must obey the GNU General Public License in all respects for all of the code used
+ * other than the above mentioned libraries. If you modify this file, you may extend this
+ * exception to your version of the file, but you are not obligated to do so.
+ * If you do not wish to do so, delete this exception statement from your version.
*/
-
package mir.misc;
-import java.io.*;
-import java.lang.*;
-import java.util.*;
-import java.text.NumberFormat;
-import gnu.regexp.*;
+import gnu.regexp.RE;
+import gnu.regexp.REException;
+
+import java.text.NumberFormat;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.TimeZone;
/**
- * Statische Hilfsmethoden zur Stringbehandlung
- *
- * @version $Id: StringUtil.java,v 1.30 2002/12/14 01:37:43 zapata Exp $
- * @author mir-coders group
- *
*/
public final class StringUtil {
- private static RE re_newline2br, re_brbr2p, re_mail, re_url, re_tags;
+ private static TimeZone UTC = TimeZone.getTimeZone("UTC");
+ private static RE re_newline2br, re_brbr2p, re_mail, re_url, re_tags,
+ re_tables, re_forbiddenTags;
private StringUtil() { } // this avoids contruction
//precompile regex
re_newline2br = new RE("(\r?\n){1}");
re_brbr2p = new RE("(<br>\r?\n<br>){1,}");
- re_mail = new RE("([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+)\\.([a-zA-Z0-9_.-]+)");
+ re_mail = new RE("\\b([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+)\\.([a-zA-Z0-9_.-]+)\\b");
re_url = new RE("((https://)|(http://)|(ftp://)){1}([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/?([^ \t\r\n<>\\)\\]]+[^ \t\r\n.,<>\\)\\]])");
re_tags = new RE("<[^>]*>",RE.REG_ICASE);
+ re_tables = new RE("<[ \t\r\n/]*(table|td|tr)[ \t\r\n]*>",RE.REG_ICASE);
+ re_forbiddenTags = new RE("<[ \t\r\n/]*(html|meta|body|head|script)[ \t\r\n]*>",RE.REG_ICASE);
}
catch (REException e){
System.err.println("FATAL: StringUtil: could not precompile REGEX: "+e.toString());
webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
webdbDate.append(pad2(theDate.get(Calendar.DATE)));
+
return webdbDate.toString();
}
webdbDate.append(pad2(theDate.get(Calendar.DATE)));
webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
+
return webdbDate.toString();
}
webdbDate.append("-");
webdbDate.append(pad2(theDate.get(Calendar.DATE)));
webdbDate.append("T");
- webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
+ webdbDate.append(pad2(theDate.get(Calendar.HOUR_OF_DAY)));
webdbDate.append(":");
webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
webdbDate.append(":");
* this method deletes all <script>, <body> and <head>-tags
*/
public static final String deleteForbiddenTags(String haystack) {
- try {
- RE regex = new RE("<[ \t\r\n](.*?)script(.*?)/script(.*?)>",RE.REG_ICASE);
- haystack = regex.substituteAll(haystack,"");
- regex = new RE("<head>(.*?)</head>");
- haystack = regex.substituteAll(haystack,"");
- regex = new RE("<[ \t\r\n/]*body(.*?)>");
- haystack = regex.substituteAll(haystack,"");
- return haystack;
- } catch(REException ex){
- return null;
- }
+ return re_forbiddenTags.substituteAll(haystack,"");
+ }
+
+ /**
+  * Strips HTML table markup from a text.
+  *
+  * <p>Every match of the precompiled, case-insensitive {@code re_tables}
+  * expression is replaced by the empty string. That expression matches opening
+  * and closing {@code table}, {@code tr} and {@code td} tags that carry no
+  * attributes; the text between the tags is kept.
+  *
+  * @param haystack the text to clean
+  * @return the text with all matches removed
+  */
+ public static final String deleteHTMLTableTags(String haystack) {
+   final String replacement = "";
+   String withoutTableTags = re_tables.substituteAll(haystack, replacement);
+   return withoutTableTags;
}
/**
}
/**
- * converts string from format: yyyy-mm-dd__hh:mm:ss.d
- * to dd.mm.yyyy hh:mm
- */
- public static String dateToReadableDate(String date) {
- StringBuffer returnDate = new StringBuffer();
- if (date!=null) {
-
- returnDate.append(date.substring(8,10)).append('.');
- returnDate.append(date.substring(5,7)).append('.');
- returnDate.append(date.substring(0,4)).append(' ');
- returnDate.append(date.substring(11,16));
- }
- return returnDate.toString();
- }
-
- /**
- * converts string from format: yyyy-mm-dd__hh:mm:ss.d
- * to yyyy
- */
- public static String dateToYear (String date) {
- StringBuffer returnDate = new StringBuffer();
- if (date!=null) {
-
- returnDate.append(date.substring(0,4));
- }
- return returnDate.toString();
- }
-
- /**
- * converts string from format: yyyy-mm-dd__hh:mm:ss.d
- * to [m]m
- */
- public static String dateToMonth (String date) {
- StringBuffer returnDate = new StringBuffer();
- if (date!=null) {
- if (!date.substring(5,6).equalsIgnoreCase("0")) returnDate.append(date.substring(5,7));
- else returnDate.append(date.substring(6,7));
- }
- return returnDate.toString();
- }
-
- /**
- * converts string from format: yyyy-mm-dd__hh:mm:ss.d
- * to [d]d
- */
- public static String dateToDayOfMonth (String date) {
- StringBuffer returnDate = new StringBuffer();
- if (date!=null) {
- if (!date.substring(8,9).equalsIgnoreCase("0")) returnDate.append(date.substring(8,10));
- else returnDate.append(date.substring(9,10));
- }
- return returnDate.toString();
- }
-
- /**
- * converts string from format: yyyy-mm-dd__hh:mm:ss.d
- * to hh:mm
- */
- public static String dateToTime (String date) {
- StringBuffer returnDate = new StringBuffer();
- if (date!=null) {
- returnDate.append(date.substring(11,16));
- }
- return returnDate.toString();
- }
-
- /**
- * Splits the provided CSV text into a list. stolen wholesale from
- * from Jakarta Turbine StrinUtils.java -mh
- *
- * @param text The CSV list of values to split apart.
- * @param separator The separator character.
- * @return The list of values.
- */
- public static String[] split(String text, String separator)
- {
- StringTokenizer st = new StringTokenizer(text, separator);
- String[] values = new String[st.countTokens()];
- int pos = 0;
- while (st.hasMoreTokens())
- {
- values[pos++] = st.nextToken();
- }
- return values;
- }
-
- /**
- * Joins the elements of the provided array into a single string
- * containing a list of CSV elements. Stolen wholesale from Jakarta
- * Turbine StringUtils.java. -mh
- *
- * @param list The list of values to join together.
- * @param separator The separator character.
- * @return The CSV text.
- */
- public static String join(String[] list, String separator)
- {
- StringBuffer csv = new StringBuffer();
- for (int i = 0; i < list.length; i++)
- {
- if (i > 0)
- {
- csv.append(separator);
- }
- csv.append(list[i]);
- }
- return csv.toString();
- }
-
-
- /**
- * schließt einen String in Anführungsszeichen ein, falls er Leerzeichen o.ä. enthält
- *
- * @return gequoteter String
- */
- public static String quoteIfNecessary(String s) {
- for (int i = 0; i < s.length(); i++)
- if (!(Character.isLetterOrDigit(s.charAt(i)) || s.charAt(i) == '.'))
- return quote(s, '"');
- return s;
- }
-
- /**
- * schließt <code>s</code> in <code>'</code> ein und setzt Backslashes vor
- * "gefährliche" Zeichen innerhalb des Strings
- * Quotes special SQL-characters in <code>s</code>
- *
- * @return geqoteter String
- */
-/*
- public static String quote(String s)
- {
- //String s2 = quote(s, '\'');
- //Quickhack ÊÊ Ê Ê Ê Ê Ê Ê
- //Because of '?-Bug in Postgresql-JDBC-Driver
- StringBuffer temp = new StringBuffer();
- for(int i=0;i<s.length();i++){
- if(s.charAt(i)=='\''){
- temp.append("'");
- } else {
- temp.append(s.charAt(i));
- }
- }
- String s2 = temp.toString();
- //end Quickhack
-
- s2 = quote(s2, '\"');
- return s2;
- }
-*/
- /**
- * schließt <code>s</code> in <code>'</code> ein und setzt Backslashes vor
- * "gefährliche" Zeichen innerhalb des Strings
- *
- * @param s String, der gequoted werden soll
- * @param quoteChar zu quotendes Zeichen
- * @return gequoteter String
- */
- public static String quote(String s, char quoteChar)
- {
- StringBuffer buf = new StringBuffer(s.length());
- int pos = 0;
- while (pos < s.length()) {
- int i = s.indexOf(quoteChar, pos);
- if (i < 0) i = s.length();
- buf.append(s.substring(pos, i));
- pos = i;
- if (pos < s.length()) {
- buf.append('\\');
- buf.append(quoteChar);
- pos++;
- }
- }
- return buf.toString();
- }
-
- /**
- * replaces dangerous characters in <code>s</code>
- *
- */
-
- public static String unquote(String s)
- {
- char quoteChar='\'';
- StringBuffer buf = new StringBuffer(s.length());
- int pos = 0;
- String searchString = "\\"+quoteChar;
- while (pos < s.length()) {
- int i = s.indexOf(searchString, pos);
- if (i < 0) i = s.length();
- buf.append(s.substring(pos, i));
- pos = i+1;
- }
- return buf.toString();
- }
-
- /**
- * Wandelet String in byte[] um.
- * @param s
- * @return byte[] des String
- */
-
- public static byte[] stringToBytes(String s) {
- String crlf = System.getProperty("line.separator");
- if (!crlf.equals("\n"))
- s = replace(s, "\n", crlf);
- // byte[] buf = new byte[s.length()];
- byte[] buf = s.getBytes();
- return buf;
- }
-
- /**
- * Ersetzt in String <code>s</code> das <code>pattern</code> durch <code>substitute</code>
- * @param s
- * @param pattern
- * @param substitute
- * @return String mit den Ersetzungen
- */
- public static String replace(String s, String pattern, String substitute) {
- int i = 0, pLen = pattern.length(), sLen = substitute.length();
- StringBuffer buf = new StringBuffer(s.length());
- while (true) {
- int j = s.indexOf(pattern, i);
- if (j < 0) {
- buf.append(s.substring(i));
- break;
- } else {
- buf.append(s.substring(i, j));
- buf.append(substitute);
- i = j+pLen;
- }
- }
- return buf.toString();
- }
-
- /**
- * Ersetzt in String <code>s</code> das Regexp <code>pattern</code> durch <code>substitute</code>
- * @param s
+ * Replaces in <code>haystack</code> matching <code>pattern</code> by <code>substitute</code>
+ * @param haystack
* @param pattern
* @param substitute
- * @return String mit den Ersetzungen
+ * @return String with replacements.
*/
public static String regexpReplace(String haystack, String pattern, String substitute) {
try {
}
}
-
-
-
- /**
- * Fügt einen Separator an den Pfad an
- * @param path
- * @return Pfad mit Separator am Ende
- */
- public static final String addSeparator (String path) {
- return path.length() == 0 || path.endsWith(File.separator) ? path : path
- + File.separatorChar;
- }
-
- /**
- * Fügt ein <code>/</code> ans ende des Strings and
- * @param path
- * @return Pfad mit <code>/</code> am Ende
- */
- public static final String addSlash (String path) {
- return path.length() == 0 || path.endsWith("/") ? path : path + '/';
- }
-
/**
- * Löscht <code>/</code> am Ende des Strings, falls vorhanden
+ * Löscht <code>/</code> am Ende des Strings, falls vorhanden
* @param path
* @return String ohne <code>/</code> am Ende
*/
}
/**
- * Checks to see if the path is absolute by looking for a leading file
- * separater
- * @param path
- * @return
- */
- public static boolean isAbsolutePath (String path) {
- return path.startsWith(File.separator);
- }
-
- /**
- * Löscht Slash am Anfang des Strings
- * @param path
- * @return
- */
- public static String removeFirstSlash (String path) {
- return path.startsWith("/") ? path.substring(1) : path;
- }
-
- /**
* formatiert eine Zahl (0-99) zweistellig (z.B. 5 -> 05)
* @return zwistellige Zahl
*/
}
/**
- * Konvertiert Unix-Linefeeds in Win-Linefeeds
- * @param s
- * @return Konvertierter String
- */
- public static String unixLineFeedsToWin(String s) {
- int i = -1;
- while (true) {
- i = s.indexOf('\n', i+1);
- if (i < 0) break;
- if ((i == 0 || s.charAt(i-1) != '\r') &&
- (i == s.length()-1 || s.charAt(i+1) != '\r')) {
- s = s.substring(0, i)+'\r'+s.substring(i);
- i++;
- }
- }
- return s;
- }
-
- public static String replaceQuot(String s) {
- StringBuffer buffer = new StringBuffer();
- for(int j = 0; j < s.length();j++){
- if(s.charAt(j)=='&'){
- if(s.indexOf( """,j) == j) {
- buffer.append( "\"" );
- j += 5;
- }//if
- } else {
- buffer.append(s.charAt(j));
- }//else
- }//for
- return buffer.toString();
- }
-
- /**
- * schnellere Variante der String.toLowerCase()-Routine
- *
- * @return String in Kleinbuchsten
- */
- public static String toLowerCase(String s) {
- int l = s.length();
- char[] a = new char[l];
- for (int i = 0; i < l; i++)
- a[i] = Character.toLowerCase(s.charAt(i));
- return new String(a);
- }
-
- /**
- * Findet <code>element</code> im String-Array <code>array</code>
- * @param array
- * @param element
- * @return Fundstelle als int oder -1
- */
- public static int indexOf(String[] array, String element) {
- if (array != null)
- for (int i = 0; i < array.length; i++)
- if (array[i].equals(element))
- return i;
- return -1;
- }
-
- /**
- * Testet auf Vorkommen von <code>element</code> in <code>array</code>
- * @param array String-Array
- * @param element
- * @return true wenn <code>element</code> vorkommt, sonst false
- */
- public static boolean contains(String[] array, String element) {
- return indexOf(array, element) >= 0;
- }
-
- /**
- * Ermittelt CRC-Prüfsumme von String <code>s</code>
- * @param s
- * @return CRC-Prüfsumme
- */
- public static int getCRC(String s) {
- int h = 0;
- char val[] = s.toCharArray();
- int len = val.length;
-
- for (int i = 0 ; i < len; i++) {
- h &= 0x7fffffff;
- h = (((h >> 30) | (h << 1)) ^ (val[i]+i));
- }
-
- return (h << 8) | (len & 0xff);
- }
-
- /**
- * Liefert Default-Wert def zurück, wenn String <code>s</code>
+ * Liefert Default-Wert def zurück, wenn String <code>s</code>
* kein Integer ist.
*
* @param s
}
}
- /**
- * Liefert Defaultwert def zurück, wenn s nicht zu einem float geparsed werden kann.
- * @param s
- * @param def
- * @return geparster float oder def
- */
- public static float parseFloat(String s, float def) {
- if (s == null) return def;
- try {
- return new Float(s).floatValue();
- } catch (NumberFormatException e) {
- return def;
- }
- }
-
- /**
- * Findet Ende eines Satzes in String <code>text</code>
- * @param text
- * @param startIndex
- * @return index des Satzendes, oder -1
- */
- public static int findEndOfSentence(String text, int startIndex) {
- while (true) {
- int i = text.indexOf('.', startIndex);
- if (i < 0) return -1;
- if (i > 0 && !Character.isDigit(text.charAt(i-1)) &&
- (i+1 >= text.length()
- || text.charAt(i+1) == ' '
- || text.charAt(i+1) == '\n'
- || text.charAt(i+1) == '\t'))
- return i+1;
- startIndex = i+1;
- }
- }
-
- /**
- * Findet Wortende in String <code>text</code> ab <code>startIndex</code>
- * @param text
- * @param startIndex
- * @return Index des Wortendes, oder -1
- */
- public static int findEndOfWord(String text, int startIndex) {
- int i = text.indexOf(' ', startIndex),
- j = text.indexOf('\n', startIndex);
- if (i < 0) i = text.length();
- if (j < 0) j = text.length();
- return Math.min(i, j);
- }
-
/**
* convertNewline2P ist eine regex-routine zum umwandeln von 2 oder mehr newlines (\n)
* in den html-tag <p>
* nur sinnvoll, wenn text nicht im html-format eingegeben
*/
- public static String convertNewline2P(String haystack) {
+ private static String convertNewline2P(String haystack) {
return re_brbr2p.substituteAll(haystack,"\n</p><p>");
}
* in den html-tag <br>
* nur sinnvoll, wenn text nicht im html-format eingegeben
*/
- public static String convertNewline2Break(String haystack) {
+ private static String convertNewline2Break(String haystack) {
return re_newline2br.substituteAll(haystack,"$0<br />");
}
* in einen klickbaren link um
* nur sinnvoll, wenn text nicht im html-format eingegeben
*/
- public static String createMailLinks(String haystack) {
+ private static String createMailLinks(String haystack) {
return re_mail.substituteAll(haystack,"<a href=\"mailto:$0\">$0</a>");
}
* in einen klickbaren link um
* nur sinnvoll, wenn text nicht im html-format eingegeben
*/
- public static String createMailLinks(String haystack, String imageRoot, String mailImage) {
+ private static String createMailLinks(String haystack, String imageRoot, String mailImage) {
return re_mail.substituteAll(haystack,"<img src=\""+imageRoot+"/"+mailImage+"\" border=\"0\"/> <a href=\"mailto:$0\">$0</a>");
}
* in einen klickbaren link um
* nur sinnvoll, wenn text nicht im html-format eingegeben
*/
- public static String createURLLinks(String haystack) {
+ private static String createURLLinks(String haystack) {
return re_url.substituteAll(haystack,"<a href=\"$0\">$0</a>");
}
/**
* this routine takes text in url format and makes
* a clickaeble "<href>" link removing any "illegal" html tags
- * @param haystack, the url
- * @param title, the href link text
- * @param imagRoot, the place to find icons
- * @param extImage, the url of the icon to show next to the link
+ * @param haystack the url
+ * @param title the href link text
+ * @param imageRoot the place to find icons
+ * @param extImage the url of the icon to show next to the link
* @return a String containing the url
*/
- public static String createURLLinks(String haystack, String title, String imageRoot,String extImage) {
+ private static String createURLLinks(String haystack, String title, String imageRoot,String extImage) {
if (title == null) {
return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/> <a href=\"$0\">$0</a>");
- } else {
- title = removeHTMLTags(title);
- return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/> <a href=\"$0\">"+title+"</a>");
}
+ title = removeHTMLTags(title);
+ return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/> <a href=\"$0\">"+title+"</a>");
}
/**
* this routine takes text in url format and makes
* a clickaeble "<href>" link removing any "illegal" html tags
- * @param haystack, the url
- * @param imageRoot, the place to find icons
- * @param extImage, the url of the icon to show next to the link
- * @param intImage, unused
+ * @param haystack the url
+ * @param imageRoot the place to find icons
+ * @param extImage the url of the icon to show next to the link
+ * @param intImage unused
* @return a String containing the url
*/
- public static String createURLLinks(String haystack, String title, String imageRoot,String extImage,String intImage) {
+ private static String createURLLinks(String haystack, String title, String imageRoot,String extImage,String intImage) {
return createURLLinks(haystack, title, imageRoot, extImage);
}
/**
* this method deletes all html tags
*/
- public static final String removeHTMLTags(String haystack){
+ public static String removeHTMLTags(String haystack){
return re_tags.substituteAll(haystack,"");
}
- /**
- * this method deletes all but the approved tags html tags
- * it also deletes approved tags which contain malicious-looking attributes and doesn't work at all
- */
- public static String approveHTMLTags(String haystack){
- try {
- String approvedTags="a|img|h1|h2|h3|h4|h5|h6|br|b|i|strong|p";
- String badAttributes="onAbort|onBlur|onChange|onClick|onDblClick|onDragDrop|onError|onFocus|onKeyDown|onKeyPress|onKeyUp|onLoad|onMouseDown|onMouseMove|onMouseOut|onMouseOver|onMouseUp|onMove|onReset|onResize|onSelect|onSubmit|onUnload";
- String approvedProtocols="rtsp|http|ftp|https|freenet|mailto";
-
- // kill all the bad tags that have attributes
- String s = "<\\s*/?\\s*(?!(("+approvedTags+")\\s))\\w+\\s[^>]*>";
- RE regex = new RE(s,RE.REG_ICASE);
- haystack = regex.substituteAll(haystack,"");
-
- // kill all the bad tags that are attributeless
- regex = new RE("<\\s*/?\\s*(?!(("+approvedTags+")\\s*>))\\w+\\s*>",RE.REG_ICASE);
- haystack = regex.substituteAll(haystack,"");
-
- // kill all the tags which have a javascript attribute like onLoad
- regex = new RE("<[^>]*("+badAttributes+")[^>]*>",RE.REG_ICASE);
- haystack = regex.substituteAll(haystack,"");
-
- // kill all the tags which include a url to an unacceptable protocol
- regex = new RE("<\\s*a\\s+[^>]*href=(?!(\'|\")?("+approvedProtocols+"))[^>]*>",RE.REG_ICASE);
- haystack = regex.substituteAll(haystack,"");
-
- return haystack;
- } catch(REException ex){
- ex.printStackTrace();
- return null;
- }
- }
-
/**
* createHTML ruft alle regex-methoden zum unwandeln eines nicht
public static String createHTML(String content,String producerDocRoot,String mailImage,String extImage,String intImage){
content=convertNewline2Break(content);
content=convertNewline2P(content);
- content=createMailLinks(content,producerDocRoot,mailImage);
- content=createURLLinks(content,null,producerDocRoot,extImage,intImage);
+ content=createMailLinks(content, producerDocRoot,mailImage);
+ content=createURLLinks(content, null, producerDocRoot, extImage, intImage);
+
return content;
}
+ /**
+  * Converts mir's internal date format ({@code yyyy-MM-dd HH:mm:ss+zz}) into a
+  * {@link Date}.
+  *
+  * <p>The fields are read from fixed positions: year 0-3, month 5-6, day 8-9,
+  * hours 11-12, minutes 14-15, seconds 17-18, the offset sign at position 19
+  * and a whole-hour zone offset at positions 20-21. The separator characters in
+  * between are not inspected. The wall-clock fields are interpreted as UTC and
+  * the zone offset is then subtracted to obtain the actual instant.
+  *
+  * @param anInternalDate the date string; must be at least 22 characters long
+  * @return the instant described by the string, with the millisecond part set to zero
+  * @throws NullPointerException if {@code anInternalDate} is null
+  * @throws StringIndexOutOfBoundsException if the string is shorter than 22 characters
+  * @throws NumberFormatException if one of the numeric fields is not a number
+  */
+ public static Date convertMirInternalDateToDate(String anInternalDate) {
+   int year = Integer.parseInt(anInternalDate.substring(0, 4));
+   int month = Integer.parseInt(anInternalDate.substring(5, 7));
+   int day = Integer.parseInt(anInternalDate.substring(8, 10));
+   int hours = Integer.parseInt(anInternalDate.substring(11, 13));
+   int minutes = Integer.parseInt(anInternalDate.substring(14, 16));
+   int seconds = Integer.parseInt(anInternalDate.substring(17, 19));
+
+   int timezoneOffset = Integer.parseInt(anInternalDate.substring(20, 22));
+   if (anInternalDate.charAt(19) == '-') {
+     timezoneOffset = -timezoneOffset;
+   }
+
+   Calendar calendar = new GregorianCalendar(UTC);
+   // set(...) retains every field it is not given; without clear() the result
+   // would inherit the millisecond part of the current time, so parsing the same
+   // string twice would produce unequal dates
+   calendar.clear();
+   // Calendar months are zero-based
+   calendar.set(year, month - 1, day, hours, minutes, seconds);
+   // the string holds local wall-clock time; shift by the offset to get UTC
+   calendar.add(Calendar.HOUR_OF_DAY, -timezoneOffset);
+
+   return calendar.getTime();
+ }
+
}