/*
- * Copyright (C) 2001, 2002 The Mir-coders group
+ * Copyright (C) 2005 The Mir-coders group
*
* This file is part of Mir.
*
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* In addition, as a special exception, The Mir-coders gives permission to link
- * the code of this program with any library licensed under the Apache Software License,
- * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library
- * (or with modified versions of the above that use the same license as the above),
- * and distribute linked combinations including the two. You must obey the
- * GNU General Public License in all respects for all of the code used other than
- * the above mentioned libraries. If you modify this file, you may extend this
+ * the code of this program with any library licensed under the Apache Software License.
+ * You must obey the GNU General Public License in all respects for all of the code used
+ * other than the above mentioned libraries. If you modify this file, you may extend this
* exception to your version of the file, but you are not obligated to do so.
* If you do not wish to do so, delete this exception statement from your version.
*/
import gnu.regexp.RE;
import gnu.regexp.REException;
-import java.io.File;
import java.text.NumberFormat;
import java.util.Calendar;
-import java.util.*;
-import java.util.StringTokenizer;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.TimeZone;
/**
- * Statische Hilfsmethoden zur Stringbehandlung
- *
- * @version $Id: StringUtil.java,v 1.33.2.1 2003/06/23 15:24:06 zapata Exp $
- * @author mir-coders group
- *
*/
public final class StringUtil {
+ private static TimeZone UTC = TimeZone.getTimeZone("UTC");
private static RE re_newline2br, re_brbr2p, re_mail, re_url, re_tags,
re_tables, re_forbiddenTags;
//precompile regex
re_newline2br = new RE("(\r?\n){1}");
re_brbr2p = new RE("(<br>\r?\n<br>){1,}");
- re_mail = new RE("([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+)\\.([a-zA-Z0-9_.-]+)");
+ re_mail = new RE("\\b([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+)\\.([a-zA-Z0-9_.-]+)\\b");
re_url = new RE("((https://)|(http://)|(ftp://)){1}([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/?([^ \t\r\n<>\\)\\]]+[^ \t\r\n.,<>\\)\\]])");
re_tags = new RE("<[^>]*>",RE.REG_ICASE);
re_tables = new RE("<[ \t\r\n/]*(table|td|tr)[ \t\r\n]*>",RE.REG_ICASE);
- re_forbiddenTags = new RE("<[ \t\r\n/]*(body|head|script)[ \t\r\n]*>",RE.REG_ICASE);
+ re_forbiddenTags = new RE("<[ \t\r\n/]*(html|meta|body|head|script)[ \t\r\n]*>",RE.REG_ICASE);
}
catch (REException e){
System.err.println("FATAL: StringUtil: could not precompile REGEX: "+e.toString());
webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
webdbDate.append(pad2(theDate.get(Calendar.DATE)));
+
return webdbDate.toString();
}
webdbDate.append(pad2(theDate.get(Calendar.DATE)));
webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
+
return webdbDate.toString();
}
webdbDate.append("-");
webdbDate.append(pad2(theDate.get(Calendar.DATE)));
webdbDate.append("T");
- webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
+ webdbDate.append(pad2(theDate.get(Calendar.HOUR_OF_DAY)));
webdbDate.append(":");
webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
webdbDate.append(":");
}
/**
- * Ersetzt in String <code>s</code> das Regexp <code>pattern</code> durch <code>substitute</code>
- * @param s
+ * Replaces matches of <code>pattern</code> in <code>haystack</code> with <code>substitute</code>
+ * @param haystack
* @param pattern
* @param substitute
- * @return String mit den Ersetzungen
+ * @return String with replacements.
*/
public static String regexpReplace(String haystack, String pattern, String substitute) {
try {
* in den html-tag <p>
* nur sinnvoll, wenn text nicht im html-format eingegeben
*/
- public static String convertNewline2P(String haystack) {
+ private static String convertNewline2P(String haystack) {
return re_brbr2p.substituteAll(haystack,"\n</p><p>");
}
* in den html-tag <br>
* nur sinnvoll, wenn text nicht im html-format eingegeben
*/
- public static String convertNewline2Break(String haystack) {
+ private static String convertNewline2Break(String haystack) {
return re_newline2br.substituteAll(haystack,"$0<br />");
}
* in einen klickbaren link um
* nur sinnvoll, wenn text nicht im html-format eingegeben
*/
- public static String createMailLinks(String haystack) {
+ private static String createMailLinks(String haystack) {
return re_mail.substituteAll(haystack,"<a href=\"mailto:$0\">$0</a>");
}
* in einen klickbaren link um
* nur sinnvoll, wenn text nicht im html-format eingegeben
*/
- public static String createMailLinks(String haystack, String imageRoot, String mailImage) {
+ private static String createMailLinks(String haystack, String imageRoot, String mailImage) {
return re_mail.substituteAll(haystack,"<img src=\""+imageRoot+"/"+mailImage+"\" border=\"0\"/> <a href=\"mailto:$0\">$0</a>");
}
* in einen klickbaren link um
* nur sinnvoll, wenn text nicht im html-format eingegeben
*/
- public static String createURLLinks(String haystack) {
+ private static String createURLLinks(String haystack) {
return re_url.substituteAll(haystack,"<a href=\"$0\">$0</a>");
}
/**
* this routine takes text in url format and makes
* a clickable "<href>" link, removing any "illegal" html tags
- * @param haystack, the url
- * @param title, the href link text
- * @param imagRoot, the place to find icons
- * @param extImage, the url of the icon to show next to the link
+ * @param haystack the url
+ * @param title the href link text
+ * @param imageRoot the place to find icons
+ * @param extImage the url of the icon to show next to the link
* @return a String containing the url
*/
- public static String createURLLinks(String haystack, String title, String imageRoot,String extImage) {
+ private static String createURLLinks(String haystack, String title, String imageRoot,String extImage) {
if (title == null) {
return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/> <a href=\"$0\">$0</a>");
- } else {
- title = removeHTMLTags(title);
- return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/> <a href=\"$0\">"+title+"</a>");
}
+ title = removeHTMLTags(title);
+ return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/> <a href=\"$0\">"+title+"</a>");
}
/**
* this routine takes text in url format and makes
* a clickable "<href>" link, removing any "illegal" html tags
- * @param haystack, the url
- * @param imageRoot, the place to find icons
- * @param extImage, the url of the icon to show next to the link
- * @param intImage, unused
+ * @param haystack the url
+ * @param title the href link text
+ * @param imageRoot the place to find icons
+ * @param extImage the url of the icon to show next to the link
+ * @param intImage unused
* @return a String containing the url
*/
- public static String createURLLinks(String haystack, String title, String imageRoot,String extImage,String intImage) {
+ private static String createURLLinks(String haystack, String title, String imageRoot,String extImage,String intImage) {
return createURLLinks(haystack, title, imageRoot, extImage);
}
/**
* this method deletes all html tags
*/
- public static final String removeHTMLTags(String haystack){
+ public static String removeHTMLTags(String haystack){
return re_tags.substituteAll(haystack,"");
}
- /**
- * this method deletes all but the approved tags html tags
- * it also deletes approved tags which contain malicious-looking attributes and doesn't work at all
- */
- public static String approveHTMLTags(String haystack){
- try {
- String approvedTags="a|img|h1|h2|h3|h4|h5|h6|br|b|i|strong|p";
- String badAttributes="onAbort|onBlur|onChange|onClick|onDblClick|onDragDrop|onError|onFocus|onKeyDown|onKeyPress|onKeyUp|onLoad|onMouseDown|onMouseMove|onMouseOut|onMouseOver|onMouseUp|onMove|onReset|onResize|onSelect|onSubmit|onUnload";
- String approvedProtocols="rtsp|http|ftp|https|freenet|mailto";
-
- // kill all the bad tags that have attributes
- String s = "<\\s*/?\\s*(?!(("+approvedTags+")\\s))\\w+\\s[^>]*>";
- RE regex = new RE(s,RE.REG_ICASE);
- haystack = regex.substituteAll(haystack,"");
-
- // kill all the bad tags that are attributeless
- regex = new RE("<\\s*/?\\s*(?!(("+approvedTags+")\\s*>))\\w+\\s*>",RE.REG_ICASE);
- haystack = regex.substituteAll(haystack,"");
-
- // kill all the tags which have a javascript attribute like onLoad
- regex = new RE("<[^>]*("+badAttributes+")[^>]*>",RE.REG_ICASE);
- haystack = regex.substituteAll(haystack,"");
-
- // kill all the tags which include a url to an unacceptable protocol
- regex = new RE("<\\s*a\\s+[^>]*href=(?!(\'|\")?("+approvedProtocols+"))[^>]*>",RE.REG_ICASE);
- haystack = regex.substituteAll(haystack,"");
-
- return haystack;
- } catch(REException ex){
- ex.printStackTrace();
- return null;
- }
- }
-
/**
* createHTML ruft alle regex-methoden zum unwandeln eines nicht
public static String createHTML(String content,String producerDocRoot,String mailImage,String extImage,String intImage){
content=convertNewline2Break(content);
content=convertNewline2P(content);
- content=createMailLinks(content,producerDocRoot,mailImage);
- content=createURLLinks(content,null,producerDocRoot,extImage,intImage);
+ content=createMailLinks(content, producerDocRoot,mailImage);
+ content=createURLLinks(content, null, producerDocRoot, extImage, intImage);
+
return content;
}
* Converts mir's horrible internal date format (yyyy-MM-dd HH:mm:ss+zz) into a java Date
*
* @param anInternalDate
- * @return
*/
public static Date convertMirInternalDateToDate(String anInternalDate) {
Calendar calendar = new GregorianCalendar();
seconds = Integer.parseInt(anInternalDate.substring(17,19));
timezoneOffset = Integer.parseInt(anInternalDate.substring(20,22));
- if (anInternalDate.charAt(19) == '-')
+ if (anInternalDate.charAt(19) == '-') {
timezoneOffset = -timezoneOffset;
+ }
- calendar.setTimeZone(TimeZone.getTimeZone("UTC"));
+ calendar.setTimeZone(UTC);
calendar.set(year, month-1, day, hours, minutes, seconds);
calendar.add(Calendar.HOUR, -timezoneOffset);