2 * Copyright (C) 2001, 2002 The Mir-coders group
\r
4 * This file is part of Mir.
\r
6 * Mir is free software; you can redistribute it and/or modify
\r
7 * it under the terms of the GNU General Public License as published by
\r
8 * the Free Software Foundation; either version 2 of the License, or
\r
9 * (at your option) any later version.
\r
11 * Mir is distributed in the hope that it will be useful,
\r
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
\r
14 * GNU General Public License for more details.
\r
16 * You should have received a copy of the GNU General Public License
\r
17 * along with Mir; if not, write to the Free Software
\r
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
\r
20 * In addition, as a special exception, The Mir-coders gives permission to link
\r
21 * the code of this program with any library licensed under the Apache Software License,
\r
22 * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library
\r
23 * (or with modified versions of the above that use the same license as the above),
\r
24 * and distribute linked combinations including the two. You must obey the
\r
25 * GNU General Public License in all respects for all of the code used other than
\r
26 * the above mentioned libraries. If you modify this file, you may extend this
\r
27 * exception to your version of the file, but you are not obligated to do so.
\r
28 * If you do not wish to do so, delete this exception statement from your version.
\r
32 import java.text.NumberFormat;
\r
33 import java.util.Calendar;
\r
34 import java.util.Date;
\r
35 import java.util.GregorianCalendar;
\r
36 import java.util.TimeZone;
\r
38 import gnu.regexp.RE;
\r
39 import gnu.regexp.REException;
\r
/**
 * Static helper methods for string handling.
 *
 * @version $Id: StringUtil.java,v 1.33.2.3 2003/07/03 22:47:02 zapata Exp $
 * @author mir-coders group
 */
\r
48 public final class StringUtil {
\r
50 private static RE re_newline2br, re_brbr2p, re_mail, re_url, re_tags,
\r
51 re_tables, re_forbiddenTags;
\r
53 private StringUtil() { } // this avoids contruction
\r
58 re_newline2br = new RE("(\r?\n){1}");
\r
59 re_brbr2p = new RE("(<br>\r?\n<br>){1,}");
\r
60 re_mail = new RE("([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+)\\.([a-zA-Z0-9_.-]+)");
\r
61 re_url = new RE("((https://)|(http://)|(ftp://)){1}([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/?([^ \t\r\n<>\\)\\]]+[^ \t\r\n.,<>\\)\\]])");
\r
62 re_tags = new RE("<[^>]*>",RE.REG_ICASE);
\r
63 re_tables = new RE("<[ \t\r\n/]*(table|td|tr)[ \t\r\n]*>",RE.REG_ICASE);
\r
64 re_forbiddenTags = new RE("<[ \t\r\n/]*(html|meta|body|head|script)[ \t\r\n]*>",RE.REG_ICASE);
\r
66 catch (REException e){
\r
67 System.err.println("FATAL: StringUtil: could not precompile REGEX: "+e.toString());
\r
72 * Formats a number with the specified minimum and maximum number of digits.
\r
74 public static synchronized String zeroPaddingNumber(long value, int minDigits,
\r
77 NumberFormat numberFormat = NumberFormat.getInstance();
\r
78 numberFormat.setMinimumIntegerDigits(minDigits);
\r
79 numberFormat.setMaximumIntegerDigits(maxDigits);
\r
80 return numberFormat.format(value);
\r
84 * Wandelt Datum in einen 8-ziffrigen String um (yyyymmdd)
\r
86 * @return 8-ziffriger String (yyyymmdd)
\r
89 public static final String date2webdbDate (GregorianCalendar theDate) {
\r
90 StringBuffer webdbDate = new StringBuffer();
\r
91 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
\r
92 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
\r
93 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
\r
94 return webdbDate.toString();
\r
98 * Wandelt Calendar in einen 12-ziffrigen String um (yyyymmddhhmm)
\r
100 * @return 12-ziffriger String (yyyymmdd)
\r
103 public static final String date2webdbDateTime (GregorianCalendar theDate) {
\r
104 StringBuffer webdbDate = new StringBuffer();
\r
105 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
\r
106 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
\r
107 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
\r
108 webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
\r
109 webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
\r
110 return webdbDate.toString();
\r
114 * Return a http://www.w3.org/TR/NOTE-datetime formatted date (yyyy-mm-ddThh:mm:ssTZ)
\r
116 * @return w3approved datetime
\r
119 public static final String date2w3DateTime (GregorianCalendar theDate) {
\r
120 StringBuffer webdbDate = new StringBuffer();
\r
121 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
\r
122 webdbDate.append("-");
\r
123 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
\r
124 webdbDate.append("-");
\r
125 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
\r
126 webdbDate.append("T");
\r
127 webdbDate.append(pad2(theDate.get(Calendar.HOUR_OF_DAY)));
\r
128 webdbDate.append(":");
\r
129 webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
\r
130 webdbDate.append(":");
\r
131 webdbDate.append(pad2(theDate.get(Calendar.SECOND)));
\r
132 //assumes you are an hour-multiple away from UTC....
\r
133 int offset=(theDate.get(Calendar.ZONE_OFFSET)/(60*60*1000));
\r
135 webdbDate.append("-");
\r
138 webdbDate.append("+");
\r
140 webdbDate.append(pad2(Math.abs(offset)));
\r
141 webdbDate.append(":00");
\r
142 return webdbDate.toString();
\r
146 * wandelt Calendar in dd.mm.yyyy / hh.mm um
\r
148 * @return String mit (dd.mm.yyyy / hh.mm um)
\r
150 public static String date2readableDateTime (GregorianCalendar theDate) {
\r
151 String readable = "";
\r
153 readable += pad2(theDate.get(Calendar.DATE));
\r
154 readable += "." + pad2(theDate.get(Calendar.MONTH) + 1);
\r
155 readable += "." + String.valueOf(theDate.get(Calendar.YEAR));
\r
156 hour = theDate.get(Calendar.HOUR);
\r
157 if (theDate.get(Calendar.AM_PM) == Calendar.PM)
\r
159 readable += " / " + pad2(hour);
\r
160 readable += ":" + pad2(theDate.get(Calendar.MINUTE));
\r
165 * deleteForbiddenTags
\r
166 * this method deletes all <script>, <body> and <head>-tags
\r
168 public static final String deleteForbiddenTags(String haystack) {
\r
169 return re_forbiddenTags.substituteAll(haystack,"");
\r
173 * deleteHTMLTableTags
\r
174 * this method deletes all <table>, <tr> and <td>-tags
\r
176 public static final String deleteHTMLTableTags(String haystack) {
\r
177 return re_tables.substituteAll(haystack,"");
\r
181 * wandelt eine Datum in einen 8-buchstabigen String, der durch <code>/</code>
\r
185 * @return String mit <code>/yyyy/mm/dd</code>
\r
187 public static final String webdbDate2path (String webdbDate) {
\r
188 StringBuffer path = new StringBuffer();
\r
189 path.append("/").append(webdbDate.substring(0, 4));
\r
190 path.append("/").append(webdbDate.substring(4, 6));
\r
193 //path.append("/").append(webdbDate.substring(6, 8));
\r
194 return path.toString();
\r
198 * Ersetzt in String <code>s</code> das Regexp <code>pattern</code> durch <code>substitute</code>
\r
201 * @param substitute
\r
202 * @return String mit den Ersetzungen
\r
204 public static String regexpReplace(String haystack, String pattern, String substitute) {
\r
206 RE regex = new RE(pattern);
\r
207 return regex.substituteAll(haystack,substitute);
\r
208 } catch(REException ex){
\r
214 * L?scht <code>/</code> am Ende des Strings, falls vorhanden
\r
216 * @return String ohne <code>/</code> am Ende
\r
218 public static final String removeSlash (String path) {
\r
219 return path.length() > 1 && path.endsWith("/") ? path.substring(0, path.length()
\r
224 * formatiert eine Zahl (0-99) zweistellig (z.B. 5 -> 05)
\r
225 * @return zwistellige Zahl
\r
227 public static String pad2 (int number) {
\r
228 return number < 10 ? "0" + number : String.valueOf(number);
\r
232 * formatiert eine Zahl (0-999) dreistellig (z.B. 7 -> 007)
\r
234 * @return 3-stellige Zahl
\r
236 public static String pad3 (int number) {
\r
237 return number < 10 ? "00" + number : number < 100 ? "0" + number : String.valueOf(number);
\r
241 * Liefert Default-Wert def zur?ck, wenn String <code>s</code>
\r
242 * kein Integer ist.
\r
246 * @return geparster int aus s oder def
\r
248 public static int parseInt(String s, int def) {
\r
249 if (s == null) return def;
\r
251 return Integer.parseInt(s);
\r
252 } catch (NumberFormatException e) {
\r
259 * convertNewline2P ist eine regex-routine zum umwandeln von 2 oder mehr newlines (\n)
\r
260 * in den html-tag <p>
\r
261 * nur sinnvoll, wenn text nicht im html-format eingegeben
\r
263 public static String convertNewline2P(String haystack) {
\r
264 return re_brbr2p.substituteAll(haystack,"\n</p><p>");
\r
268 * convertNewline2Break ist eine regex-routine zum umwandeln von 1 newline (\n)
\r
269 * in den html-tag <br>
\r
270 * nur sinnvoll, wenn text nicht im html-format eingegeben
\r
272 public static String convertNewline2Break(String haystack) {
\r
273 return re_newline2br.substituteAll(haystack,"$0<br />");
\r
277 * createMailLinks wandelt text im email-adressenformat
\r
278 * in einen klickbaren link um
\r
279 * nur sinnvoll, wenn text nicht im html-format eingegeben
\r
281 public static String createMailLinks(String haystack) {
\r
282 return re_mail.substituteAll(haystack,"<a href=\"mailto:$0\">$0</a>");
\r
287 * createMailLinks wandelt text im email-adressenformat
\r
288 * in einen klickbaren link um
\r
289 * nur sinnvoll, wenn text nicht im html-format eingegeben
\r
291 public static String createMailLinks(String haystack, String imageRoot, String mailImage) {
\r
292 return re_mail.substituteAll(haystack,"<img src=\""+imageRoot+"/"+mailImage+"\" border=\"0\"/> <a href=\"mailto:$0\">$0</a>");
\r
297 * createURLLinks wandelt text im url-format
\r
298 * in einen klickbaren link um
\r
299 * nur sinnvoll, wenn text nicht im html-format eingegeben
\r
301 public static String createURLLinks(String haystack) {
\r
302 return re_url.substituteAll(haystack,"<a href=\"$0\">$0</a>");
\r
306 * this routine takes text in url format and makes
\r
307 * a clickaeble "<href>" link removing any "illegal" html tags
\r
308 * @param haystack, the url
\r
309 * @param title, the href link text
\r
310 * @param imagRoot, the place to find icons
\r
311 * @param extImage, the url of the icon to show next to the link
\r
312 * @return a String containing the url
\r
314 public static String createURLLinks(String haystack, String title, String imageRoot,String extImage) {
\r
315 if (title == null) {
\r
316 return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/> <a href=\"$0\">$0</a>");
\r
318 title = removeHTMLTags(title);
\r
319 return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/> <a href=\"$0\">"+title+"</a>");
\r
324 * this routine takes text in url format and makes
\r
325 * a clickaeble "<href>" link removing any "illegal" html tags
\r
326 * @param haystack, the url
\r
327 * @param imageRoot, the place to find icons
\r
328 * @param extImage, the url of the icon to show next to the link
\r
329 * @param intImage, unused
\r
330 * @return a String containing the url
\r
332 public static String createURLLinks(String haystack, String title, String imageRoot,String extImage,String intImage) {
\r
333 return createURLLinks(haystack, title, imageRoot, extImage);
\r
337 * this method deletes all html tags
\r
339 public static final String removeHTMLTags(String haystack){
\r
340 return re_tags.substituteAll(haystack,"");
\r
344 * this method deletes all but the approved tags html tags
\r
345 * it also deletes approved tags which contain malicious-looking attributes and doesn't work at all
\r
347 public static String approveHTMLTags(String haystack){
\r
349 String approvedTags="a|img|h1|h2|h3|h4|h5|h6|br|b|i|strong|p";
\r
350 String badAttributes="onAbort|onBlur|onChange|onClick|onDblClick|onDragDrop|onError|onFocus|onKeyDown|onKeyPress|onKeyUp|onLoad|onMouseDown|onMouseMove|onMouseOut|onMouseOver|onMouseUp|onMove|onReset|onResize|onSelect|onSubmit|onUnload";
\r
351 String approvedProtocols="rtsp|http|ftp|https|freenet|mailto";
\r
353 // kill all the bad tags that have attributes
\r
354 String s = "<\\s*/?\\s*(?!(("+approvedTags+")\\s))\\w+\\s[^>]*>";
\r
355 RE regex = new RE(s,RE.REG_ICASE);
\r
356 haystack = regex.substituteAll(haystack,"");
\r
358 // kill all the bad tags that are attributeless
\r
359 regex = new RE("<\\s*/?\\s*(?!(("+approvedTags+")\\s*>))\\w+\\s*>",RE.REG_ICASE);
\r
360 haystack = regex.substituteAll(haystack,"");
\r
362 // kill all the tags which have a javascript attribute like onLoad
\r
363 regex = new RE("<[^>]*("+badAttributes+")[^>]*>",RE.REG_ICASE);
\r
364 haystack = regex.substituteAll(haystack,"");
\r
366 // kill all the tags which include a url to an unacceptable protocol
\r
367 regex = new RE("<\\s*a\\s+[^>]*href=(?!(\'|\")?("+approvedProtocols+"))[^>]*>",RE.REG_ICASE);
\r
368 haystack = regex.substituteAll(haystack,"");
\r
371 } catch(REException ex){
\r
372 ex.printStackTrace();
\r
379 * createHTML ruft alle regex-methoden zum unwandeln eines nicht
\r
380 * htmlcodierten string auf und returnt einen htmlcodierten String
\r
382 public static String createHTML(String content){
\r
383 content=convertNewline2Break(content);
\r
384 content=convertNewline2P(content);
\r
385 content=createMailLinks(content);
\r
386 content=createURLLinks(content);
\r
392 * createHTML ruft alle regex-methoden zum unwandeln eines nicht
\r
393 * htmlcodierten string auf und returnt einen htmlcodierten String
\r
395 public static String createHTML(String content,String producerDocRoot,String mailImage,String extImage,String intImage){
\r
396 content=convertNewline2Break(content);
\r
397 content=convertNewline2P(content);
\r
398 content=createMailLinks(content,producerDocRoot,mailImage);
\r
399 content=createURLLinks(content,null,producerDocRoot,extImage,intImage);
\r
404 * Converts mir's horrible internal date format (yyyy-MM-dd HH:mm:ss+zz) into a java Date
\r
406 * @param anInternalDate
\r
409 public static Date convertMirInternalDateToDate(String anInternalDate) {
\r
410 Calendar calendar = new GregorianCalendar();
\r
418 int timezoneOffset;
\r
420 year = Integer.parseInt(anInternalDate.substring(0,4));
\r
421 month = Integer.parseInt(anInternalDate.substring(5,7));
\r
422 day = Integer.parseInt(anInternalDate.substring(8,10));
\r
423 hours = Integer.parseInt(anInternalDate.substring(11,13));
\r
424 minutes = Integer.parseInt(anInternalDate.substring(14,16));
\r
425 seconds = Integer.parseInt(anInternalDate.substring(17,19));
\r
427 timezoneOffset = Integer.parseInt(anInternalDate.substring(20,22));
\r
428 if (anInternalDate.charAt(19) == '-')
\r
429 timezoneOffset = -timezoneOffset;
\r
431 calendar.setTimeZone(TimeZone.getTimeZone("UTC"));
\r
432 calendar.set(year, month-1, day, hours, minutes, seconds);
\r
433 calendar.add(Calendar.HOUR, -timezoneOffset);
\r
435 return calendar.getTime();
\r