2 * Copyright (C) 2001, 2002 The Mir-coders group
4 * This file is part of Mir.
6 * Mir is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * Mir is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with Mir; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * In addition, as a special exception, The Mir-coders gives permission to link
21 * the code of this program with the com.oreilly.servlet library, any library
22 * licensed under the Apache Software License, The Sun (tm) Java Advanced
23 * Imaging library (JAI), The Sun JIMI library (or with modified versions of
24 * the above that use the same license as the above), and distribute linked
25 * combinations including the two. You must obey the GNU General Public
26 * License in all respects for all of the code used other than the above
27 * mentioned libraries. If you modify this file, you may extend this exception
28 * to your version of the file, but you are not obligated to do so. If you do
29 * not wish to do so, delete this exception statement from your version.
35 import gnu.regexp.REException;
38 import java.text.NumberFormat;
39 import java.util.Calendar;
40 import java.util.GregorianCalendar;
41 import java.util.StringTokenizer;
/**
 * Static helper methods for string handling.
 *
 * @version $Id: StringUtil.java,v 1.32 2003/01/25 17:45:18 idfx Exp $
 * @author mir-coders group
 */
50 public final class StringUtil {
52 private static RE re_newline2br, re_brbr2p, re_mail, re_url, re_tags,
53 re_tables, re_forbiddenTags;
55 private StringUtil() { } // this avoids contruction
60 re_newline2br = new RE("(\r?\n){1}");
61 re_brbr2p = new RE("(<br>\r?\n<br>){1,}");
62 re_mail = new RE("([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+)\\.([a-zA-Z0-9_.-]+)");
63 re_url = new RE("((https://)|(http://)|(ftp://)){1}([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/?([^ \t\r\n<>\\)\\]]+[^ \t\r\n.,<>\\)\\]])");
64 re_tags = new RE("<[^>]*>",RE.REG_ICASE);
65 re_tables = new RE("<[ \t\r\n/]*(table|td|tr)[ \t\r\n]*>",RE.REG_ICASE);
66 re_forbiddenTags = new RE("<[ \t\r\n/]*(body|head|script)[ \t\r\n]*>",RE.REG_ICASE);
68 catch (REException e){
69 System.err.println("FATAL: StringUtil: could not precompile REGEX: "+e.toString());
74 * Formats a number with the specified minimum and maximum number of digits.
76 public static synchronized String zeroPaddingNumber(long value, int minDigits,
79 NumberFormat numberFormat = NumberFormat.getInstance();
80 numberFormat.setMinimumIntegerDigits(minDigits);
81 numberFormat.setMaximumIntegerDigits(maxDigits);
82 return numberFormat.format(value);
86 * Wandelt Datum in einen 8-ziffrigen String um (yyyymmdd)
88 * @return 8-ziffriger String (yyyymmdd)
91 public static final String date2webdbDate (GregorianCalendar theDate) {
92 StringBuffer webdbDate = new StringBuffer();
93 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
94 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
95 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
96 return webdbDate.toString();
100 * Wandelt Calendar in einen 12-ziffrigen String um (yyyymmddhhmm)
102 * @return 12-ziffriger String (yyyymmdd)
105 public static final String date2webdbDateTime (GregorianCalendar theDate) {
106 StringBuffer webdbDate = new StringBuffer();
107 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
108 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
109 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
110 webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
111 webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
112 return webdbDate.toString();
116 * Return a http://www.w3.org/TR/NOTE-datetime formatted date (yyyy-mm-ddThh:mm:ssTZ)
118 * @return w3approved datetime
121 public static final String date2w3DateTime (GregorianCalendar theDate) {
122 StringBuffer webdbDate = new StringBuffer();
123 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
124 webdbDate.append("-");
125 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
126 webdbDate.append("-");
127 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
128 webdbDate.append("T");
129 webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
130 webdbDate.append(":");
131 webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
132 webdbDate.append(":");
133 webdbDate.append(pad2(theDate.get(Calendar.SECOND)));
134 //assumes you are an hour-multiple away from UTC....
135 int offset=(theDate.get(Calendar.ZONE_OFFSET)/(60*60*1000));
137 webdbDate.append("-");
140 webdbDate.append("+");
142 webdbDate.append(pad2(Math.abs(offset)));
143 webdbDate.append(":00");
144 return webdbDate.toString();
148 * wandelt Calendar in dd.mm.yyyy / hh.mm um
150 * @return String mit (dd.mm.yyyy / hh.mm um)
152 public static String date2readableDateTime (GregorianCalendar theDate) {
153 String readable = "";
155 readable += pad2(theDate.get(Calendar.DATE));
156 readable += "." + pad2(theDate.get(Calendar.MONTH) + 1);
157 readable += "." + String.valueOf(theDate.get(Calendar.YEAR));
158 hour = theDate.get(Calendar.HOUR);
159 if (theDate.get(Calendar.AM_PM) == Calendar.PM)
161 readable += " / " + pad2(hour);
162 readable += ":" + pad2(theDate.get(Calendar.MINUTE));
167 * deleteForbiddenTags
168 * this method deletes all <script>, <body> and <head>-tags
170 public static final String deleteForbiddenTags(String haystack) {
171 return re_forbiddenTags.substituteAll(haystack,"");
175 * deleteHTMLTableTags
176 * this method deletes all <table>, <tr> and <td>-tags
178 public static final String deleteHTMLTableTags(String haystack) {
179 return re_tables.substituteAll(haystack,"");
183 * wandelt eine Datum in einen 8-buchstabigen String, der durch <code>/</code>
187 * @return String mit <code>/yyyy/mm/dd</code>
189 public static final String webdbDate2path (String webdbDate) {
190 StringBuffer path = new StringBuffer();
191 path.append("/").append(webdbDate.substring(0, 4));
192 path.append("/").append(webdbDate.substring(4, 6));
195 //path.append("/").append(webdbDate.substring(6, 8));
196 return path.toString();
200 * converts string from format: yyyy-mm-dd__hh:mm:ss.d
201 * to dd.mm.yyyy hh:mm
203 public static String dateToReadableDate(String date) {
204 StringBuffer returnDate = new StringBuffer();
207 returnDate.append(date.substring(8,10)).append('.');
208 returnDate.append(date.substring(5,7)).append('.');
209 returnDate.append(date.substring(0,4)).append(' ');
210 returnDate.append(date.substring(11,16));
212 return returnDate.toString();
216 * converts string from format: yyyy-mm-dd__hh:mm:ss.d
219 public static String dateToYear (String date) {
220 StringBuffer returnDate = new StringBuffer();
223 returnDate.append(date.substring(0,4));
225 return returnDate.toString();
229 * converts string from format: yyyy-mm-dd__hh:mm:ss.d
232 public static String dateToMonth (String date) {
233 StringBuffer returnDate = new StringBuffer();
235 if (!date.substring(5,6).equalsIgnoreCase("0")) returnDate.append(date.substring(5,7));
236 else returnDate.append(date.substring(6,7));
238 return returnDate.toString();
242 * converts string from format: yyyy-mm-dd__hh:mm:ss.d
245 public static String dateToDayOfMonth (String date) {
246 StringBuffer returnDate = new StringBuffer();
248 if (!date.substring(8,9).equalsIgnoreCase("0")) returnDate.append(date.substring(8,10));
249 else returnDate.append(date.substring(9,10));
251 return returnDate.toString();
255 * converts string from format: yyyy-mm-dd__hh:mm:ss.d
258 public static String dateToTime (String date) {
259 StringBuffer returnDate = new StringBuffer();
261 returnDate.append(date.substring(11,16));
263 return returnDate.toString();
267 * Splits the provided CSV text into a list. stolen wholesale from
268 * from Jakarta Turbine StrinUtils.java -mh
270 * @param text The CSV list of values to split apart.
271 * @param separator The separator character.
272 * @return The list of values.
274 public static String[] split(String text, String separator)
276 StringTokenizer st = new StringTokenizer(text, separator);
277 String[] values = new String[st.countTokens()];
279 while (st.hasMoreTokens())
281 values[pos++] = st.nextToken();
287 * Joins the elements of the provided array into a single string
288 * containing a list of CSV elements. Stolen wholesale from Jakarta
289 * Turbine StringUtils.java. -mh
291 * @param list The list of values to join together.
292 * @param separator The separator character.
293 * @return The CSV text.
295 public static String join(String[] list, String separator)
297 StringBuffer csv = new StringBuffer();
298 for (int i = 0; i < list.length; i++)
302 csv.append(separator);
306 return csv.toString();
310 * Wandelet String in byte[] um.
312 * @return byte[] des String
315 public static byte[] stringToBytes(String s) {
316 String crlf = System.getProperty("line.separator");
317 if (!crlf.equals("\n"))
318 s = replace(s, "\n", crlf);
319 // byte[] buf = new byte[s.length()];
320 byte[] buf = s.getBytes();
325 * Ersetzt in String <code>s</code> das <code>pattern</code> durch <code>substitute</code>
329 * @return String mit den Ersetzungen
331 public static String replace(String s, String pattern, String substitute) {
332 int i = 0, pLen = pattern.length(), sLen = substitute.length();
333 StringBuffer buf = new StringBuffer(s.length());
335 int j = s.indexOf(pattern, i);
337 buf.append(s.substring(i));
340 buf.append(s.substring(i, j));
341 buf.append(substitute);
345 return buf.toString();
349 * Ersetzt in String <code>s</code> das Regexp <code>pattern</code> durch <code>substitute</code>
353 * @return String mit den Ersetzungen
// Replaces every match of the regular expression `pattern` in `haystack` with
// `substitute`. The pattern is compiled anew on every call.
// NOTE(review): the statements of the REException handler are not visible in
// this excerpt, so the value returned for an invalid pattern must be confirmed.
355 public static String regexpReplace(String haystack, String pattern, String substitute) {
357 RE regex = new RE(pattern);
358 return regex.substituteAll(haystack,substitute);
359 } catch(REException ex){
368 * F?gt einen Separator an den Pfad an
370 * @return Pfad mit Separator am Ende
372 public static final String addSeparator (String path) {
373 return path.length() == 0 || path.endsWith(File.separator) ? path : path
374 + File.separatorChar;
378 * F?gt ein <code>/</code> ans ende des Strings and
380 * @return Pfad mit <code>/</code> am Ende
382 public static final String addSlash (String path) {
383 return path.length() == 0 || path.endsWith("/") ? path : path + '/';
387 * L?scht <code>/</code> am Ende des Strings, falls vorhanden
389 * @return String ohne <code>/</code> am Ende
391 public static final String removeSlash (String path) {
392 return path.length() > 1 && path.endsWith("/") ? path.substring(0, path.length()
397 * Checks to see if the path is absolute by looking for a leading file
402 public static boolean isAbsolutePath (String path) {
403 return path.startsWith(File.separator);
407 * L?scht Slash am Anfang des Strings
411 public static String removeFirstSlash (String path) {
412 return path.startsWith("/") ? path.substring(1) : path;
416 * formatiert eine Zahl (0-99) zweistellig (z.B. 5 -> 05)
417 * @return zwistellige Zahl
419 public static String pad2 (int number) {
420 return number < 10 ? "0" + number : String.valueOf(number);
424 * formatiert eine Zahl (0-999) dreistellig (z.B. 7 -> 007)
426 * @return 3-stellige Zahl
428 public static String pad3 (int number) {
429 return number < 10 ? "00" + number : number < 100 ? "0" + number : String.valueOf(number);
433 * Konvertiert Unix-Linefeeds in Win-Linefeeds
435 * @return Konvertierter String
437 public static String unixLineFeedsToWin(String s) {
440 i = s.indexOf('\n', i+1);
442 if ((i == 0 || s.charAt(i-1) != '\r') &&
443 (i == s.length()-1 || s.charAt(i+1) != '\r')) {
444 s = s.substring(0, i)+'\r'+s.substring(i);
452 * schnellere Variante der String.toLowerCase()-Routine
454 * @return String in Kleinbuchsten
456 public static String toLowerCase(String s) {
458 char[] a = new char[l];
459 for (int i = 0; i < l; i++)
460 a[i] = Character.toLowerCase(s.charAt(i));
461 return new String(a);
465 * Findet <code>element</code> im String-Array <code>array</code>
468 * @return Fundstelle als int oder -1
470 public static int indexOf(String[] array, String element) {
472 for (int i = 0; i < array.length; i++)
473 if (array[i].equals(element))
479 * Testet auf Vorkommen von <code>element</code> in <code>array</code>
480 * @param array String-Array
482 * @return true wenn <code>element</code> vorkommt, sonst false
484 public static boolean contains(String[] array, String element) {
485 return indexOf(array, element) >= 0;
489 * Ermittelt CRC-Pr?fsumme von String <code>s</code>
491 * @return CRC-Pr?fsumme
// Derives an int checksum from the characters of s.
// NOTE(review): the declaration/initialisation of h, and possibly further
// short statements inside the loop, are not visible in this excerpt — confirm
// against the complete file before relying on exact checksum values.
493 public static int getCRC(String s) {
495 char val[] = s.toCharArray();
496 int len = val.length;
498 for (int i = 0 ; i < len; i++) {
// mix the previous value (shifted) with the current character plus its index
500 h = (((h >> 30) | (h << 1)) ^ (val[i]+i));
// upper 24 bits: accumulated value h; lowest 8 bits: string length modulo 256
503 return (h << 8) | (len & 0xff);
507 * Liefert Default-Wert def zur?ck, wenn String <code>s</code>
512 * @return geparster int aus s oder def
514 public static int parseInt(String s, int def) {
515 if (s == null) return def;
517 return Integer.parseInt(s);
518 } catch (NumberFormatException e) {
524 * Liefert Defaultwert def zur?ck, wenn s nicht zu einem float geparsed werden kann.
527 * @return geparster float oder def
529 public static float parseFloat(String s, float def) {
530 if (s == null) return def;
532 return new Float(s).floatValue();
533 } catch (NumberFormatException e) {
539 * Findet Ende eines Satzes in String <code>text</code>
542 * @return index des Satzendes, oder -1
// Looks for the end of a sentence in text, searching for a '.' from startIndex on.
// NOTE(review): the statements following the condition below (the value
// returned for a match and how the search continues otherwise) are not visible
// in this excerpt — confirm against the complete file.
544 public static int findEndOfSentence(String text, int startIndex) {
546 int i = text.indexOf('.', startIndex);
// no further period in the text: report "not found"
547 if (i < 0) return -1;
// a period qualifies only if it is not the first character, does not follow a
// digit, and is the last character or followed by a space, newline or tab
548 if (i > 0 && !Character.isDigit(text.charAt(i-1)) &&
549 (i+1 >= text.length()
550 || text.charAt(i+1) == ' '
551 || text.charAt(i+1) == '\n'
552 || text.charAt(i+1) == '\t'))
559 * Findet Wortende in String <code>text</code> ab <code>startIndex</code>
562 * @return Index des Wortendes, oder -1
564 public static int findEndOfWord(String text, int startIndex) {
565 int i = text.indexOf(' ', startIndex),
566 j = text.indexOf('\n', startIndex);
567 if (i < 0) i = text.length();
568 if (j < 0) j = text.length();
569 return Math.min(i, j);
574 * convertNewline2P ist eine regex-routine zum umwandeln von 2 oder mehr newlines (\n)
575 * in den html-tag <p>
576 * nur sinnvoll, wenn text nicht im html-format eingegeben
578 public static String convertNewline2P(String haystack) {
579 return re_brbr2p.substituteAll(haystack,"\n</p><p>");
583 * convertNewline2Break ist eine regex-routine zum umwandeln von 1 newline (\n)
584 * in den html-tag <br>
585 * nur sinnvoll, wenn text nicht im html-format eingegeben
587 public static String convertNewline2Break(String haystack) {
588 return re_newline2br.substituteAll(haystack,"$0<br />");
592 * createMailLinks wandelt text im email-adressenformat
593 * in einen klickbaren link um
594 * nur sinnvoll, wenn text nicht im html-format eingegeben
596 public static String createMailLinks(String haystack) {
597 return re_mail.substituteAll(haystack,"<a href=\"mailto:$0\">$0</a>");
602 * createMailLinks wandelt text im email-adressenformat
603 * in einen klickbaren link um
604 * nur sinnvoll, wenn text nicht im html-format eingegeben
606 public static String createMailLinks(String haystack, String imageRoot, String mailImage) {
607 return re_mail.substituteAll(haystack,"<img src=\""+imageRoot+"/"+mailImage+"\" border=\"0\"/> <a href=\"mailto:$0\">$0</a>");
612 * createURLLinks wandelt text im url-format
613 * in einen klickbaren link um
614 * nur sinnvoll, wenn text nicht im html-format eingegeben
616 public static String createURLLinks(String haystack) {
617 return re_url.substituteAll(haystack,"<a href=\"$0\">$0</a>");
621 * this routine takes text in url format and makes
622 * a clickaeble "<href>" link removing any "illegal" html tags
623 * @param haystack, the url
624 * @param title, the href link text
625 * @param imagRoot, the place to find icons
626 * @param extImage, the url of the icon to show next to the link
627 * @return a String containing the url
629 public static String createURLLinks(String haystack, String title, String imageRoot,String extImage) {
631 return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/> <a href=\"$0\">$0</a>");
633 title = removeHTMLTags(title);
634 return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/> <a href=\"$0\">"+title+"</a>");
639 * this routine takes text in url format and makes
640 * a clickaeble "<href>" link removing any "illegal" html tags
641 * @param haystack, the url
642 * @param imageRoot, the place to find icons
643 * @param extImage, the url of the icon to show next to the link
644 * @param intImage, unused
645 * @return a String containing the url
647 public static String createURLLinks(String haystack, String title, String imageRoot,String extImage,String intImage) {
648 return createURLLinks(haystack, title, imageRoot, extImage);
652 * this method deletes all html tags
654 public static final String removeHTMLTags(String haystack){
655 return re_tags.substituteAll(haystack,"");
659 * this method deletes all but the approved tags html tags
660 * it also deletes approved tags which contain malicious-looking attributes and doesn't work at all
// Filters haystack through four regular expressions, applied one after the
// other, each deleting the tags it matches: tags outside the approved list
// (with and without attributes), tags carrying an event-handler attribute, and
// anchors whose href does not start with an approved protocol.
// The patterns are compiled anew on every call.
// NOTE(review): the statements following the last substitution and those after
// printStackTrace() in the handler are not visible in this excerpt, so the
// value returned on success and on an REException must be confirmed.
662 public static String approveHTMLTags(String haystack){
// alternation lists that are spliced into the patterns below
664 String approvedTags="a|img|h1|h2|h3|h4|h5|h6|br|b|i|strong|p";
665 String badAttributes="onAbort|onBlur|onChange|onClick|onDblClick|onDragDrop|onError|onFocus|onKeyDown|onKeyPress|onKeyUp|onLoad|onMouseDown|onMouseMove|onMouseOut|onMouseOver|onMouseUp|onMove|onReset|onResize|onSelect|onSubmit|onUnload";
666 String approvedProtocols="rtsp|http|ftp|https|freenet|mailto";
668 // kill all the bad tags that have attributes
// the negative lookahead skips tag names from the approved list
669 String s = "<\\s*/?\\s*(?!(("+approvedTags+")\\s))\\w+\\s[^>]*>";
670 RE regex = new RE(s,RE.REG_ICASE);
671 haystack = regex.substituteAll(haystack,"");
673 // kill all the bad tags that are attributeless
674 regex = new RE("<\\s*/?\\s*(?!(("+approvedTags+")\\s*>))\\w+\\s*>",RE.REG_ICASE);
675 haystack = regex.substituteAll(haystack,"");
677 // kill all the tags which have a javascript attribute like onLoad
// matches the attribute name anywhere between "<" and ">"
678 regex = new RE("<[^>]*("+badAttributes+")[^>]*>",RE.REG_ICASE);
679 haystack = regex.substituteAll(haystack,"");
681 // kill all the tags which include a url to an unacceptable protocol
// only anchor tags are checked; the href value may be quoted or unquoted
682 regex = new RE("<\\s*a\\s+[^>]*href=(?!(\'|\")?("+approvedProtocols+"))[^>]*>",RE.REG_ICASE);
683 haystack = regex.substituteAll(haystack,"");
686 } catch(REException ex){
687 ex.printStackTrace();
694 * createHTML ruft alle regex-methoden zum unwandeln eines nicht
695 * htmlcodierten string auf und returnt einen htmlcodierten String
697 public static String createHTML(String content){
698 content=convertNewline2Break(content);
699 content=convertNewline2P(content);
700 content=createMailLinks(content);
701 content=createURLLinks(content);
707 * createHTML ruft alle regex-methoden zum unwandeln eines nicht
708 * htmlcodierten string auf und returnt einen htmlcodierten String
710 public static String createHTML(String content,String producerDocRoot,String mailImage,String extImage,String intImage){
711 content=convertNewline2Break(content);
712 content=convertNewline2P(content);
713 content=createMailLinks(content,producerDocRoot,mailImage);
714 content=createURLLinks(content,null,producerDocRoot,extImage,intImage);