2 * put your module comment here
14 * Statische Hilfsmethoden zur Stringbehandlung
19 public final class StringUtil {
/** Precompiled patterns shared by the conversion helpers of this class. */
private static RE re_newline2br, re_brbr2p, re_mail, re_url, re_tags;

/** Not instantiable: this class only offers static helpers. */
private StringUtil() { }

// Compile the shared patterns once, when the class is loaded.
static {
    try {
        re_newline2br = new RE("(\r?\n){1}");
        re_brbr2p = new RE("(<br>\r?\n<br>){1,}");
        // The dot between the domain labels is escaped so that it only
        // matches a literal '.' (unescaped it matched any character).
        re_mail = new RE("([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+)\\.([a-zA-Z0-9_.-]+)");
        // NOTE(review): the '.' after the first host label is unescaped here
        // as well; left as is because escaping it would stop single-label
        // hosts (e.g. localhost) from being linked -- confirm the intent.
        re_url = new RE("((https://)|(http://)|(ftp://)){1}([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/?([^ \t\r\n<>\\)\\]]+[^ \t\r\n.,<>\\)\\]])");
        re_tags = new RE("<[^>]*>",RE.REG_ICASE);
    }
    catch (REException e){
        // Patterns that were not compiled yet keep their default value (null).
        System.err.println("FATAL: StringUtil: could not precompile REGEX: "+e.toString());
    }
}
41 * Wandelt Datum in einen 8-ziffrigen String um (yyyymmdd)
43 * @return 8-ziffriger String (yyyymmdd)
46 public static final String date2webdbDate (GregorianCalendar theDate) {
47 StringBuffer webdbDate = new StringBuffer();
48 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
49 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
50 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
51 return webdbDate.toString();
55 * Wandelt Calendar in einen 12-ziffrigen String um (yyyymmddhhmm)
57 * @return 12-ziffriger String (yyyymmdd)
60 public static final String date2webdbDateTime (GregorianCalendar theDate) {
61 StringBuffer webdbDate = new StringBuffer();
62 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
63 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
64 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
65 webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
66 webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
67 return webdbDate.toString();
71 * wandelt Calendar in dd.mm.yyyy / hh.mm um
73 * @return String mit (dd.mm.yyyy / hh.mm um)
75 public static String date2readableDateTime (GregorianCalendar theDate) {
78 readable += pad2(theDate.get(Calendar.DATE));
79 readable += "." + pad2(theDate.get(Calendar.MONTH) + 1);
80 readable += "." + String.valueOf(theDate.get(Calendar.YEAR));
81 hour = theDate.get(Calendar.HOUR);
82 if (theDate.get(Calendar.AM_PM) == Calendar.PM)
84 readable += " / " + pad2(hour);
85 readable += ":" + pad2(theDate.get(Calendar.MINUTE));
90 * wandelt eine Datum in einen 8-buchstabigen String, der durch <code>/</code>
94 * @return String mit <code>/yyyy/mm/dd</code>
96 public static final String webdbDate2path (String webdbDate) {
97 StringBuffer path = new StringBuffer();
98 path.append("/").append(webdbDate.substring(0, 4));
99 path.append("/").append(webdbDate.substring(4, 6));
102 //path.append("/").append(webdbDate.substring(6, 8));
103 return path.toString();
107 * wandelt Calendar in dd.mm.yyyy um
110 * @return String mit <code>dd.mm.yyyy</code>
112 public static final String webdbDate2readableDate (String webdbDate) {
114 date += webdbDate.substring(6, 8);
115 date += "." + webdbDate.substring(4, 6);
116 date += "." + webdbDate.substring(0, 4);
122 * converts string from format: yyyy-mm-dd__hh:mm:ss.d
123 * to dd.mm.yyyy hh:mm
125 public static String dateToReadableDate(String date) {
126 StringBuffer returnDate = new StringBuffer();
129 returnDate.append(date.substring(8,10)).append('.');
130 returnDate.append(date.substring(5,7)).append('.');
131 returnDate.append(date.substring(0,4)).append(' ');
132 returnDate.append(date.substring(11,16));
134 return returnDate.toString();
138 * converts string from format: yyyy-mm-dd__hh:mm:ss.d
141 public static String dateToYear (String date) {
142 StringBuffer returnDate = new StringBuffer();
145 returnDate.append(date.substring(0,4));
147 return returnDate.toString();
151 * converts string from format: yyyy-mm-dd__hh:mm:ss.d
154 public static String dateToMonth (String date) {
155 StringBuffer returnDate = new StringBuffer();
157 if (!date.substring(5,6).equalsIgnoreCase("0")) returnDate.append(date.substring(5,7));
158 else returnDate.append(date.substring(6,7));
160 return returnDate.toString();
164 * converts string from format: yyyy-mm-dd__hh:mm:ss.d
167 public static String dateToDayOfMonth (String date) {
168 StringBuffer returnDate = new StringBuffer();
170 if (!date.substring(8,9).equalsIgnoreCase("0")) returnDate.append(date.substring(8,10));
171 else returnDate.append(date.substring(9,10));
173 return returnDate.toString();
177 * converts string from format: yyyy-mm-dd__hh:mm:ss.d
180 public static String dateToTime (String date) {
181 StringBuffer returnDate = new StringBuffer();
183 returnDate.append(date.substring(11,16));
185 return returnDate.toString();
190 * schließt einen String in Anführungsszeichen ein, falls er Leerzeichen o.ä. enthält
192 * @return gequoteter String
194 public static String quoteIfNecessary(String s) {
195 for (int i = 0; i < s.length(); i++)
196 if (!(Character.isLetterOrDigit(s.charAt(i)) || s.charAt(i) == '.'))
197 return quote(s, '"');
// Quotes special SQL characters in s: every apostrophe is replaced, then the
// result is passed through quote(String, char) with the double quote as
// quote character.
// NOTE(review): several lines of this method are missing from this copy
// (opening brace, the else branch around the plain append, the closing braces
// and the return statement) -- restore them from version control.
202 * schließt <code>s</code> in <code>'</code> ein und setzt Backslashes vor
203 * "gefährliche" Zeichen innerhalb des Strings
204 * Quotes special SQL-characters in <code>s</code>
206 * @return geqoteter String
208 public static String quote(String s)
210 //String s2 = quote(s, '\'');
// Apostrophes are replaced instead of escaped because of a bug with "'?" in
// the PostgreSQL JDBC driver (see the original remark below).
213 //Because of '?-Bug in Postgresql-JDBC-Driver
214 StringBuffer temp = new StringBuffer();
215 for(int i=0;i<s.length();i++){
216 if(s.charAt(i)=='\''){
// NOTE(review): the literal below may originally have been an HTML entity
// that was decoded when this copy was made -- verify before relying on it.
217 temp.append("´");
// presumably the else branch: every other character is copied unchanged
219 temp.append(s.charAt(i));
222 String s2 = temp.toString();
// escape double quotes in the intermediate result
225 s2 = quote(s2, '\"');
230 * schließt <code>s</code> in <code>'</code> ein und setzt Backslashes vor
231 * "gefährliche" Zeichen innerhalb des Strings
233 * @param s String, der gequoted werden soll
234 * @param quoteChar zu quotendes Zeichen
235 * @return gequoteter String
237 public static String quote(String s, char quoteChar)
239 StringBuffer buf = new StringBuffer(s.length());
241 while (pos < s.length()) {
242 int i = s.indexOf(quoteChar, pos);
243 if (i < 0) i = s.length();
244 buf.append(s.substring(pos, i));
246 if (pos < s.length()) {
248 buf.append(quoteChar);
252 return buf.toString();
// Counterpart of quote(): searches s for a backslash followed by quoteChar
// and copies the text between the matches into a buffer.
// NOTE(review): the declarations of quoteChar and pos, the statements that
// handle a match and advance pos, and the braces are missing from this copy;
// the value of quoteChar cannot be determined from here.
256 * replaces dangerous characters in <code>s</code>
260 public static String unquote(String s)
263 StringBuffer buf = new StringBuffer(s.length());
// the escape sequence to look for: a backslash followed by quoteChar
265 String searchString = "\\"+quoteChar;
266 while (pos < s.length()) {
267 int i = s.indexOf(searchString, pos);
// no further match: copy the remainder of the string
268 if (i < 0) i = s.length();
269 buf.append(s.substring(pos, i));
272 return buf.toString();
276 * Wandelet String in byte[] um.
278 * @return byte[] des String
281 public static byte[] stringToBytes(String s) {
282 String crlf = System.getProperty("line.separator");
283 if (!crlf.equals("\n"))
284 s = replace(s, "\n", crlf);
285 // byte[] buf = new byte[s.length()];
286 byte[] buf = s.getBytes();
291 * Ersetzt in String <code>s</code> das <code>pattern</code> durch <code>substitute</code>
295 * @return String mit den Ersetzungen
297 public static String replace(String s, String pattern, String substitute) {
298 int i = 0, pLen = pattern.length(), sLen = substitute.length();
299 StringBuffer buf = new StringBuffer(s.length());
301 int j = s.indexOf(pattern, i);
303 buf.append(s.substring(i));
306 buf.append(s.substring(i, j));
307 buf.append(substitute);
311 return buf.toString();
// Replaces every match of the regular expression pattern in haystack by
// substitute. The pattern is compiled anew on every call.
// NOTE(review): the "try {" line and the body of the catch block are missing
// from this copy, so the result for an invalid pattern cannot be told from
// here.
315 * Ersetzt in String <code>s</code> das Regexp <code>pattern</code> durch <code>substitute</code>
319 * @return String mit den Ersetzungen
321 public static String regexpReplace(String haystack, String pattern, String substitute) {
323 RE regex = new RE(pattern);
324 return regex.substituteAll(haystack,substitute);
325 } catch(REException ex){
334 * Fügt einen Separator an den Pfad an
336 * @return Pfad mit Separator am Ende
338 public static final String addSeparator (String path) {
339 return path.length() == 0 || path.endsWith(File.separator) ? path : path
340 + File.separatorChar;
344 * Fügt ein <code>/</code> ans ende des Strings and
346 * @return Pfad mit <code>/</code> am Ende
348 public static final String addSlash (String path) {
349 return path.length() == 0 || path.endsWith("/") ? path : path + '/';
353 * Löscht <code>/</code> am Ende des Strings, falls vorhanden
355 * @return String ohne <code>/</code> am Ende
357 public static final String removeSlash (String path) {
358 return path.length() > 1 && path.endsWith("/") ? path.substring(0, path.length()
363 * Checks to see if the path is absolute by looking for a leading file
368 public static boolean isAbsolutePath (String path) {
369 return path.startsWith(File.separator);
373 * Löscht Slash am Anfang des Strings
377 public static String removeFirstSlash (String path) {
378 return path.startsWith("/") ? path.substring(1) : path;
382 * formatiert eine Zahl (0-99) zweistellig (z.B. 5 -> 05)
383 * @return zwistellige Zahl
385 public static String pad2 (int number) {
386 return number < 10 ? "0" + number : String.valueOf(number);
390 * formatiert eine Zahl (0-999) dreistellig (z.B. 7 -> 007)
392 * @return 3-stellige Zahl
394 public static String pad3 (int number) {
395 return number < 10 ? "00" + number : number < 100 ? "0" + number : String.valueOf(number);
399 * Konvertiert Unix-Linefeeds in Win-Linefeeds
401 * @return Konvertierter String
403 public static String unixLineFeedsToWin(String s) {
406 i = s.indexOf('\n', i+1);
408 if ((i == 0 || s.charAt(i-1) != '\r') &&
409 (i == s.length()-1 || s.charAt(i+1) != '\r')) {
410 s = s.substring(0, i)+'\r'+s.substring(i);
// Turns a string into a form usable inside a URL: according to the original
// description special characters are converted and spaces become underscores.
// NOTE(review): all "case" labels of the switch, the "default:" label and the
// closing braces are missing from this copy, so it cannot be told from here
// which characters map to "oe", "ae", "ue", "a" and "_".
419 * verwandelt einen String in eine gültige Url, konvertiert Sonderzeichen
420 * und Spaces werden zu Underscores
422 * @return gültige Url
424 public static String convert2url(String s) {
426 StringBuffer buf = new StringBuffer();
427 for(int i = 0; i < s.length(); i++ ) {
428 switch( s.charAt( i ) ) {
430 buf.append( "oe" ); break;
432 buf.append( "ae" ); break;
434 buf.append( "ue" ); break;
436 buf.append( "a" ); break;
439 buf.append( "_" ); break;
// Looks at the last character written so far, presumably to avoid emitting
// two underscores in a row.
// NOTE(review): buf.charAt(buf.length() - 1) throws when buf is still empty,
// i.e. when this branch is reached for the first character of s.
441 if( buf.charAt( buf.length() - 1 ) != '_' ) {
// presumably the default branch: the character is copied unchanged
446 buf.append( s.charAt( i ) );
449 return buf.toString();
453 * wandelt Sonderzeichen in Quotes um
455 * @return Kovertierter String
457 public static String encodeHtml(String s) {
458 StringBuffer buf = new StringBuffer();
459 for(int i=0;i < s.length(); i++ ) {
461 /** @todo looks inefficient, to ask for index of every char, in
462 * case of failure it runs to the end.*/
463 if (s.charAt(i)=='&') {
464 // convert html to xml-parsable representation
465 if( s.indexOf( "ö", i ) == i ) {
466 buf.append( "ö" ); i += 5;
469 if( s.indexOf( "ä", i ) == i ) {
470 buf.append( "ä" ); i += 5;
473 if( s.indexOf( "ü", i ) == i ) {
474 buf.append( "ü" ); i += 5;
477 if( s.indexOf( "Ö", i ) == i ) {
478 buf.append( "Ö" ); i += 5;
481 if( s.indexOf( "Ä", i ) == i ) {
482 buf.append( "Ä" ); i += 5;
485 if( s.indexOf( "Ü", i ) == i ) {
486 buf.append( "Ü" ); i += 5;
489 if( s.indexOf( "ß", i ) == i ) {
490 buf.append( "ß" ); i += 6;
494 /** @todo should only escape outside of tags */
496 if( s.indexOf( """, i ) == i ) {
497 buf.append( "ß" ); i += 5;
500 if( s.indexOf( "–", i ) == i ) {
501 buf.append( "–" ); i += 6;
504 if( s.indexOf( "—", i ) == i ) {
505 buf.append( "—" ); i += 6;
508 if( s.indexOf( "“", i ) == i ) {
509 buf.append( "“" ); i += 6;
512 if( s.indexOf( "”", i ) == i ) {
513 buf.append( "”" ); i += 6;
516 if( s.indexOf( "„", i ) == i ) {
517 buf.append( "„" ); i += 6;
521 //looks pretty stupid
522 if( s.indexOf( "<", i ) == i ) {
523 buf.append( "<" ); i += 3;
526 if( s.indexOf( ">", i ) == i ) {
527 buf.append( ">" ); i += 3;
530 if( s.indexOf( "´", i ) == i ) {
531 buf.append( "´" ); i += 6;
534 if( s.indexOf( " ", i ) == i ) {
535 buf.append( " " ); i += 5;
539 //if( s.indexOf( "&", i ) == i ) {
540 // buf.append( "&" ); i += 0;
544 // convert umlauts an other special charakters
545 switch( s.charAt(i) ) {
546 case 'ö': buf.append( "ö" ); break;
547 case 'ä': buf.append( "ä" ); break;
548 case 'ü': buf.append( "ü" ); break;
549 case 'Ö': buf.append( "Ö" ); break;
550 case 'Ä': buf.append( "Ä" ); break;
551 case 'Ü': buf.append( "Ü" ); break;
552 case 'ß': buf.append( "ß" ); break;
553 case 'é': buf.append( "é" ); break;
554 case 'è': buf.append( "è" ); break;
555 case 'á': buf.append( "á" ); break;
556 case 'à': buf.append( "à" ); break;
557 case 'â': buf.append( "â" ); break;
558 case 'ã': buf.append( "ã" ); break;
559 case '¬': buf.append( "¬" ); break;
560 case '¹': buf.append( "¹" ); break;
561 case '²': buf.append( "²" ); break;
562 case '³': buf.append( "³" ); break;
563 case '¼': buf.append( "¼" ); break;
564 case '½': buf.append( "½" ); break;
565 case '¾': buf.append( "¾" ); break;
566 case '¶': buf.append( "¶" ); break;
567 case 'æ': buf.append( "æ" ); break;
568 case 'ð': buf.append( "ð" ); break;
569 case '|': buf.append( "¦" ); break;
570 case '·': buf.append( "·" ); break;
571 case '°': buf.append( "°" ); break;
572 case '§': buf.append( "§" ); break;
573 case 'ø': buf.append( "ø" ); break;
574 case 'ç': buf.append( "ç" ); break;
575 case '¤': buf.append( "¤" ); break;
576 case 'ª': buf.append( "ª" ); break;
577 case 'Ç': buf.append( "Ç" ); break;
578 case 'Ã': buf.append( "Ã" ); break;
579 case 'Â': buf.append( "Â" ); break;
580 case 'Æ': buf.append( "Æ" ); break;
581 case '©': buf.append( "©" ); break;
582 case '®': buf.append( "®" ); break;
583 case '¥': buf.append( "¥" ); break;
584 case 'Þ': buf.append( "þ" ); break;
585 case '¯': buf.append( "¯" ); break;
586 case 'Ð': buf.append( "Ð" ); break;
587 case 'º': buf.append( "º" ); break;
588 case '¡': buf.append( "¡" ); break;
589 case '£': buf.append( "£" ); break;
590 case '±': buf.append( "±" ); break;
591 case '¿': buf.append( "¿" ); break;
592 case 'Ø': buf.append( "Ø" ); break;
593 case 'Á': buf.append( "À" ); break;
594 case 'À': buf.append( "Á" ); break;
595 case 'É': buf.append( "È" ); break;
596 case 'È': buf.append( "É" ); break;
597 case 'ù': buf.append( "ú" ); break;
598 case 'ñ': buf.append( "ñ" ); break;
599 case 'Ñ': buf.append( "Ñ" ); break;
600 case 'µ': buf.append( "µ" ); break;
601 case 'Í': buf.append( "Ì" ); break;
602 case 'Ì': buf.append( "Í" ); break;
603 case 'í': buf.append( "ì" ); break;
604 case 'ì': buf.append( "í" ); break;
605 case 'î': buf.append( "î" ); break;
606 case 'Î': buf.append( "Î" ); break;
607 case 'ó': buf.append( "ó" ); break;
608 case 'Ó': buf.append( "Ò" ); break;
609 case 'ò': buf.append( "Î" ); break;
610 case 'Ò': buf.append( "Ó" ); break;
611 case 'ô': buf.append( "ô" ); break;
612 case 'Ô': buf.append( "Ô" ); break;
613 case 'õ': buf.append( "õ" ); break;
614 case 'Õ': buf.append( "Õ" ); break;
615 case 'ý': buf.append( "ý" ); break;
616 case 'Ý': buf.append( "Ý" ); break;
617 case 'û': buf.append( "û" ); break;
618 case 'Û': buf.append( "Û" ); break;
619 case 'ú': buf.append( "ù" ); break;
620 case 'Ú': buf.append( "Ù" ); break;
621 case 'Ù': buf.append( "Ú" ); break;
622 case 'Ê': buf.append( "Ê" ); break;
623 case 'ê': buf.append( "ê" ); break;
624 case 'å': buf.append( "å" ); break;
625 case 'Å': buf.append( "Å" ); break;
626 case 'ë': buf.append( "ë" ); break;
627 case 'Ë': buf.append( "Ë" ); break;
628 case 'ÿ': buf.append( "ÿ" ); break;
629 case 'ï': buf.append( "ï" ); break;
630 case 'Ï': buf.append( "Ï" ); break;
631 case '«': buf.append( "«" ); break;
632 case '»': buf.append( "»" ); break;
633 case '\'': buf.append( "´" ); break;
634 case '\"': buf.append( """ ); break;
635 //case '\u8211': buf.append( "–" ); break;
636 //case '\u8212': buf.append( "—" ); break;
637 //case '\u8220': buf.append( "“" ); break;
638 //case '\u8221': buf.append( "”" ); break;
639 //case '\u8222': buf.append( "„" ); break;
640 //case '\"': buf.append( """ ); break;
641 default: buf.append( s.charAt(i) );
645 return buf.toString();
649 public static String decodeHTMLinTags(String s){
650 StringBuffer buffer = new StringBuffer();
651 boolean start = false;
652 boolean stop = false;
657 for(int i=0;i<s.length();i++){
658 if(s.charAt(i)=='<'){
661 } else if(s.charAt(i)=='>'){
666 buffer.append(s.substring(temp,startIndex));
667 buffer.append(replaceQuot(s.substring(startIndex,stopIndex+1)));
668 i= temp= stopIndex+1;
674 buffer.append(s.substring(stopIndex+1));
675 return buffer.toString();
681 public static String replaceQuot(String s) {
682 StringBuffer buffer = new StringBuffer();
683 for(int j = 0; j < s.length();j++){
684 if(s.charAt(j)=='&'){
685 if(s.indexOf( """,j) == j) {
686 buffer.append( "\"" );
690 buffer.append(s.charAt(j));
693 return buffer.toString();
696 /** wandelt Quotes in Sonderzeichen um
699 public static String decodeHtml(String s) {
700 StringBuffer buf = new StringBuffer();
701 for(int i=0;i < s.length(); i++ ) {
702 if( s.indexOf( "ö", i ) == i ) {
703 buf.append( "ö" ); i += 5;
706 if( s.indexOf( "ä", i ) == i ) {
707 buf.append( "ä" ); i += 5;
710 if( s.indexOf( "ü", i ) == i ) {
711 buf.append( "ü" ); i += 5;
714 if( s.indexOf( "Ö", i ) == i ) {
715 buf.append( "Ö" ); i += 5;
718 if( s.indexOf( "Ä", i ) == i ) {
719 buf.append( "Ä" ); i += 5;
722 if( s.indexOf( "Ü", i ) == i ) {
723 buf.append( "Ü" ); i += 5;
726 if( s.indexOf( "ß", i ) == i ) {
727 buf.append( "ß" ); i += 6;
730 if( s.indexOf( """, i ) == i ) {
731 buf.append( "\"" ); i += 5;
734 buf.append( s.charAt(i) );
736 return buf.toString();
741 * schnellere Variante der String.toLowerCase()-Routine
743 * @return String in Kleinbuchsten
745 public static String toLowerCase(String s) {
747 char[] a = new char[l];
748 for (int i = 0; i < l; i++)
749 a[i] = Character.toLowerCase(s.charAt(i));
750 return new String(a);
754 * Findet <code>element</code> im String-Array <code>array</code>
757 * @return Fundstelle als int oder -1
759 public static int indexOf(String[] array, String element) {
761 for (int i = 0; i < array.length; i++)
762 if (array[i].equals(element))
768 * Testet auf Vorkommen von <code>element</code> in <code>array</code>
769 * @param array String-Array
771 * @return true wenn <code>element</code> vorkommt, sonst false
773 public static boolean contains(String[] array, String element) {
774 return indexOf(array, element) >= 0;
// Computes a checksum of the string s from its characters and its length.
// NOTE(review): the declaration and initial value of h are missing from this
// copy, as are the closing braces; do not reconstruct the initial value by
// guessing, since existing checksums would change.
778 * Ermittelt CRC-Prüfsumme von String <code>s</code>
780 * @return CRC-Prüfsumme
782 public static int getCRC(String s) {
784 char val[] = s.toCharArray();
785 int len = val.length;
787 for (int i = 0 ; i < len; i++) {
// Mixes each character plus its position into h. ">>" is the sign-extending
// shift, so for negative h the expression is not a plain bit rotation.
789 h = (((h >> 30) | (h << 1)) ^ (val[i]+i));
// The low 8 bits of the result hold the string length (modulo 256), the
// remaining bits come from h.
792 return (h << 8) | (len & 0xff);
796 * Liefert Default-Wert def zurück, wenn String <code>s</code>
801 * @return geparster int aus s oder def
803 public static int parseInt(String s, int def) {
804 if (s == null) return def;
806 return Integer.parseInt(s);
807 } catch (NumberFormatException e) {
813 * Liefert Defaultwert def zurück, wenn s nicht zu einem float geparsed werden kann.
816 * @return geparster float oder def
818 public static float parseFloat(String s, float def) {
819 if (s == null) return def;
821 return new Float(s).floatValue();
822 } catch (NumberFormatException e) {
// Finds the end of a sentence in text, searching for a '.' from startIndex on.
// NOTE(review): the loop header, the statement executed when the condition
// below holds (i.e. which index is returned) and the statement that continues
// the search are missing from this copy.
828 * Findet Ende eines Satzes in String <code>text</code>
831 * @return index des Satzendes, oder -1
833 public static int findEndOfSentence(String text, int startIndex) {
835 int i = text.indexOf('.', startIndex);
// no further dot: there is no sentence end
836 if (i < 0) return -1;
// A dot counts as the end of a sentence when it is not the first character,
// does not follow a digit (e.g. "3.5"), and is followed by the end of the
// text, a blank, a newline or a tab.
837 if (i > 0 && !Character.isDigit(text.charAt(i-1)) &&
838 (i+1 >= text.length()
839 || text.charAt(i+1) == ' '
840 || text.charAt(i+1) == '\n'
841 || text.charAt(i+1) == '\t'))
848 * Findet Wortende in String <code>text</code> ab <code>startIndex</code>
851 * @return Index des Wortendes, oder -1
853 public static int findEndOfWord(String text, int startIndex) {
854 int i = text.indexOf(' ', startIndex),
855 j = text.indexOf('\n', startIndex);
856 if (i < 0) i = text.length();
857 if (j < 0) j = text.length();
858 return Math.min(i, j);
863 * convertNewline2P ist eine regex-routine zum umwandeln von 2 oder mehr newlines (\n)
864 * in den html-tag <p>
865 * nur sinnvoll, wenn text nicht im html-format eingegeben
867 public static String convertNewline2P(String haystack) {
868 return re_brbr2p.substituteAll(haystack,"\n</p><p>");
872 * convertNewline2Break ist eine regex-routine zum umwandeln von 1 newline (\n)
873 * in den html-tag <br>
874 * nur sinnvoll, wenn text nicht im html-format eingegeben
876 public static String convertNewline2Break(String haystack) {
877 return re_newline2br.substituteAll(haystack,"$0<br>");
881 * createMailLinks wandelt text im email-adressenformat
882 * in einen klickbaren link um
883 * nur sinnvoll, wenn text nicht im html-format eingegeben
885 public static String createMailLinks(String haystack) {
886 return re_mail.substituteAll(haystack,"<a href=\"mailto:$0\">$0</a>");
891 * createMailLinks wandelt text im email-adressenformat
892 * in einen klickbaren link um
893 * nur sinnvoll, wenn text nicht im html-format eingegeben
895 public static String createMailLinks(String haystack, String imageRoot, String mailImage) {
896 return re_mail.substituteAll(haystack,"<img src=\""+imageRoot+"/"+mailImage+"\" border=\"0\"/> <a href=\"mailto:$0\">$0</a>");
901 * createURLLinks wandelt text im url-format
902 * in einen klickbaren link um
903 * nur sinnvoll, wenn text nicht im html-format eingegeben
905 public static String createURLLinks(String haystack) {
906 return re_url.substituteAll(haystack,"<a href=\"$0\">$0</a>");
910 * this routine takes text in url format and makes
911 * a clickaeble "<href>" link removing any "illegal" html tags
912 * @param haystack, the url
913 * @param title, the href link text
914 * @param imagRoot, the place to find icons
915 * @param extImage, the url of the icon to show next to the link
916 * @return a String containing the url
918 public static String createURLLinks(String haystack, String title, String imageRoot,String extImage) {
920 return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/> <a href=\"$0\">$0</a>");
922 title = removeHTMLTags(title);
923 return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/> <a href=\"$0\">"+title+"</a>");
928 * this routine takes text in url format and makes
929 * a clickaeble "<href>" link removing any "illegal" html tags
930 * @param haystack, the url
931 * @param imageRoot, the place to find icons
932 * @param extImage, the url of the icon to show next to the link
933 * @param intImage, unused
934 * @return a String containing the url
936 public static String createURLLinks(String haystack, String title, String imageRoot,String extImage,String intImage) {
937 return createURLLinks(haystack, title, imageRoot, extImage);
941 * this routine takes text in url format and makes
942 * an image link removing any "illegal" html tags
943 * @param haystack, the url
944 * @param title, the image alt text, can be null
945 * @param height, height of the image
946 * @param width, width of the image
947 * @return a String containing the url
949 public static String createIMGLinks(String haystack, String title, String height,String width) {
951 title = removeHTMLTags(title);
952 return re_url.substituteAll(haystack,"<img hspace=\"10\" vspace=\"6\" align=\"left\" src=\"$0\" width=\""+width+"\" height=\""+height+"\" alt=\""+title+"\"/> <br><i>"+title+"</i>");
954 return re_url.substituteAll(haystack,"<img hspace=\"10\" vspace=\"6\" align=\"left\" src=\"$0\" width=\""+width+"\" height=\""+height+"\" alt=\"\"/> ");
// Removes script elements, the head element and body tags from haystack by
// applying three regular expressions one after the other.
// NOTE(review): the "try {" line, the return statement and the body of the
// catch block are missing from this copy.
960 * deleteForbiddenTags
961 * this method deletes all <script>, <body> and <head>-tags
963 public static final String deleteForbiddenTags(String haystack) {
// NOTE(review): the character class after '<' has no quantifier, so exactly
// one whitespace character is required there; "<script>" without whitespace
// does not seem to match -- "[ \t\r\n]*" was presumably intended.
965 RE regex = new RE("<[ \t\r\n](.*?)script(.*?)/script(.*?)>",RE.REG_ICASE);
966 haystack = regex.substituteAll(haystack,"");
// NOTE(review): unlike the script pattern, the head and body patterns are
// compiled without RE.REG_ICASE, so upper-case tags are not covered.
967 regex = new RE("<head>(.*?)</head>");
968 haystack = regex.substituteAll(haystack,"");
969 regex = new RE("<[ \t\r\n/]*body(.*?)>");
970 haystack = regex.substituteAll(haystack,"");
972 } catch(REException ex){
978 * this method deletes all html tags
980 public static final String removeHTMLTags(String haystack){
981 return re_tags.substituteAll(haystack,"");
// Removes all tags except a list of approved ones, and removes approved tags
// that carry event-handler attributes or link to a protocol that is not on
// the approved list. The original remark below states that the method
// "doesn't work at all", so treat it as unfinished.
// NOTE(review): the "try {" line, the return statements and the end of the
// catch block are missing from this copy.
986 * this method deletes all but the approved tags html tags
987 * it also deletes approved tags which contain malicious-looking attributes and doesn't work at all
989 public static String approveHTMLTags(String haystack){
// alternatives used inside the regular expressions below
991 String approvedTags="a|img|h1|h2|h3|h4|h5|h6|br|b|i|strong|p";
992 String badAttributes="onAbort|onBlur|onChange|onClick|onDblClick|onDragDrop|onError|onFocus|onKeyDown|onKeyPress|onKeyUp|onLoad|onMouseDown|onMouseMove|onMouseOut|onMouseOver|onMouseUp|onMove|onReset|onResize|onSelect|onSubmit|onUnload";
993 String approvedProtocols="rtsp|http|ftp|https|freenet|mailto";
995 // kill all the bad tags that have attributes
// negative lookahead: the tag name must not be an approved one
996 String s = "<\\s*/?\\s*(?!(("+approvedTags+")\\s))\\w+\\s[^>]*>";
997 RE regex = new RE(s,RE.REG_ICASE);
998 haystack = regex.substituteAll(haystack,"");
1000 // kill all the bad tags that are attributeless
1001 regex = new RE("<\\s*/?\\s*(?!(("+approvedTags+")\\s*>))\\w+\\s*>",RE.REG_ICASE);
1002 haystack = regex.substituteAll(haystack,"");
1004 // kill all the tags which have a javascript attribute like onLoad
// NOTE(review): this matches the attribute names anywhere inside a tag,
// including inside attribute values.
1005 regex = new RE("<[^>]*("+badAttributes+")[^>]*>",RE.REG_ICASE);
1006 haystack = regex.substituteAll(haystack,"");
1008 // kill all the tags which include a url to an unacceptable protocol
// NOTE(review): only the href of "a" tags is checked; other URL attributes
// (e.g. src of img) are not covered by this pattern.
1009 regex = new RE("<\\s*a\\s+[^>]*href=(?!(\'|\")?("+approvedProtocols+"))[^>]*>",RE.REG_ICASE);
1010 haystack = regex.substituteAll(haystack,"");
1013 } catch(REException ex){
1014 ex.printStackTrace();
1021 * createHTML ruft alle regex-methoden zum unwandeln eines nicht
1022 * htmlcodierten string auf und returnt einen htmlcodierten String
1024 public static String createHTML(String content){
1025 content=convertNewline2Break(content);
1026 content=convertNewline2P(content);
1027 content=createMailLinks(content);
1028 content=createURLLinks(content);
1034 * createHTML ruft alle regex-methoden zum unwandeln eines nicht
1035 * htmlcodierten string auf und returnt einen htmlcodierten String
1037 public static String createHTML(String content,String producerDocRoot,String mailImage,String extImage,String intImage){
1038 content=convertNewline2Break(content);
1039 content=convertNewline2P(content);
1040 content=createMailLinks(content,producerDocRoot,mailImage);
1041 content=createURLLinks(content,null,producerDocRoot,extImage,intImage);