2 * put your module comment here
14 * Statische Hilfsmethoden zur Stringbehandlung
19 public final class StringUtil {
21 private static RE re_newline2br, re_brbr2p, re_mail, re_url, re_tags;
23 private StringUtil() { } // this avoids contruction
28 re_newline2br = new RE("(\r?\n){1}");
29 re_brbr2p = new RE("(<br>\r?\n<br>){1,}");
30 re_mail = new RE("([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+).([a-zA-Z0-9_.-]+)");
31 re_url = new RE("((https://)|(http://)|(ftp://)){1}([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/?([^ \t\r\n<>\\)\\]]+[^ \t\r\n.,<>\\)\\]])");
32 re_tags = new RE("<[^>]*>",RE.REG_ICASE);
34 catch (REException e){
35 System.err.println("FATAL: StringUtil: could not precompile REGEX: "+e.toString());
41 * Wandelt Datum in einen 8-ziffrigen String um (yyyymmdd)
43 * @return 8-ziffriger String (yyyymmdd)
46 public static final String date2webdbDate (GregorianCalendar theDate) {
47 StringBuffer webdbDate = new StringBuffer();
48 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
49 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
50 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
51 return webdbDate.toString();
55 * Wandelt Calendar in einen 12-ziffrigen String um (yyyymmddhhmm)
57 * @return 12-ziffriger String (yyyymmdd)
60 public static final String date2webdbDateTime (GregorianCalendar theDate) {
61 StringBuffer webdbDate = new StringBuffer();
62 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
63 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
64 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
65 webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
66 webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
67 return webdbDate.toString();
71 * Return a http://www.w3.org/TR/NOTE-datetime formatted date (yyyy-mm-ddThh:mm:ssTZ)
73 * @return w3approved datetime
76 public static final String date2w3DateTime (GregorianCalendar theDate) {
77 StringBuffer webdbDate = new StringBuffer();
78 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
79 webdbDate.append("-");
80 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
81 webdbDate.append("-");
82 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
83 webdbDate.append("T");
84 webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
85 webdbDate.append(":");
86 webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
87 webdbDate.append(":");
88 webdbDate.append(pad2(theDate.get(Calendar.SECOND)));
89 //assumes you are an hour-multiple away from UTC....
90 int offset=(theDate.get(Calendar.ZONE_OFFSET)/(60*60*1000));
92 webdbDate.append("-");
95 webdbDate.append("+");
97 webdbDate.append(pad2(Math.abs(offset)));
98 webdbDate.append(":00");
99 return webdbDate.toString();
103 * wandelt Calendar in dd.mm.yyyy / hh.mm um
105 * @return String mit (dd.mm.yyyy / hh.mm um)
107 public static String date2readableDateTime (GregorianCalendar theDate) {
108 String readable = "";
110 readable += pad2(theDate.get(Calendar.DATE));
111 readable += "." + pad2(theDate.get(Calendar.MONTH) + 1);
112 readable += "." + String.valueOf(theDate.get(Calendar.YEAR));
113 hour = theDate.get(Calendar.HOUR);
114 if (theDate.get(Calendar.AM_PM) == Calendar.PM)
116 readable += " / " + pad2(hour);
117 readable += ":" + pad2(theDate.get(Calendar.MINUTE));
122 * wandelt eine Datum in einen 8-buchstabigen String, der durch <code>/</code>
126 * @return String mit <code>/yyyy/mm/dd</code>
128 public static final String webdbDate2path (String webdbDate) {
129 StringBuffer path = new StringBuffer();
130 path.append("/").append(webdbDate.substring(0, 4));
131 path.append("/").append(webdbDate.substring(4, 6));
134 //path.append("/").append(webdbDate.substring(6, 8));
135 return path.toString();
139 * wandelt Calendar in dd.mm.yyyy um
142 * @return String mit <code>dd.mm.yyyy</code>
144 public static final String webdbDate2readableDate (String webdbDate) {
146 date += webdbDate.substring(6, 8);
147 date += "." + webdbDate.substring(4, 6);
148 date += "." + webdbDate.substring(0, 4);
154 * converts string from format: yyyy-mm-dd__hh:mm:ss.d
155 * to dd.mm.yyyy hh:mm
157 public static String dateToReadableDate(String date) {
158 StringBuffer returnDate = new StringBuffer();
161 returnDate.append(date.substring(8,10)).append('.');
162 returnDate.append(date.substring(5,7)).append('.');
163 returnDate.append(date.substring(0,4)).append(' ');
164 returnDate.append(date.substring(11,16));
166 return returnDate.toString();
170 * converts string from format: yyyy-mm-dd__hh:mm:ss.dddddd+TZ
171 * to yyyy-mm-ddThh:mm:ss+TZ:00 (w3 format for Dublin Core)
173 public static String webdbdateToDCDate(String date) {
174 StringBuffer returnDate = new StringBuffer();
177 returnDate.append(date.substring(0,10));
178 returnDate.append("T");
179 returnDate.append(date.substring(11,19));
180 String tzInfo=date.substring(26,29);
181 if (tzInfo.equals("+00")){
182 //UTC gets a special code in w3 dates
183 returnDate.append("Z");
186 //need to see what a newfoundland postgres
187 //timestamp looks like before making this robust
188 returnDate.append(tzInfo);
189 returnDate.append(":00");
193 return returnDate.toString();
198 * converts string from format: yyyy-mm-dd__hh:mm:ss.d
201 public static String dateToYear (String date) {
202 StringBuffer returnDate = new StringBuffer();
205 returnDate.append(date.substring(0,4));
207 return returnDate.toString();
211 * converts string from format: yyyy-mm-dd__hh:mm:ss.d
214 public static String dateToMonth (String date) {
215 StringBuffer returnDate = new StringBuffer();
217 if (!date.substring(5,6).equalsIgnoreCase("0")) returnDate.append(date.substring(5,7));
218 else returnDate.append(date.substring(6,7));
220 return returnDate.toString();
224 * converts string from format: yyyy-mm-dd__hh:mm:ss.d
227 public static String dateToDayOfMonth (String date) {
228 StringBuffer returnDate = new StringBuffer();
230 if (!date.substring(8,9).equalsIgnoreCase("0")) returnDate.append(date.substring(8,10));
231 else returnDate.append(date.substring(9,10));
233 return returnDate.toString();
237 * converts string from format: yyyy-mm-dd__hh:mm:ss.d
240 public static String dateToTime (String date) {
241 StringBuffer returnDate = new StringBuffer();
243 returnDate.append(date.substring(11,16));
245 return returnDate.toString();
249 * Splits the provided CSV text into a list. stolen wholesale from
250 * from Jakarta Turbine StrinUtils.java -mh
252 * @param text The CSV list of values to split apart.
253 * @param separator The separator character.
254 * @return The list of values.
256 public static String[] split(String text, String separator)
258 StringTokenizer st = new StringTokenizer(text, separator);
259 String[] values = new String[st.countTokens()];
261 while (st.hasMoreTokens())
263 values[pos++] = st.nextToken();
269 * Joins the elements of the provided array into a single string
270 * containing a list of CSV elements. Stolen wholesale from Jakarta
271 * Turbine StringUtils.java. -mh
273 * @param list The list of values to join together.
274 * @param separator The separator character.
275 * @return The CSV text.
277 public static String join(String[] list, String separator)
279 StringBuffer csv = new StringBuffer();
280 for (int i = 0; i < list.length; i++)
284 csv.append(separator);
288 return csv.toString();
293 * schließt einen String in Anführungsszeichen ein, falls er Leerzeichen o.ä. enthält
295 * @return gequoteter String
297 public static String quoteIfNecessary(String s) {
298 for (int i = 0; i < s.length(); i++)
299 if (!(Character.isLetterOrDigit(s.charAt(i)) || s.charAt(i) == '.'))
300 return quote(s, '"');
305 * schließt <code>s</code> in <code>'</code> ein und setzt Backslashes vor
306 * "gefährliche" Zeichen innerhalb des Strings
307 * Quotes special SQL-characters in <code>s</code>
309 * @return gequoteter String
// Prepares s for use in SQL: copies it character by character, substituting the
// literal on line 319 for every single quote, then passes the result through
// quote(s2, '"').
// NOTE(review): as shown here the substitute on line 319 is a plain apostrophe, which
// would make the branch a no-op. Presumably the real literal is an HTML character
// reference that was decoded when this text was extracted -- confirm against the repository.
// NOTE(review): the return statement is not visible in this view.
311 public static String quote(String s)
313 //String s2 = quote(s, '\'');
314 //Quickhack ÊÊ Ê Ê Ê Ê Ê Ê
315 //Because of '?-Bug in Postgresql-JDBC-Driver
316 StringBuffer temp = new StringBuffer();
317 for(int i=0;i<s.length();i++){
318 if(s.charAt(i)=='\''){
319 temp.append("'");
321 temp.append(s.charAt(i));
324 String s2 = temp.toString();
// double quotes are handled by the two-argument overload
327 s2 = quote(s2, '\"');
332 * schließt <code>s</code> in <code>'</code> ein und setzt Backslashes vor
333 * "gefährliche" Zeichen innerhalb des Strings
335 * @param s String, der gequoted werden soll
336 * @param quoteChar zu quotendes Zeichen
337 * @return gequoteter String
339 public static String quote(String s, char quoteChar)
341 StringBuffer buf = new StringBuffer(s.length());
343 while (pos < s.length()) {
344 int i = s.indexOf(quoteChar, pos);
345 if (i < 0) i = s.length();
346 buf.append(s.substring(pos, i));
348 if (pos < s.length()) {
350 buf.append(quoteChar);
354 return buf.toString();
358 * replaces dangerous characters in <code>s</code>
// Counterpart of quote(String, char): scans s for a backslash followed by quoteChar
// and copies the text in front of each occurrence into buf.
// NOTE(review): the declarations of quoteChar and pos, and the statement that advances
// pos after each match, are not visible in this view.
362 public static String unquote(String s)
365 StringBuffer buf = new StringBuffer(s.length());
// the escape sequence to look for: one backslash plus the quote character
367 String searchString = "\\"+quoteChar;
368 while (pos < s.length()) {
369 int i = s.indexOf(searchString, pos);
// no further escape sequence: copy the remainder
370 if (i < 0) i = s.length();
371 buf.append(s.substring(pos, i));
374 return buf.toString();
378 * Wandelet String in byte[] um.
380 * @return byte[] des String
383 public static byte[] stringToBytes(String s) {
384 String crlf = System.getProperty("line.separator");
385 if (!crlf.equals("\n"))
386 s = replace(s, "\n", crlf);
387 // byte[] buf = new byte[s.length()];
388 byte[] buf = s.getBytes();
393 * Ersetzt in String <code>s</code> das <code>pattern</code> durch <code>substitute</code>
397 * @return String mit den Ersetzungen
399 public static String replace(String s, String pattern, String substitute) {
400 int i = 0, pLen = pattern.length(), sLen = substitute.length();
401 StringBuffer buf = new StringBuffer(s.length());
403 int j = s.indexOf(pattern, i);
405 buf.append(s.substring(i));
408 buf.append(s.substring(i, j));
409 buf.append(substitute);
413 return buf.toString();
417 * Ersetzt in String <code>s</code> das Regexp <code>pattern</code> durch <code>substitute</code>
421 * @return String mit den Ersetzungen
// Replaces every match of the regular expression `pattern` in `haystack` with `substitute`.
// The pattern is compiled anew on each call.
// NOTE(review): the body of the catch clause (what is returned when the pattern does not
// compile) is not visible in this view.
423 public static String regexpReplace(String haystack, String pattern, String substitute) {
425 RE regex = new RE(pattern);
426 return regex.substituteAll(haystack,substitute);
427 } catch(REException ex){
436 * Fügt einen Separator an den Pfad an
438 * @return Pfad mit Separator am Ende
440 public static final String addSeparator (String path) {
441 return path.length() == 0 || path.endsWith(File.separator) ? path : path
442 + File.separatorChar;
446 * Fügt ein <code>/</code> ans ende des Strings and
448 * @return Pfad mit <code>/</code> am Ende
450 public static final String addSlash (String path) {
451 return path.length() == 0 || path.endsWith("/") ? path : path + '/';
455 * Löscht <code>/</code> am Ende des Strings, falls vorhanden
457 * @return String ohne <code>/</code> am Ende
459 public static final String removeSlash (String path) {
460 return path.length() > 1 && path.endsWith("/") ? path.substring(0, path.length()
465 * Checks to see if the path is absolute by looking for a leading file
470 public static boolean isAbsolutePath (String path) {
471 return path.startsWith(File.separator);
475 * Löscht Slash am Anfang des Strings
479 public static String removeFirstSlash (String path) {
480 return path.startsWith("/") ? path.substring(1) : path;
484 * formatiert eine Zahl (0-99) zweistellig (z.B. 5 -> 05)
485 * @return zwistellige Zahl
487 public static String pad2 (int number) {
488 return number < 10 ? "0" + number : String.valueOf(number);
492 * formatiert eine Zahl (0-999) dreistellig (z.B. 7 -> 007)
494 * @return 3-stellige Zahl
496 public static String pad3 (int number) {
497 return number < 10 ? "00" + number : number < 100 ? "0" + number : String.valueOf(number);
501 * Konvertiert Unix-Linefeeds in Win-Linefeeds
503 * @return Konvertierter String
// Converts Unix line feeds to Windows line feeds by inserting a '\r' in front of a '\n'.
// NOTE(review): the loop header, the termination check on i and the return statement
// are not visible in this view.
505 public static String unixLineFeedsToWin(String s) {
// next line feed after the current position
508 i = s.indexOf('\n', i+1);
// only touch a '\n' that has no '\r' directly before it and no '\r' directly after it
510 if ((i == 0 || s.charAt(i-1) != '\r') &&
511 (i == s.length()-1 || s.charAt(i+1) != '\r')) {
// splice the '\r' in front of the '\n' at index i
512 s = s.substring(0, i)+'\r'+s.substring(i);
521 * verwandelt einen String in eine gültige Url, konvertiert Sonderzeichen
522 * und Spaces werden zu Underscores
524 * @return gültige Url
// Builds a URL-friendly version of s by walking it character by character and
// appending a replacement ("oe", "ae", "ue", "a", "_") or the character itself.
// NOTE(review): the case labels are not visible in this view, so which input character
// maps to which replacement cannot be confirmed from here.
526 public static String convert2url(String s) {
528 StringBuffer buf = new StringBuffer();
529 for(int i = 0; i < s.length(); i++ ) {
530 switch( s.charAt( i ) ) {
532 buf.append( "oe" ); break;
534 buf.append( "ae" ); break;
536 buf.append( "ue" ); break;
538 buf.append( "a" ); break;
541 buf.append( "_" ); break;
// NOTE(review): reads the last character of buf; this throws if buf is still empty
// unless a guard exists on a line not shown here -- verify.
543 if( buf.charAt( buf.length() - 1 ) != '_' ) {
548 buf.append( s.charAt( i ) );
551 return buf.toString();
555 * wandelt Sonderzeichen in Quotes um
557 * @return Kovertierter String
559 public static String encodeHtml(String s) {
560 StringBuffer buf = new StringBuffer();
561 for(int i=0;i < s.length(); i++ ) {
563 /** @todo looks inefficient, to ask for index of every char, in
564 * case of failure it runs to the end.*/
565 if (s.charAt(i)=='&') {
566 // convert html to xml-parsable representation
567 if( s.indexOf( "ö", i ) == i ) {
568 buf.append( "ö" ); i += 5;
571 if( s.indexOf( "ä", i ) == i ) {
572 buf.append( "ä" ); i += 5;
575 if( s.indexOf( "ü", i ) == i ) {
576 buf.append( "ü" ); i += 5;
579 if( s.indexOf( "Ö", i ) == i ) {
580 buf.append( "Ö" ); i += 5;
583 if( s.indexOf( "Ä", i ) == i ) {
584 buf.append( "Ä" ); i += 5;
587 if( s.indexOf( "Ü", i ) == i ) {
588 buf.append( "Ü" ); i += 5;
591 if( s.indexOf( "ß", i ) == i ) {
592 buf.append( "ß" ); i += 6;
596 /** @todo should only escape outside of tags */
598 if( s.indexOf( """, i ) == i ) {
599 buf.append( "ß" ); i += 5;
602 if( s.indexOf( "–", i ) == i ) {
603 buf.append( "–" ); i += 6;
606 if( s.indexOf( "—", i ) == i ) {
607 buf.append( "—" ); i += 6;
610 if( s.indexOf( "“", i ) == i ) {
611 buf.append( "“" ); i += 6;
614 if( s.indexOf( "”", i ) == i ) {
615 buf.append( "”" ); i += 6;
618 if( s.indexOf( "„", i ) == i ) {
619 buf.append( "„" ); i += 6;
623 //looks pretty stupid
624 if( s.indexOf( "<", i ) == i ) {
625 buf.append( "<" ); i += 3;
628 if( s.indexOf( ">", i ) == i ) {
629 buf.append( ">" ); i += 3;
632 if( s.indexOf( "´", i ) == i ) {
633 buf.append( "´" ); i += 6;
636 if( s.indexOf( " ", i ) == i ) {
637 buf.append( " " ); i += 5;
641 //if( s.indexOf( "&", i ) == i ) {
642 // buf.append( "&" ); i += 0;
646 // convert umlauts an other special charakters
647 switch( s.charAt(i) ) {
648 case 'ö': buf.append( "ö" ); break;
649 case 'ä': buf.append( "ä" ); break;
650 case 'ü': buf.append( "ü" ); break;
651 case 'Ö': buf.append( "Ö" ); break;
652 case 'Ä': buf.append( "Ä" ); break;
653 case 'Ü': buf.append( "Ü" ); break;
654 case 'ß': buf.append( "ß" ); break;
655 case 'é': buf.append( "é" ); break;
656 case 'è': buf.append( "è" ); break;
657 case 'á': buf.append( "á" ); break;
658 case 'à': buf.append( "à" ); break;
659 case 'â': buf.append( "â" ); break;
660 case 'ã': buf.append( "ã" ); break;
661 case '¬': buf.append( "¬" ); break;
662 case '¹': buf.append( "¹" ); break;
663 case '²': buf.append( "²" ); break;
664 case '³': buf.append( "³" ); break;
665 case '¼': buf.append( "¼" ); break;
666 case '½': buf.append( "½" ); break;
667 case '¾': buf.append( "¾" ); break;
668 case '¶': buf.append( "¶" ); break;
669 case 'æ': buf.append( "æ" ); break;
670 case 'ð': buf.append( "ð" ); break;
671 case '|': buf.append( "¦" ); break;
672 case '·': buf.append( "·" ); break;
673 case '°': buf.append( "°" ); break;
674 case '§': buf.append( "§" ); break;
675 case 'ø': buf.append( "ø" ); break;
676 case 'ç': buf.append( "ç" ); break;
677 case '¤': buf.append( "¤" ); break;
678 case 'ª': buf.append( "ª" ); break;
679 case 'Ç': buf.append( "Ç" ); break;
680 case 'Ã': buf.append( "Ã" ); break;
681 case 'Â': buf.append( "Â" ); break;
682 case 'Æ': buf.append( "Æ" ); break;
683 case '©': buf.append( "©" ); break;
684 case '®': buf.append( "®" ); break;
685 case '¥': buf.append( "¥" ); break;
686 case 'Þ': buf.append( "þ" ); break;
687 case '¯': buf.append( "¯" ); break;
688 case 'Ð': buf.append( "Ð" ); break;
689 case 'º': buf.append( "º" ); break;
690 case '¡': buf.append( "¡" ); break;
691 case '£': buf.append( "£" ); break;
692 case '±': buf.append( "±" ); break;
693 case '¿': buf.append( "¿" ); break;
694 case 'Ø': buf.append( "Ø" ); break;
695 case 'Á': buf.append( "À" ); break;
696 case 'À': buf.append( "Á" ); break;
697 case 'É': buf.append( "È" ); break;
698 case 'È': buf.append( "É" ); break;
699 case 'ù': buf.append( "ú" ); break;
700 case 'ñ': buf.append( "ñ" ); break;
701 case 'Ñ': buf.append( "Ñ" ); break;
702 case 'µ': buf.append( "µ" ); break;
703 case 'Í': buf.append( "Ì" ); break;
704 case 'Ì': buf.append( "Í" ); break;
705 case 'í': buf.append( "ì" ); break;
706 case 'ì': buf.append( "í" ); break;
707 case 'î': buf.append( "î" ); break;
708 case 'Î': buf.append( "Î" ); break;
709 case 'ó': buf.append( "ó" ); break;
710 case 'Ó': buf.append( "Ò" ); break;
711 case 'ò': buf.append( "Î" ); break;
712 case 'Ò': buf.append( "Ó" ); break;
713 case 'ô': buf.append( "ô" ); break;
714 case 'Ô': buf.append( "Ô" ); break;
715 case 'õ': buf.append( "õ" ); break;
716 case 'Õ': buf.append( "Õ" ); break;
717 case 'ý': buf.append( "ý" ); break;
718 case 'Ý': buf.append( "Ý" ); break;
719 case 'û': buf.append( "û" ); break;
720 case 'Û': buf.append( "Û" ); break;
721 case 'ú': buf.append( "ù" ); break;
722 case 'Ú': buf.append( "Ù" ); break;
723 case 'Ù': buf.append( "Ú" ); break;
724 case 'Ê': buf.append( "Ê" ); break;
725 case 'ê': buf.append( "ê" ); break;
726 case 'å': buf.append( "å" ); break;
727 case 'Å': buf.append( "Å" ); break;
728 case 'ë': buf.append( "ë" ); break;
729 case 'Ë': buf.append( "Ë" ); break;
730 case 'ÿ': buf.append( "ÿ" ); break;
731 case 'ï': buf.append( "ï" ); break;
732 case 'Ï': buf.append( "Ï" ); break;
733 case '«': buf.append( "«" ); break;
734 case '»': buf.append( "»" ); break;
735 case '\'': buf.append( "´" ); break;
736 case '\"': buf.append( """ ); break;
737 //case '\u8211': buf.append( "–" ); break;
738 //case '\u8212': buf.append( "—" ); break;
739 //case '\u8220': buf.append( "“" ); break;
740 //case '\u8221': buf.append( "”" ); break;
741 //case '\u8222': buf.append( "„" ); break;
742 //case '\"': buf.append( """ ); break;
743 default: buf.append( s.charAt(i) );
747 return buf.toString();
// Walks s looking for '<' ... '>' spans; text in front of a span is copied unchanged and
// the span itself is passed through replaceQuot() before being appended.
// NOTE(review): the declarations of temp, startIndex and stopIndex, the bodies of the
// '<' and '>' branches and the use of the start/stop flags are not visible in this view.
751 public static String decodeHTMLinTags(String s){
752 StringBuffer buffer = new StringBuffer();
753 boolean start = false;
754 boolean stop = false;
759 for(int i=0;i<s.length();i++){
760 if(s.charAt(i)=='<'){
763 } else if(s.charAt(i)=='>'){
// text between the previous tag and this one is copied verbatim
768 buffer.append(s.substring(temp,startIndex));
// the tag itself, including both angle brackets, goes through replaceQuot()
769 buffer.append(replaceQuot(s.substring(startIndex,stopIndex+1)));
// continue scanning right behind the closing '>'
770 i= temp= stopIndex+1;
// whatever follows the last tag is copied verbatim
776 buffer.append(s.substring(stopIndex+1));
777 return buffer.toString();
// Copies s into a new buffer, appending a literal double quote where the search string
// on line 787 is found at the current position.
// NOTE(review): the search literal on line 787 appears here as a bare double quote;
// presumably it is the HTML entity for the double quote, decoded during extraction -- confirm.
// NOTE(review): how j is advanced after a match, and what happens to an '&' that does not
// start such an entity, is not visible in this view.
783 public static String replaceQuot(String s) {
784 StringBuffer buffer = new StringBuffer();
785 for(int j = 0; j < s.length();j++){
786 if(s.charAt(j)=='&'){
787 if(s.indexOf( """,j) == j) {
788 buffer.append( "\"" );
792 buffer.append(s.charAt(j));
795 return buffer.toString();
798 /** wandelt Quotes in Sonderzeichen um
801 public static String decodeHtml(String s) {
802 StringBuffer buf = new StringBuffer();
803 for(int i=0;i < s.length(); i++ ) {
804 if( s.indexOf( "ö", i ) == i ) {
805 buf.append( "ö" ); i += 5;
808 if( s.indexOf( "ä", i ) == i ) {
809 buf.append( "ä" ); i += 5;
812 if( s.indexOf( "ü", i ) == i ) {
813 buf.append( "ü" ); i += 5;
816 if( s.indexOf( "Ö", i ) == i ) {
817 buf.append( "Ö" ); i += 5;
820 if( s.indexOf( "Ä", i ) == i ) {
821 buf.append( "Ä" ); i += 5;
824 if( s.indexOf( "Ü", i ) == i ) {
825 buf.append( "Ü" ); i += 5;
828 if( s.indexOf( "ß", i ) == i ) {
829 buf.append( "ß" ); i += 6;
832 if( s.indexOf( """, i ) == i ) {
833 buf.append( "\"" ); i += 5;
836 buf.append( s.charAt(i) );
838 return buf.toString();
843 * schnellere Variante der String.toLowerCase()-Routine
845 * @return String in Kleinbuchsten
847 public static String toLowerCase(String s) {
849 char[] a = new char[l];
850 for (int i = 0; i < l; i++)
851 a[i] = Character.toLowerCase(s.charAt(i));
852 return new String(a);
856 * Findet <code>element</code> im String-Array <code>array</code>
859 * @return Fundstelle als int oder -1
861 public static int indexOf(String[] array, String element) {
863 for (int i = 0; i < array.length; i++)
864 if (array[i].equals(element))
870 * Testet auf Vorkommen von <code>element</code> in <code>array</code>
871 * @param array String-Array
873 * @return true wenn <code>element</code> vorkommt, sonst false
875 public static boolean contains(String[] array, String element) {
876 return indexOf(array, element) >= 0;
880 * Ermittelt CRC-Prüfsumme von String <code>s</code>
882 * @return CRC-Prüfsumme
// Computes an int checksum over the characters of s.
// NOTE(review): the declaration and initial value of h are not visible in this view.
884 public static int getCRC(String s) {
886 char val[] = s.toCharArray();
887 int len = val.length;
889 for (int i = 0 ; i < len; i++) {
// mix step: combine (h >> 30) with (h << 1), then XOR with the character value plus its index
891 h = (((h >> 30) | (h << 1)) ^ (val[i]+i));
// the low 8 bits of the result carry the string length (mod 256), the rest the shifted hash
894 return (h << 8) | (len & 0xff);
898 * Liefert Default-Wert def zurück, wenn String <code>s</code>
903 * @return geparster int aus s oder def
905 public static int parseInt(String s, int def) {
906 if (s == null) return def;
908 return Integer.parseInt(s);
909 } catch (NumberFormatException e) {
915 * Liefert Defaultwert def zurück, wenn s nicht zu einem float geparsed werden kann.
918 * @return geparster float oder def
920 public static float parseFloat(String s, float def) {
921 if (s == null) return def;
923 return new Float(s).floatValue();
924 } catch (NumberFormatException e) {
930 * Findet Ende eines Satzes in String <code>text</code>
933 * @return index des Satzendes, oder -1
// Looks for the end of a sentence in text, starting the search at startIndex.
// NOTE(review): the surrounding loop, the value returned for an accepted full stop and
// the handling of a rejected one are not visible in this view.
935 public static int findEndOfSentence(String text, int startIndex) {
937 int i = text.indexOf('.', startIndex);
// no full stop left: no sentence end
938 if (i < 0) return -1;
// a full stop counts only if it is not the first character, does not follow a digit
// (e.g. "3.5"), and is followed by end of text, a blank, a newline or a tab
939 if (i > 0 && !Character.isDigit(text.charAt(i-1)) &&
940 (i+1 >= text.length()
941 || text.charAt(i+1) == ' '
942 || text.charAt(i+1) == '\n'
943 || text.charAt(i+1) == '\t'))
950 * Findet Wortende in String <code>text</code> ab <code>startIndex</code>
953 * @return Index des Wortendes, oder -1
955 public static int findEndOfWord(String text, int startIndex) {
956 int i = text.indexOf(' ', startIndex),
957 j = text.indexOf('\n', startIndex);
958 if (i < 0) i = text.length();
959 if (j < 0) j = text.length();
960 return Math.min(i, j);
965 * convertNewline2P ist eine regex-routine zum umwandeln von 2 oder mehr newlines (\n)
966 * in den html-tag <p>
967 * nur sinnvoll, wenn text nicht im html-format eingegeben
969 public static String convertNewline2P(String haystack) {
970 return re_brbr2p.substituteAll(haystack,"\n</p><p>");
974 * convertNewline2Break ist eine regex-routine zum umwandeln von 1 newline (\n)
975 * in den html-tag <br>
976 * nur sinnvoll, wenn text nicht im html-format eingegeben
978 public static String convertNewline2Break(String haystack) {
979 return re_newline2br.substituteAll(haystack,"$0<br />");
983 * createMailLinks wandelt text im email-adressenformat
984 * in einen klickbaren link um
985 * nur sinnvoll, wenn text nicht im html-format eingegeben
987 public static String createMailLinks(String haystack) {
988 return re_mail.substituteAll(haystack,"<a href=\"mailto:$0\">$0</a>");
993 * createMailLinks wandelt text im email-adressenformat
994 * in einen klickbaren link um
995 * nur sinnvoll, wenn text nicht im html-format eingegeben
997 public static String createMailLinks(String haystack, String imageRoot, String mailImage) {
998 return re_mail.substituteAll(haystack,"<img src=\""+imageRoot+"/"+mailImage+"\" border=\"0\"/> <a href=\"mailto:$0\">$0</a>");
1003 * createURLLinks wandelt text im url-format
1004 * in einen klickbaren link um
1005 * nur sinnvoll, wenn text nicht im html-format eingegeben
1007 public static String createURLLinks(String haystack) {
1008 return re_url.substituteAll(haystack,"<a href=\"$0\">$0</a>");
1012 * this routine takes text in url format and makes
1013 * a clickaeble "<href>" link removing any "illegal" html tags
1014 * @param haystack, the url
1015 * @param title, the href link text
1016 * @param imagRoot, the place to find icons
1017 * @param extImage, the url of the icon to show next to the link
1018 * @return a String containing the url
1020 public static String createURLLinks(String haystack, String title, String imageRoot,String extImage) {
1021 if (title == null) {
1022 return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/> <a href=\"$0\">$0</a>");
1024 title = removeHTMLTags(title);
1025 return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/> <a href=\"$0\">"+title+"</a>");
1030 * this routine takes text in url format and makes
1031 * a clickaeble "<href>" link removing any "illegal" html tags
1032 * @param haystack, the url
1033 * @param imageRoot, the place to find icons
1034 * @param extImage, the url of the icon to show next to the link
1035 * @param intImage, unused
1036 * @return a String containing the url
1038 public static String createURLLinks(String haystack, String title, String imageRoot,String extImage,String intImage) {
1039 return createURLLinks(haystack, title, imageRoot, extImage);
1043 * deleteForbiddenTags
1044 * this method deletes all <script>, <body> and <head>-tags
// Removes script, head and body markup from haystack by applying three regular
// expressions one after the other; the patterns are compiled on every call.
// NOTE(review): the return statement and the body of the catch clause are not visible
// in this view.
1046 public static final String deleteForbiddenTags(String haystack) {
// NOTE(review): the character class after '<' has no quantifier, so exactly one
// whitespace character is required there; a plain "<script>" does not seem to match -- verify.
1048 RE regex = new RE("<[ \t\r\n](.*?)script(.*?)/script(.*?)>",RE.REG_ICASE);
1049 haystack = regex.substituteAll(haystack,"");
// head section including its content; unlike the script pattern this one is compiled
// without RE.REG_ICASE
1050 regex = new RE("<head>(.*?)</head>");
1051 haystack = regex.substituteAll(haystack,"");
// opening and closing body tags (the tags only, not what is between them); also
// compiled without RE.REG_ICASE
1052 regex = new RE("<[ \t\r\n/]*body(.*?)>");
1053 haystack = regex.substituteAll(haystack,"");
1055 } catch(REException ex){
1061 * this method deletes all html tags
1063 public static final String removeHTMLTags(String haystack){
1064 return re_tags.substituteAll(haystack,"");
1069 * this method deletes all but the approved tags html tags
1070 * it also deletes approved tags which contain malicious-looking attributes and doesn't work at all
// Strips every tag that is not on the approved list, every tag carrying a
// script-handler attribute and every link whose href does not start with an approved
// protocol. Four regular expressions are compiled and applied in sequence on each call.
// NOTE(review): the original comment above this method says it "doesn't work at all";
// the return statement and the remainder of the catch clause are not visible in this view.
1072 public static String approveHTMLTags(String haystack){
// alternation lists that get spliced into the patterns below
1074 String approvedTags="a|img|h1|h2|h3|h4|h5|h6|br|b|i|strong|p";
1075 String badAttributes="onAbort|onBlur|onChange|onClick|onDblClick|onDragDrop|onError|onFocus|onKeyDown|onKeyPress|onKeyUp|onLoad|onMouseDown|onMouseMove|onMouseOut|onMouseOver|onMouseUp|onMove|onReset|onResize|onSelect|onSubmit|onUnload";
1076 String approvedProtocols="rtsp|http|ftp|https|freenet|mailto";
1078 // kill all the bad tags that have attributes
// negative lookahead: the tag name must not be an approved tag followed by whitespace
1079 String s = "<\\s*/?\\s*(?!(("+approvedTags+")\\s))\\w+\\s[^>]*>";
1080 RE regex = new RE(s,RE.REG_ICASE);
1081 haystack = regex.substituteAll(haystack,"");
1083 // kill all the bad tags that are attributeless
1084 regex = new RE("<\\s*/?\\s*(?!(("+approvedTags+")\\s*>))\\w+\\s*>",RE.REG_ICASE);
1085 haystack = regex.substituteAll(haystack,"");
1087 // kill all the tags which have a javascript attribute like onLoad
// matches the attribute name anywhere inside the tag, including inside attribute values
1088 regex = new RE("<[^>]*("+badAttributes+")[^>]*>",RE.REG_ICASE);
1089 haystack = regex.substituteAll(haystack,"");
1091 // kill all the tags which include a url to an unacceptable protocol
// only the opening <a ...> tag is removed here
1092 regex = new RE("<\\s*a\\s+[^>]*href=(?!(\'|\")?("+approvedProtocols+"))[^>]*>",RE.REG_ICASE);
1093 haystack = regex.substituteAll(haystack,"");
1096 } catch(REException ex){
1097 ex.printStackTrace();
1104 * createHTML ruft alle regex-methoden zum unwandeln eines nicht
1105 * htmlcodierten string auf und returnt einen htmlcodierten String
1107 public static String createHTML(String content){
1108 content=convertNewline2Break(content);
1109 content=convertNewline2P(content);
1110 content=createMailLinks(content);
1111 content=createURLLinks(content);
1117 * createHTML ruft alle regex-methoden zum unwandeln eines nicht
1118 * htmlcodierten string auf und returnt einen htmlcodierten String
1120 public static String createHTML(String content,String producerDocRoot,String mailImage,String extImage,String intImage){
1121 content=convertNewline2Break(content);
1122 content=convertNewline2P(content);
1123 content=createMailLinks(content,producerDocRoot,mailImage);
1124 content=createURLLinks(content,null,producerDocRoot,extImage,intImage);