remove the unused and horrid encodeHTML method. saves us a couple hundred lines of...
[mir.git] / source / mir / misc / StringUtil.java
1 /*
2  * put your module comment here
3  */
4
5
6 package  mir.misc;
7
8 import  java.io.*;
9 import  java.lang.*;
10 import  java.util.*;
11 import  gnu.regexp.*;
12
13 /**
14  * Statische Hilfsmethoden zur Stringbehandlung
15  *
16  * @version 29.6.99
17  * @author RK
18  */
19 public final class StringUtil {
20
21         private static RE   re_newline2br, re_brbr2p, re_mail, re_url, re_tags;
22
23         private StringUtil() { }  // this avoids contruction
24
25         static {
26                 try {
27                         //precompile regex
28                         re_newline2br = new RE("(\r?\n){1}");
29                         re_brbr2p     = new RE("(<br>\r?\n<br>){1,}");
30                         re_mail       = new RE("([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+).([a-zA-Z0-9_.-]+)");
31                         re_url        = new RE("((https://)|(http://)|(ftp://)){1}([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/?([^ \t\r\n<>\\)\\]]+[^ \t\r\n.,<>\\)\\]])");
32                         re_tags       = new RE("<[^>]*>",RE.REG_ICASE);
33                 }
34                 catch (REException e){
35                         System.err.println("FATAL: StringUtil: could not precompile REGEX: "+e.toString());
36                 }
37         }
38
39
40         /**
41          * Wandelt Datum in einen 8-ziffrigen String um (yyyymmdd)
42          * @param theDate
43          * @return 8-ziffriger String (yyyymmdd)
44          */
45
46         public static final String date2webdbDate (GregorianCalendar theDate) {
47                 StringBuffer webdbDate = new StringBuffer();
48                 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
49                 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
50                 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
51                 return  webdbDate.toString();
52         }
53
54         /**
55          * Wandelt Calendar in einen 12-ziffrigen String um (yyyymmddhhmm)
56          * @param theDate
57          * @return 12-ziffriger String (yyyymmdd)
58          */
59
60         public static final String date2webdbDateTime (GregorianCalendar theDate) {
61                 StringBuffer webdbDate = new StringBuffer();
62                 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
63                 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
64                 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
65                 webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
66                 webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
67                 return  webdbDate.toString();
68         }
69
70         /**
71          * Return a http://www.w3.org/TR/NOTE-datetime formatted date (yyyy-mm-ddThh:mm:ssTZ)
72          * @param theDate
73          * @return w3approved datetime
74          */
75
76         public static final String date2w3DateTime (GregorianCalendar theDate) {
77                 StringBuffer webdbDate = new StringBuffer();
78                 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
79                 webdbDate.append("-");
80                 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
81                 webdbDate.append("-");
82                 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
83                 webdbDate.append("T");
84                 webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
85                 webdbDate.append(":");
86                 webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
87                 webdbDate.append(":");
88                 webdbDate.append(pad2(theDate.get(Calendar.SECOND)));
89                 //assumes you are an hour-multiple away from UTC....
90                 int offset=(theDate.get(Calendar.ZONE_OFFSET)/(60*60*1000));
91                 if (offset < 0){
92                 webdbDate.append("-");
93                 }
94                 else{
95                 webdbDate.append("+");
96                 }
97                 webdbDate.append(pad2(Math.abs(offset)));
98                 webdbDate.append(":00");
99                 return  webdbDate.toString();
100         }
101
102         /**
103          * wandelt Calendar in dd.mm.yyyy / hh.mm um
104          * @param theDate
105          * @return String mit (dd.mm.yyyy / hh.mm um)
106          */
107         public static String date2readableDateTime (GregorianCalendar theDate) {
108                 String readable = "";
109                 int hour;
110                 readable += pad2(theDate.get(Calendar.DATE));
111                 readable += "." + pad2(theDate.get(Calendar.MONTH) + 1);
112                 readable += "." + String.valueOf(theDate.get(Calendar.YEAR));
113                 hour = theDate.get(Calendar.HOUR);
114                 if (theDate.get(Calendar.AM_PM) == Calendar.PM)
115                         hour += 12;
116                 readable += " / " + pad2(hour);
117                 readable += ":" + pad2(theDate.get(Calendar.MINUTE));
118                 return  readable;
119         }
120
121         /**
122          * wandelt eine Datum in einen 8-buchstabigen String, der durch <code>/</code>
123          * getrennt ist.
124          *
125          * @param webdbDate
126          * @return String mit <code>/yyyy/mm/dd</code>
127          */
128         public static final String webdbDate2path (String webdbDate) {
129                 StringBuffer path = new StringBuffer();
130                 path.append("/").append(webdbDate.substring(0, 4));
131                 path.append("/").append(webdbDate.substring(4, 6));
132                 path.append("/");
133                 //who did this?
134                 //path.append("/").append(webdbDate.substring(6, 8));
135                 return  path.toString();
136         }
137
138         /**
139          * wandelt Calendar in dd.mm.yyyy um
140          *
141          * @param theDate
142          * @return String mit  <code>dd.mm.yyyy</code>
143          */
144         public static final String webdbDate2readableDate (String webdbDate) {
145                 String date = "";
146                 date += webdbDate.substring(6, 8);
147                 date += "." + webdbDate.substring(4, 6);
148                 date += "." + webdbDate.substring(0, 4);
149                 return  date;
150         }
151
152
153         /**
154          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
155          * to dd.mm.yyyy hh:mm
156          */
157         public static String dateToReadableDate(String date) {
158                 StringBuffer returnDate = new StringBuffer();
159                 if (date!=null) {
160
161                         returnDate.append(date.substring(8,10)).append('.');
162                         returnDate.append(date.substring(5,7)).append('.');
163                         returnDate.append(date.substring(0,4)).append(' ');
164                         returnDate.append(date.substring(11,16));
165                 }
166                 return returnDate.toString();
167         }
168         
169         /**
170          * converts string from format: yyyy-mm-dd__hh:mm:ss.dddddd+TZ
171          * to yyyy-mm-ddThh:mm:ss+TZ:00 (w3 format for Dublin Core)
172          */
173         public static String webdbdateToDCDate(String date) {
174                 StringBuffer returnDate = new StringBuffer();
175                 if (date!=null) {
176
177                         returnDate.append(date.substring(0,10));
178                         returnDate.append("T");
179                         returnDate.append(date.substring(11,19));
180                         String tzInfo=date.substring(26,29);
181                         if (tzInfo.equals("+00")){
182                             //UTC gets a special code in w3 dates
183                             returnDate.append("Z");
184                         }
185                         else{
186                             //need to see what a newfoundland postgres
187                             //timestamp looks like before making this robust
188                             returnDate.append(tzInfo);
189                             returnDate.append(":00");
190                         }
191                         
192                 }
193                 return returnDate.toString();
194         }
195
196
197         /**
198          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
199          * to yyyy
200          */
201         public static String dateToYear (String date) {
202                 StringBuffer returnDate = new StringBuffer();
203                 if (date!=null) {
204
205                         returnDate.append(date.substring(0,4));
206                 }
207                 return returnDate.toString();
208         }
209
210         /**
211          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
212          * to [m]m
213          */
214         public static String dateToMonth (String date) {
215                 StringBuffer returnDate = new StringBuffer();
216                 if (date!=null) {
217                         if (!date.substring(5,6).equalsIgnoreCase("0")) returnDate.append(date.substring(5,7));
218                         else returnDate.append(date.substring(6,7));
219                 }
220                 return returnDate.toString();
221         }
222
223         /**
224          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
225          * to [d]d
226          */
227         public static String dateToDayOfMonth (String date) {
228                 StringBuffer returnDate = new StringBuffer();
229                 if (date!=null) {
230                         if (!date.substring(8,9).equalsIgnoreCase("0")) returnDate.append(date.substring(8,10));
231                         else returnDate.append(date.substring(9,10));
232                 }
233                 return returnDate.toString();
234         }
235
236         /**
237          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
238          * to hh:mm
239          */
240         public static String dateToTime (String date) {
241                 StringBuffer returnDate = new StringBuffer();
242                 if (date!=null) {
243                         returnDate.append(date.substring(11,16));
244                 }
245                 return returnDate.toString();
246         }
247
248     /**
249      * Splits the provided CSV text into a list. stolen wholesale from
250      * from Jakarta Turbine StrinUtils.java -mh
251      *
252      * @param text      The CSV list of values to split apart.
253      * @param separator The separator character.
254      * @return          The list of values.
255      */
256     public static String[] split(String text, String separator)
257     {
258         StringTokenizer st = new StringTokenizer(text, separator);
259         String[] values = new String[st.countTokens()];
260         int pos = 0;
261         while (st.hasMoreTokens())
262         {
263             values[pos++] = st.nextToken();
264         }
265         return values;
266     }
267
268     /**
269      * Joins the elements of the provided array into a single string
270      * containing a list of CSV elements. Stolen wholesale from Jakarta
271      * Turbine StringUtils.java. -mh
272      *
273      * @param list      The list of values to join together.
274      * @param separator The separator character.
275      * @return          The CSV text.
276      */
277     public static String join(String[] list, String separator)
278     {
279         StringBuffer csv = new StringBuffer();
280         for (int i = 0; i < list.length; i++)
281         {
282             if (i > 0)
283             {
284                 csv.append(separator);
285             }
286             csv.append(list[i]);
287         }
288         return csv.toString();
289     }
290
291
292         /**
293          * schließt einen String in Anführungsszeichen ein, falls er Leerzeichen o.ä. enthält
294          *
295          * @return gequoteter String
296          */
297          public static String quoteIfNecessary(String s) {
298                 for (int i = 0; i < s.length(); i++)
299                         if (!(Character.isLetterOrDigit(s.charAt(i)) || s.charAt(i) == '.'))
300                                 return quote(s, '"');
301                 return s;
302         }
303
304          /**
305          * schließt <code>s</code> in <code>'</code> ein und setzt Backslashes vor
306          * "gefährliche" Zeichen innerhalb des Strings
307          * Quotes special SQL-characters in <code>s</code>
308          *
309          * @return geqoteter String
310          */
311         public static String quote(String s)
312         {
313                 //String s2 = quote(s, '\'');
314                 //Quickhack     ÃŠÃŠ ÃŠ ÃŠ ÃŠ ÃŠ ÃŠ ÃŠ
315                 //Because of '?-Bug in Postgresql-JDBC-Driver
316                 StringBuffer temp = new StringBuffer();
317                 for(int i=0;i<s.length();i++){
318                         if(s.charAt(i)=='\''){
319                                 temp.append("&#39;");
320                         } else {
321                                 temp.append(s.charAt(i));
322                         }
323                 }
324                 String s2 = temp.toString();
325                 //end Quickhack
326                 
327                 s2 = quote(s2, '\"');
328                 return s2;
329         }
330
331         /**
332          * schließt <code>s</code> in <code>'</code> ein und setzt Backslashes vor
333          * "gefährliche" Zeichen innerhalb des Strings
334          *
335          * @param s String, der gequoted werden soll
336          * @param quoteChar zu quotendes Zeichen
337          * @return gequoteter String
338          */
339         public static String quote(String s, char quoteChar)
340         {
341                 StringBuffer buf = new StringBuffer(s.length());
342                 int pos = 0;
343                 while (pos < s.length()) {
344                         int i = s.indexOf(quoteChar, pos);
345                         if (i < 0) i = s.length();
346                         buf.append(s.substring(pos, i));
347                         pos = i;
348                         if (pos < s.length()) {
349                                 buf.append('\\');
350                                 buf.append(quoteChar);
351                                 pos++;
352                         }
353                 }
354                 return buf.toString();
355         }
356
357         /**
358          * replaces dangerous characters in <code>s</code>
359          *
360          */
361
362         public static String unquote(String s)
363         {
364                 char quoteChar='\'';
365                 StringBuffer buf = new StringBuffer(s.length());
366                 int pos = 0;
367                 String searchString = "\\"+quoteChar;
368                 while (pos < s.length()) {
369                         int i = s.indexOf(searchString, pos);
370                         if (i < 0) i = s.length();
371                         buf.append(s.substring(pos, i));
372                         pos = i+1;
373                 }
374                 return buf.toString();
375         }
376
377         /**
378          * Wandelet String in byte[] um.
379          * @param s
380          * @return byte[] des String
381          */
382
383         public static byte[] stringToBytes(String s) {
384                 String crlf = System.getProperty("line.separator");
385                 if (!crlf.equals("\n"))
386                         s = replace(s, "\n", crlf);
387                 // byte[] buf = new byte[s.length()];
388                 byte[] buf = s.getBytes();
389                 return buf;
390         }
391
392                 /**
393          * Ersetzt in String <code>s</code> das <code>pattern</code> durch <code>substitute</code>
394          * @param s
395          * @param pattern
396          * @param substitute
397          * @return String mit den Ersetzungen
398          */
399         public static String replace(String s, String pattern, String substitute) {
400                 int i = 0, pLen = pattern.length(), sLen = substitute.length();
401                 StringBuffer buf = new StringBuffer(s.length());
402                 while (true) {
403                         int j = s.indexOf(pattern, i);
404                         if (j < 0) {
405                                 buf.append(s.substring(i));
406                                 break;
407                         } else {
408                                 buf.append(s.substring(i, j));
409                                 buf.append(substitute);
410                                 i = j+pLen;
411                         }
412                 }
413                 return buf.toString();
414         }
415
416         /**
417          * Ersetzt in String <code>s</code> das Regexp <code>pattern</code> durch <code>substitute</code>
418          * @param s
419          * @param pattern
420          * @param substitute
421          * @return String mit den Ersetzungen
422          */
423         public static String regexpReplace(String haystack, String pattern, String substitute) {
424                 try {
425                         RE regex = new RE(pattern);
426                         return regex.substituteAll(haystack,substitute);
427                 } catch(REException ex){
428                         return null;
429                 }
430         }
431
432
433
434
435         /**
436          * Fügt einen Separator an den Pfad an
437          * @param path
438          * @return Pfad mit Separator am Ende
439          */
440         public static final String addSeparator (String path) {
441                 return  path.length() == 0 || path.endsWith(File.separator) ? path : path
442                                 + File.separatorChar;
443         }
444
445         /**
446          * Fügt ein <code>/</code> ans ende des Strings and
447          * @param path
448          * @return Pfad mit <code>/</code> am Ende
449          */
450         public static final String addSlash (String path) {
451                 return  path.length() == 0 || path.endsWith("/") ? path : path + '/';
452         }
453
454         /**
455          * Löscht <code>/</code> am Ende des Strings, falls vorhanden
456          * @param path
457          * @return String ohne <code>/</code> am Ende
458          */
459         public static final String removeSlash (String path) {
460                 return  path.length() > 1 && path.endsWith("/") ? path.substring(0, path.length()
461                                 - 1) : path;
462         }
463
464         /**
465          * Checks to see if the path is absolute by looking for a leading file
466          * separater
467          * @param path
468          * @return
469          */
470         public static boolean isAbsolutePath (String path) {
471                 return  path.startsWith(File.separator);
472         }
473
474         /**
475          * Löscht Slash am Anfang des Strings
476          * @param path
477          * @return
478          */
479         public static String removeFirstSlash (String path) {
480                 return  path.startsWith("/") ? path.substring(1) : path;
481         }
482
483         /**
484          * formatiert eine Zahl (0-99) zweistellig (z.B. 5 -> 05)
485          * @return zwistellige Zahl
486          */
487         public static String pad2 (int number) {
488                 return  number < 10 ? "0" + number : String.valueOf(number);
489         }
490
491         /**
492          * formatiert eine Zahl (0-999) dreistellig (z.B. 7 -> 007)
493          *
494          * @return 3-stellige Zahl
495          */
496         public static String pad3 (int number) {
497                 return  number < 10 ? "00" + number : number < 100 ? "0" + number : String.valueOf(number);
498         }
499
500         /**
501          * Konvertiert Unix-Linefeeds in Win-Linefeeds
502          * @param s
503          * @return Konvertierter String
504          */
505         public static String unixLineFeedsToWin(String s) {
506                 int i = -1;
507                 while (true) {
508                         i = s.indexOf('\n', i+1);
509                         if (i < 0) break;
510                         if ((i == 0 || s.charAt(i-1) != '\r') &&
511                                 (i == s.length()-1 || s.charAt(i+1) != '\r')) {
512                                 s = s.substring(0, i)+'\r'+s.substring(i);
513                                 i++;
514                         }
515                 }
516                 return s;
517         }
518
519
520         /**
521          * verwandelt einen String in eine gültige Url, konvertiert Sonderzeichen
522          * und Spaces werden zu Underscores
523          *
524          * @return gültige Url
525          */
526         public static String convert2url(String s) {
527                 s = toLowerCase(s);
528                 StringBuffer buf = new StringBuffer();
529                 for(int i = 0; i < s.length(); i++ ) {
530                                 switch( s.charAt( i ) ) {
531                                 case 'ö':
532                         buf.append( "oe" ); break;
533                                 case 'ä':
534                         buf.append( "ae" ); break;
535                                 case 'ü':
536                         buf.append( "ue" ); break;
537                                 case 'ã':
538                         buf.append( "a" ); break;
539                                 case '´':
540                                 case '.':
541                         buf.append( "_" ); break;
542                                 case ' ':
543                         if( buf.charAt( buf.length() - 1 ) != '_' ) {
544                                         buf.append( "_" );
545                         }
546                         break;
547                                 default:
548                         buf.append( s.charAt( i ) );
549                                 }
550                 }
551                 return buf.toString();
552         }
553
554
555         public static String decodeHTMLinTags(String s){
556                 StringBuffer buffer = new StringBuffer();
557                 boolean start = false;
558                 boolean stop = false;
559                 int startIndex = 0;
560                 int stopIndex = 0;
561                 int temp = 0;
562
563                 for(int i=0;i<s.length();i++){
564                         if(s.charAt(i)=='<'){
565                                 start = true;
566                                 startIndex = i;
567                         } else if(s.charAt(i)=='>'){
568                                 stop = true;
569                                 stopIndex = i;
570
571                                 if(start && stop){
572                                         buffer.append(s.substring(temp,startIndex));
573                                         buffer.append(replaceQuot(s.substring(startIndex,stopIndex+1)));
574                                         i= temp= stopIndex+1;
575                                         start= stop= false;
576                                 }
577                         }
578                 }
579                 if(stopIndex>0){
580                         buffer.append(s.substring(stopIndex+1));
581                         return buffer.toString();
582                 } else {
583                         return s;
584                 }
585         }
586
587         public static String replaceQuot(String s) {
588                 StringBuffer buffer = new StringBuffer();
589                 for(int j = 0; j < s.length();j++){
590                         if(s.charAt(j)=='&'){
591                                 if(s.indexOf( "&quot;",j) == j) {
592                                         buffer.append( "\"" );
593                                         j += 5;
594                                 }//if
595                         } else {
596                                 buffer.append(s.charAt(j));
597                         }//else
598                 }//for
599                 return buffer.toString();
600         }
601
602         /** wandelt Quotes in Sonderzeichen um
603          */
604         /**
605         public static String decodeHtml(String s) {
606                 StringBuffer buf = new StringBuffer();
607                 for(int i=0;i < s.length(); i++ ) {
608                         if( s.indexOf( "&ouml;", i ) == i ) {
609                                 buf.append( "ö" ); i += 5;
610                                 continue;
611                         }
612                         if( s.indexOf( "&auml;", i ) == i ) {
613                                 buf.append( "ä" ); i += 5;
614                                 continue;
615                         }
616                         if( s.indexOf( "&uuml;", i ) == i ) {
617                                 buf.append( "ü" ); i += 5;
618                                 continue;
619                         }
620                         if( s.indexOf( "&Ouml;", i ) == i ) {
621                                 buf.append( "Ö" ); i += 5;
622                                 continue;
623                         }
624                         if( s.indexOf( "&Auml;", i ) == i ) {
625                                 buf.append( "Ä" ); i += 5;
626                                 continue;
627                         }
628                         if( s.indexOf( "&Uuml;", i ) == i ) {
629                                 buf.append( "Ü" ); i += 5;
630                                 continue;
631                         }
632                         if( s.indexOf( "&szlig;", i ) == i ) {
633                                 buf.append( "ß" ); i += 6;
634                                 continue;
635                         }
636                         if( s.indexOf( "&quot;", i ) == i ) {
637                                 buf.append( "\"" ); i += 5;
638                                 continue;
639                         }
640                         buf.append( s.charAt(i) );
641                 }
642                 return buf.toString();
643         }
644          */
645
646         /**
647          * schnellere Variante der String.toLowerCase()-Routine
648          *
649          * @return String in Kleinbuchsten
650          */
651         public static String toLowerCase(String s) {
652                 int l = s.length();
653                 char[] a = new char[l];
654                 for (int i = 0; i < l; i++)
655                         a[i] = Character.toLowerCase(s.charAt(i));
656                 return new String(a);
657         }
658
659                 /**
660          * Findet <code>element</code> im String-Array <code>array</code>
661          * @param array
662          * @param element
663          * @return Fundstelle als int oder -1
664          */
665         public static int indexOf(String[] array, String element) {
666                 if (array != null)
667                         for (int i = 0; i < array.length; i++)
668                                 if (array[i].equals(element))
669                                         return i;
670                 return -1;
671         }
672
673         /**
674          * Testet auf Vorkommen von <code>element</code> in <code>array</code>
675          * @param array String-Array
676          * @param element
677          * @return true wenn <code>element</code> vorkommt, sonst false
678          */
679         public static boolean contains(String[] array, String element) {
680                 return indexOf(array, element) >= 0;
681         }
682
683                 /**
684          * Ermittelt CRC-Prüfsumme von String <code>s</code>
685          * @param s
686          * @return CRC-Prüfsumme
687          */
688         public static int getCRC(String s) {
689                 int h = 0;
690                 char val[] = s.toCharArray();
691                 int len = val.length;
692
693                 for (int i = 0 ; i < len; i++) {
694                         h &= 0x7fffffff;
695                         h = (((h >> 30) | (h << 1)) ^ (val[i]+i));
696                 }
697
698                 return (h << 8) | (len & 0xff);
699         }
700
701                 /**
702          * Liefert Default-Wert def zurück, wenn String <code>s</code>
703          * kein Integer ist.
704          *
705          * @param s
706          * @param def
707          * @return geparster int aus s oder def
708          */
709         public static int parseInt(String s, int def) {
710                 if (s == null) return def;
711                 try {
712                         return Integer.parseInt(s);
713                 } catch (NumberFormatException e) {
714                         return def;
715                 }
716         }
717
718         /**
719          * Liefert Defaultwert def zurück, wenn s nicht zu einem float geparsed werden kann.
720          * @param s
721          * @param def
722          * @return geparster float oder def
723          */
724         public static float parseFloat(String s, float def) {
725                 if (s == null) return def;
726                 try {
727                         return new Float(s).floatValue();
728                 } catch (NumberFormatException e) {
729                         return def;
730                 }
731         }
732
733                 /**
734          * Findet Ende eines Satzes in String <code>text</code>
735          * @param text
736          * @param startIndex
737          * @return index des Satzendes, oder -1
738          */
739         public static int findEndOfSentence(String text, int startIndex) {
740                  while (true) {
741                          int i = text.indexOf('.', startIndex);
742                          if (i < 0) return -1;
743                          if (i > 0 && !Character.isDigit(text.charAt(i-1)) &&
744                                         (i+1 >= text.length()
745                                         || text.charAt(i+1) == ' '
746                                         || text.charAt(i+1) == '\n'
747                                         || text.charAt(i+1) == '\t'))
748                                         return i+1;
749                          startIndex = i+1;
750                  }
751         }
752
753                 /**
754          * Findet Wortende in String <code>text</code> ab <code>startIndex</code>
755          * @param text
756          * @param startIndex
757          * @return Index des Wortendes, oder -1
758          */
759         public static int findEndOfWord(String text, int startIndex) {
760                 int i = text.indexOf(' ', startIndex),
761                         j = text.indexOf('\n', startIndex);
762                 if (i < 0) i = text.length();
763                 if (j < 0) j = text.length();
764                 return Math.min(i, j);
765         }
766
767
768         /**
769          *  convertNewline2P ist eine regex-routine zum umwandeln von 2 oder mehr newlines (\n)
770          *  in den html-tag <p>
771          *  nur sinnvoll, wenn text nicht im html-format eingegeben
772          */
773         public static String convertNewline2P(String haystack) {
774                         return re_brbr2p.substituteAll(haystack,"\n</p><p>");
775         }
776
777         /**
778          *  convertNewline2Break ist eine regex-routine zum umwandeln von 1 newline (\n)
779          *  in den html-tag <br>
780          *  nur sinnvoll, wenn text nicht im html-format eingegeben
781          */
782         public static String convertNewline2Break(String haystack) {
783                 return re_newline2br.substituteAll(haystack,"$0<br />");
784         }
785
786         /**
787          *  createMailLinks wandelt text im email-adressenformat
788          *  in einen klickbaren link um
789          *  nur sinnvoll, wenn text nicht im html-format eingegeben
790          */
791         public static String createMailLinks(String haystack) {
792                         return re_mail.substituteAll(haystack,"<a href=\"mailto:$0\">$0</a>");
793         }
794
795
796         /**
797          *  createMailLinks wandelt text im email-adressenformat
798          *  in einen klickbaren link um
799          *  nur sinnvoll, wenn text nicht im html-format eingegeben
800          */
801         public static String createMailLinks(String haystack, String imageRoot, String mailImage) {
802                 return re_mail.substituteAll(haystack,"<img src=\""+imageRoot+"/"+mailImage+"\" border=\"0\"/>&#160;<a href=\"mailto:$0\">$0</a>");
803         }
804
805
806         /**
807          *  createURLLinks wandelt text im url-format
808          *  in einen klickbaren link um
809          *  nur sinnvoll, wenn text nicht im html-format eingegeben
810          */
811         public static String createURLLinks(String haystack) {
812                 return re_url.substituteAll(haystack,"<a href=\"$0\">$0</a>");
813         }
814
815         /**
816          * this routine takes text in url format and makes
817          * a clickaeble "<href>" link removing any "illegal" html tags
818          * @param haystack, the url
819          * @param title, the href link text
820          * @param imagRoot, the place to find icons
821          * @param extImage, the url of the icon to show next to the link
822          * @return a String containing the url
823          */
824         public static String createURLLinks(String haystack, String title, String imageRoot,String extImage) {
825                 if (title == null) {
826                         return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/>&#160;<a href=\"$0\">$0</a>");
827                 } else {
828                         title = removeHTMLTags(title);
829                         return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/>&#160;<a href=\"$0\">"+title+"</a>");
830                 }
831         }
832
833         /**
834          * this routine takes text in url format and makes
835          * a clickaeble "<href>" link removing any "illegal" html tags
836          * @param haystack, the url
837          * @param imageRoot, the place to find icons
838          * @param extImage, the url of the icon to show next to the link
839          * @param intImage, unused
840          * @return a String containing the url
841          */
842         public static String createURLLinks(String haystack, String title, String imageRoot,String extImage,String intImage) {
843                 return createURLLinks(haystack, title, imageRoot, extImage);
844         }
845
846          /**
847          *  deleteForbiddenTags
848          *  this method deletes all <script>, <body> and <head>-tags
849          */
850         public static final String deleteForbiddenTags(String haystack) {
851                 try {
852                         RE regex = new RE("<[ \t\r\n](.*?)script(.*?)/script(.*?)>",RE.REG_ICASE);
853                         haystack = regex.substituteAll(haystack,"");
854                         regex = new RE("<head>(.*?)</head>");
855                         haystack = regex.substituteAll(haystack,"");
856                         regex = new RE("<[ \t\r\n/]*body(.*?)>");
857                         haystack = regex.substituteAll(haystack,"");
858                         return haystack;
859                 } catch(REException ex){
860                         return null;
861                 }
862         }
863
864         /**
865          * this method deletes all html tags
866          */
867         public static final String removeHTMLTags(String haystack){
868                         return re_tags.substituteAll(haystack,"");
869         }
870
871
872         /**
873          * this method deletes all but the approved tags html tags
874          * it also deletes approved tags which contain malicious-looking attributes and doesn't work at all
875          */
876         public static String approveHTMLTags(String haystack){
877                 try {
878                         String approvedTags="a|img|h1|h2|h3|h4|h5|h6|br|b|i|strong|p";
879                         String badAttributes="onAbort|onBlur|onChange|onClick|onDblClick|onDragDrop|onError|onFocus|onKeyDown|onKeyPress|onKeyUp|onLoad|onMouseDown|onMouseMove|onMouseOut|onMouseOver|onMouseUp|onMove|onReset|onResize|onSelect|onSubmit|onUnload";
880                         String approvedProtocols="rtsp|http|ftp|https|freenet|mailto";
881
882                         // kill all the bad tags that have attributes
883                         String s = "<\\s*/?\\s*(?!(("+approvedTags+")\\s))\\w+\\s[^>]*>";
884                         RE regex = new RE(s,RE.REG_ICASE);
885                         haystack = regex.substituteAll(haystack,"");
886
887                         // kill all the bad tags that are attributeless
888                         regex = new RE("<\\s*/?\\s*(?!(("+approvedTags+")\\s*>))\\w+\\s*>",RE.REG_ICASE);
889                         haystack = regex.substituteAll(haystack,"");
890
891                         // kill all the tags which have a javascript attribute like onLoad
892                         regex = new RE("<[^>]*("+badAttributes+")[^>]*>",RE.REG_ICASE);
893                         haystack = regex.substituteAll(haystack,"");
894
895                         // kill all the tags which include a url to an unacceptable protocol
896                         regex = new RE("<\\s*a\\s+[^>]*href=(?!(\'|\")?("+approvedProtocols+"))[^>]*>",RE.REG_ICASE);
897                         haystack = regex.substituteAll(haystack,"");
898
899                         return haystack;
900                 } catch(REException ex){
901                         ex.printStackTrace();
902                         return null;
903                 }
904         }
905
906
907         /**
908          *  createHTML ruft alle regex-methoden zum unwandeln eines nicht
909          *  htmlcodierten string auf und returnt einen htmlcodierten String
910          */
911         public static String createHTML(String content){
912                 content=convertNewline2Break(content);
913                 content=convertNewline2P(content);
914                 content=createMailLinks(content);
915                 content=createURLLinks(content);
916                 return content;
917         }
918
919
920         /**
921          *  createHTML ruft alle regex-methoden zum unwandeln eines nicht
922          *  htmlcodierten string auf und returnt einen htmlcodierten String
923          */
924         public static String createHTML(String content,String producerDocRoot,String mailImage,String extImage,String intImage){
925                 content=convertNewline2Break(content);
926                 content=convertNewline2P(content);
927                 content=createMailLinks(content,producerDocRoot,mailImage);
928                 content=createURLLinks(content,null,producerDocRoot,extImage,intImage);
929                 return content;
930         }
931
932 }
933