first cut of merge of STABLE-pre1_0 into HEAD. I won't even guarantee that it
[mir.git] / source / mir / misc / StringUtil.java
1 /*
2  * put your module comment here
3  */
4
5
6 package  mir.misc;
7
8 import  java.io.*;
9 import  java.lang.*;
10 import  java.util.*;
11 import  gnu.regexp.*;
12
13 /**
14  * Statische Hilfsmethoden zur Stringbehandlung
15  *
16  * @version 29.6.99
17  * @author RK
18  */
19 public final class StringUtil {
20
21         private static RE   re_newline2br, re_brbr2p, re_mail, re_url, re_tags;
22
23         private StringUtil() { }  // this avoids contruction
24
25         static {
26                 try {
27                         //precompile regex
28                         re_newline2br = new RE("(\r?\n){1}");
29                         re_brbr2p     = new RE("(<br>\r?\n<br>){1,}");
30                         re_mail       = new RE("([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+).([a-zA-Z0-9_.-]+)");
31                         re_url        = new RE("((https://)|(http://)|(ftp://)){1}([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/?([^ \t\r\n<>\\)\\]]+[^ \t\r\n.,<>\\)\\]])");
32                         re_tags       = new RE("<[^>]*>",RE.REG_ICASE);
33                 }
34                 catch (REException e){
35                         System.err.println("FATAL: StringUtil: could not precompile REGEX: "+e.toString());
36                 }
37         }
38
39
40         /**
41          * Wandelt Datum in einen 8-ziffrigen String um (yyyymmdd)
42          * @param theDate
43          * @return 8-ziffriger String (yyyymmdd)
44          */
45
46         public static final String date2webdbDate (GregorianCalendar theDate) {
47                 StringBuffer webdbDate = new StringBuffer();
48                 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
49                 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
50                 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
51                 return  webdbDate.toString();
52         }
53
54         /**
55          * Wandelt Calendar in einen 12-ziffrigen String um (yyyymmddhhmm)
56          * @param theDate
57          * @return 12-ziffriger String (yyyymmdd)
58          */
59
60         public static final String date2webdbDateTime (GregorianCalendar theDate) {
61                 StringBuffer webdbDate = new StringBuffer();
62                 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
63                 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
64                 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
65                 webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
66                 webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
67                 return  webdbDate.toString();
68         }
69
70         /**
71          * wandelt Calendar in dd.mm.yyyy / hh.mm um
72          * @param theDate
73          * @return String mit (dd.mm.yyyy / hh.mm um)
74          */
75         public static String date2readableDateTime (GregorianCalendar theDate) {
76                 String readable = "";
77                 int hour;
78                 readable += pad2(theDate.get(Calendar.DATE));
79                 readable += "." + pad2(theDate.get(Calendar.MONTH) + 1);
80                 readable += "." + String.valueOf(theDate.get(Calendar.YEAR));
81                 hour = theDate.get(Calendar.HOUR);
82                 if (theDate.get(Calendar.AM_PM) == Calendar.PM)
83                         hour += 12;
84                 readable += " / " + pad2(hour);
85                 readable += ":" + pad2(theDate.get(Calendar.MINUTE));
86                 return  readable;
87         }
88
89         /**
90          * wandelt eine Datum in einen 8-buchstabigen String, der durch <code>/</code>
91          * getrennt ist.
92          *
93          * @param webdbDate
94          * @return String mit <code>/yyyy/mm/dd</code>
95          */
96         public static final String webdbDate2path (String webdbDate) {
97                 StringBuffer path = new StringBuffer();
98                 path.append("/").append(webdbDate.substring(0, 4));
99                 path.append("/").append(webdbDate.substring(4, 6));
100                 path.append("/");
101                 //who did this?
102                 //path.append("/").append(webdbDate.substring(6, 8));
103                 return  path.toString();
104         }
105
106         /**
107          * wandelt Calendar in dd.mm.yyyy um
108          *
109          * @param theDate
110          * @return String mit  <code>dd.mm.yyyy</code>
111          */
112         public static final String webdbDate2readableDate (String webdbDate) {
113                 String date = "";
114                 date += webdbDate.substring(6, 8);
115                 date += "." + webdbDate.substring(4, 6);
116                 date += "." + webdbDate.substring(0, 4);
117                 return  date;
118         }
119
120
121         /**
122          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
123          * to dd.mm.yyyy hh:mm
124          */
125         public static String dateToReadableDate(String date) {
126                 StringBuffer returnDate = new StringBuffer();
127                 if (date!=null) {
128
129                         returnDate.append(date.substring(8,10)).append('.');
130                         returnDate.append(date.substring(5,7)).append('.');
131                         returnDate.append(date.substring(0,4)).append(' ');
132                         returnDate.append(date.substring(11,16));
133                 }
134                 return returnDate.toString();
135         }
136
137         /**
138          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
139          * to yyyy
140          */
141         public static String dateToYear (String date) {
142                 StringBuffer returnDate = new StringBuffer();
143                 if (date!=null) {
144
145                         returnDate.append(date.substring(0,4));
146                 }
147                 return returnDate.toString();
148         }
149
150         /**
151          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
152          * to [m]m
153          */
154         public static String dateToMonth (String date) {
155                 StringBuffer returnDate = new StringBuffer();
156                 if (date!=null) {
157                         if (!date.substring(5,6).equalsIgnoreCase("0")) returnDate.append(date.substring(5,7));
158                         else returnDate.append(date.substring(6,7));
159                 }
160                 return returnDate.toString();
161         }
162
163         /**
164          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
165          * to [d]d
166          */
167         public static String dateToDayOfMonth (String date) {
168                 StringBuffer returnDate = new StringBuffer();
169                 if (date!=null) {
170                         if (!date.substring(8,9).equalsIgnoreCase("0")) returnDate.append(date.substring(8,10));
171                         else returnDate.append(date.substring(9,10));
172                 }
173                 return returnDate.toString();
174         }
175
176         /**
177          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
178          * to hh:mm
179          */
180         public static String dateToTime (String date) {
181                 StringBuffer returnDate = new StringBuffer();
182                 if (date!=null) {
183                         returnDate.append(date.substring(11,16));
184                 }
185                 return returnDate.toString();
186         }
187
188     /**
189      * Splits the provided CSV text into a list. stolen wholesale from 
190      * from Jakarta Turbine StrinUtils.java -mh
191      *
192      * @param text      The CSV list of values to split apart.
193      * @param separator The separator character.
194      * @return          The list of values.
195      */
196     public static String[] split(String text, String separator)
197     {
198         StringTokenizer st = new StringTokenizer(text, separator);
199         String[] values = new String[st.countTokens()];
200         int pos = 0;
201         while (st.hasMoreTokens())
202         {
203             values[pos++] = st.nextToken();
204         }
205         return values;
206     }
207
208     /**
209      * Joins the elements of the provided array into a single string
210      * containing a list of CSV elements. Stolen wholesale from Jakarta
211      * Turbine StringUtils.java. -mh
212      *
213      * @param list      The list of values to join together.
214      * @param separator The separator character.
215      * @return          The CSV text.
216      */
217     public static String join(String[] list, String separator)
218     {
219         StringBuffer csv = new StringBuffer();
220         for (int i = 0; i < list.length; i++)
221         {
222             if (i > 0)
223             {
224                 csv.append(separator);
225             }
226             csv.append(list[i]);
227         }
228         return csv.toString();
229     }
230
231
232         /**
233          * schließt einen String in Anführungsszeichen ein, falls er Leerzeichen o.ä. enthält
234          *
235          * @return gequoteter String
236          */
237          public static String quoteIfNecessary(String s) {
238                 for (int i = 0; i < s.length(); i++)
239                         if (!(Character.isLetterOrDigit(s.charAt(i)) || s.charAt(i) == '.'))
240                                 return quote(s, '"');
241                 return s;
242         }
243
244          /**
245          * schließt <code>s</code> in <code>'</code> ein und setzt Backslashes vor
246          * "gefährliche" Zeichen innerhalb des Strings
247          * Quotes special SQL-characters in <code>s</code>
248          *
249          * @return geqoteter String
250          */
251         public static String quote(String s)
252         {
253                 //String s2 = quote(s, '\'');
254
255                 //Quickhack
256                 //Because of '?-Bug in Postgresql-JDBC-Driver
257                 StringBuffer temp = new StringBuffer();
258                 for(int i=0;i<s.length();i++){
259                         if(s.charAt(i)=='\''){
260                                 temp.append("&acute;");
261                         } else {
262                                 temp.append(s.charAt(i));
263                         }
264                 }
265                 String s2 = temp.toString();
266                 //Quickhack end
267
268                 s2 = quote(s2, '\"');
269                 return s2;
270         }
271
272         /**
273          * schließt <code>s</code> in <code>'</code> ein und setzt Backslashes vor
274          * "gefährliche" Zeichen innerhalb des Strings
275          *
276          * @param s String, der gequoted werden soll
277          * @param quoteChar zu quotendes Zeichen
278          * @return gequoteter String
279          */
280         public static String quote(String s, char quoteChar)
281         {
282                 StringBuffer buf = new StringBuffer(s.length());
283                 int pos = 0;
284                 while (pos < s.length()) {
285                         int i = s.indexOf(quoteChar, pos);
286                         if (i < 0) i = s.length();
287                         buf.append(s.substring(pos, i));
288                         pos = i;
289                         if (pos < s.length()) {
290                                 buf.append('\\');
291                                 buf.append(quoteChar);
292                                 pos++;
293                         }
294                 }
295                 return buf.toString();
296         }
297
298         /**
299          * replaces dangerous characters in <code>s</code>
300          *
301          */
302
303         public static String unquote(String s)
304         {
305                 char quoteChar='\'';
306                 StringBuffer buf = new StringBuffer(s.length());
307                 int pos = 0;
308                 String searchString = "\\"+quoteChar;
309                 while (pos < s.length()) {
310                         int i = s.indexOf(searchString, pos);
311                         if (i < 0) i = s.length();
312                         buf.append(s.substring(pos, i));
313                         pos = i+1;
314                 }
315                 return buf.toString();
316         }
317
318         /**
319          * Wandelet String in byte[] um.
320          * @param s
321          * @return byte[] des String
322          */
323
324         public static byte[] stringToBytes(String s) {
325                 String crlf = System.getProperty("line.separator");
326                 if (!crlf.equals("\n"))
327                         s = replace(s, "\n", crlf);
328                 // byte[] buf = new byte[s.length()];
329                 byte[] buf = s.getBytes();
330                 return buf;
331         }
332
333                 /**
334          * Ersetzt in String <code>s</code> das <code>pattern</code> durch <code>substitute</code>
335          * @param s
336          * @param pattern
337          * @param substitute
338          * @return String mit den Ersetzungen
339          */
340         public static String replace(String s, String pattern, String substitute) {
341                 int i = 0, pLen = pattern.length(), sLen = substitute.length();
342                 StringBuffer buf = new StringBuffer(s.length());
343                 while (true) {
344                         int j = s.indexOf(pattern, i);
345                         if (j < 0) {
346                                 buf.append(s.substring(i));
347                                 break;
348                         } else {
349                                 buf.append(s.substring(i, j));
350                                 buf.append(substitute);
351                                 i = j+pLen;
352                         }
353                 }
354                 return buf.toString();
355         }
356
357         /**
358          * Ersetzt in String <code>s</code> das Regexp <code>pattern</code> durch <code>substitute</code>
359          * @param s
360          * @param pattern
361          * @param substitute
362          * @return String mit den Ersetzungen
363          */
364         public static String regexpReplace(String haystack, String pattern, String substitute) {
365                 try {
366                         RE regex = new RE(pattern);
367                         return regex.substituteAll(haystack,substitute);
368                 } catch(REException ex){
369                         return null;
370                 }
371         }
372
373
374
375
376         /**
377          * Fügt einen Separator an den Pfad an
378          * @param path
379          * @return Pfad mit Separator am Ende
380          */
381         public static final String addSeparator (String path) {
382                 return  path.length() == 0 || path.endsWith(File.separator) ? path : path
383                                 + File.separatorChar;
384         }
385
386         /**
387          * Fügt ein <code>/</code> ans ende des Strings and
388          * @param path
389          * @return Pfad mit <code>/</code> am Ende
390          */
391         public static final String addSlash (String path) {
392                 return  path.length() == 0 || path.endsWith("/") ? path : path + '/';
393         }
394
395         /**
396          * Löscht <code>/</code> am Ende des Strings, falls vorhanden
397          * @param path
398          * @return String ohne <code>/</code> am Ende
399          */
400         public static final String removeSlash (String path) {
401                 return  path.length() > 1 && path.endsWith("/") ? path.substring(0, path.length()
402                                 - 1) : path;
403         }
404
405         /**
406          * Checks to see if the path is absolute by looking for a leading file
407          * separater
408          * @param path
409          * @return
410          */
411         public static boolean isAbsolutePath (String path) {
412                 return  path.startsWith(File.separator);
413         }
414
415         /**
416          * Löscht Slash am Anfang des Strings
417          * @param path
418          * @return
419          */
420         public static String removeFirstSlash (String path) {
421                 return  path.startsWith("/") ? path.substring(1) : path;
422         }
423
424         /**
425          * formatiert eine Zahl (0-99) zweistellig (z.B. 5 -> 05)
426          * @return zwistellige Zahl
427          */
428         public static String pad2 (int number) {
429                 return  number < 10 ? "0" + number : String.valueOf(number);
430         }
431
432         /**
433          * formatiert eine Zahl (0-999) dreistellig (z.B. 7 -> 007)
434          *
435          * @return 3-stellige Zahl
436          */
437         public static String pad3 (int number) {
438                 return  number < 10 ? "00" + number : number < 100 ? "0" + number : String.valueOf(number);
439         }
440
441         /**
442          * Konvertiert Unix-Linefeeds in Win-Linefeeds
443          * @param s
444          * @return Konvertierter String
445          */
446         public static String unixLineFeedsToWin(String s) {
447                 int i = -1;
448                 while (true) {
449                         i = s.indexOf('\n', i+1);
450                         if (i < 0) break;
451                         if ((i == 0 || s.charAt(i-1) != '\r') &&
452                                 (i == s.length()-1 || s.charAt(i+1) != '\r')) {
453                                 s = s.substring(0, i)+'\r'+s.substring(i);
454                                 i++;
455                         }
456                 }
457                 return s;
458         }
459
460
461         /**
462          * verwandelt einen String in eine gültige Url, konvertiert Sonderzeichen
463          * und Spaces werden zu Underscores
464          *
465          * @return gültige Url
466          */
467         public static String convert2url(String s) {
468                 s = toLowerCase(s);
469                 StringBuffer buf = new StringBuffer();
470                 for(int i = 0; i < s.length(); i++ ) {
471                                 switch( s.charAt( i ) ) {
472                                 case 'ö':
473                         buf.append( "oe" ); break;
474                                 case 'ä':
475                         buf.append( "ae" ); break;
476                                 case 'ü':
477                         buf.append( "ue" ); break;
478                                 case 'ã':
479                         buf.append( "a" ); break;
480                                 case '´':
481                                 case '.':
482                         buf.append( "_" ); break;
483                                 case ' ':
484                         if( buf.charAt( buf.length() - 1 ) != '_' ) {
485                                         buf.append( "_" );
486                         }
487                         break;
488                                 default:
489                         buf.append( s.charAt( i ) );
490                                 }
491                 }
492                 return buf.toString();
493         }
494
495         /**
496          * wandelt Sonderzeichen in Quotes um
497          *
498          * @return Kovertierter String
499          */
500         public static String encodeHtml(String s) {
501                 StringBuffer buf = new StringBuffer();
502                 for(int i=0;i < s.length(); i++ ) {
503
504                         /** @todo looks inefficient, to ask for index of every char, in
505                          *  case of failure it runs to the end.*/
506                         if (s.charAt(i)=='&') {
507                                 // convert html to xml-parsable representation
508                                 if( s.indexOf( "&ouml;", i ) == i ) {
509                                         buf.append( "&#246;" ); i += 5;
510                                         continue;
511                                 }
512                                 if( s.indexOf( "&auml;", i ) == i ) {
513                                         buf.append( "&#228;" ); i += 5;
514                                         continue;
515                                 }
516                                 if( s.indexOf( "&uuml;", i ) == i ) {
517                                         buf.append( "&#252;" ); i += 5;
518                                         continue;
519                                 }
520                                 if( s.indexOf( "&Ouml;", i ) == i ) {
521                                         buf.append( "&#214;" ); i += 5;
522                                         continue;
523                                 }
524                                 if( s.indexOf( "&Auml;", i ) == i ) {
525                                         buf.append( "&#196;" ); i += 5;
526                                         continue;
527                                 }
528                                 if( s.indexOf( "&Uuml;", i ) == i ) {
529                                         buf.append( "&#220;" ); i += 5;
530                                         continue;
531                                 }
532                                 if( s.indexOf( "&szlig;", i ) == i ) {
533                                         buf.append( "&#223;" ); i += 6;
534                                         continue;
535                                 }
536
537                                 /** @todo should only escape outside of tags */
538
539                                 if( s.indexOf( "&quot;", i ) == i ) {
540                                         buf.append( "&#223;" ); i += 5;
541                                         continue;
542                                 }
543                                 if( s.indexOf( "&ndash;", i ) == i ) {
544                                         buf.append( "&#8211;" ); i += 6;
545                                         continue;
546                                 }
547                                 if( s.indexOf( "&mdash;", i ) == i ) {
548                                         buf.append( "&#8212;" ); i += 6;
549                                         continue;
550                                 }
551                                 if( s.indexOf( "&ldquo;", i ) == i ) {
552                                         buf.append( "&#8220;" ); i += 6;
553                                         continue;
554                                 }
555                                 if( s.indexOf( "&rdquo;", i ) == i ) {
556                                         buf.append( "&#8221;" ); i += 6;
557                                         continue;
558                                 }
559                                 if( s.indexOf( "&bdquo;", i ) == i ) {
560                                         buf.append( "&#8222;" ); i += 6;
561                                         continue;
562                                 }
563
564                                 //looks pretty stupid
565                                 if( s.indexOf( "&lt;", i ) == i ) {
566                                         buf.append( "&lt;" ); i += 3;
567                                         continue;
568                                 }
569                                 if( s.indexOf( "&gt;", i ) == i ) {
570                                         buf.append( "&gt;" ); i += 3;
571                                         continue;
572                                 }
573                                 if( s.indexOf( "&acute;", i ) == i ) {
574                                         buf.append( "&acute;" ); i += 6;
575                                         continue;
576                                 }
577                                 if( s.indexOf( "&nbsp;", i ) == i ) {
578                                         buf.append( "&nbsp;" ); i += 5;
579                                         continue;
580                                 }
581                                 //has to be the last
582                                 //if( s.indexOf( "&", i ) == i ) {
583                                 //  buf.append( "&#38;" ); i += 0;
584                                 //  continue;
585                                 //}
586                         }
587                         // convert umlauts an other special charakters
588                         switch( s.charAt(i) ) {
589                                 case 'ö': buf.append( "&#246;" ); break;
590                                 case 'ä': buf.append( "&#228;" ); break;
591                                 case 'ü': buf.append( "&#252;" ); break;
592                                 case 'Ö': buf.append( "&#214;" ); break;
593                                 case 'Ä': buf.append( "&#196;" ); break;
594                                 case 'Ü': buf.append( "&#220;" ); break;
595                                 case 'ß': buf.append( "&#223;" ); break;
596                                 case 'é': buf.append( "&#233;" ); break;
597                                 case 'è': buf.append( "&#232;" ); break;
598                                 case 'á': buf.append( "&#225;" ); break;
599                                 case 'à': buf.append( "&#224;" ); break;
600                                 case 'â': buf.append( "&#226;" ); break;
601                                 case 'ã': buf.append( "&#227;" ); break;
602                                 case '¬': buf.append( "&#172;" ); break;
603                                 case '¹': buf.append( "&#185;" ); break;
604                                 case '²': buf.append( "&#178;" ); break;
605                                 case '³': buf.append( "&#179;" ); break;
606                                 case '¼': buf.append( "&#188;" ); break;
607                                 case '½': buf.append( "&#189;" ); break;
608                                 case '¾': buf.append( "&#190;" ); break;
609                                 case '¶': buf.append( "&#182;" ); break;
610                                 case 'æ': buf.append( "&#230;" ); break;
611                                 case 'ð': buf.append( "&#240;" ); break;
612                                 case '|': buf.append( "&#166;" ); break;
613                                 case '·': buf.append( "&#183;" ); break;
614                                 case '°': buf.append( "&#176;" ); break;
615                                 case '§': buf.append( "&#167;" ); break;
616                                 case 'ø': buf.append( "&#248;" ); break;
617                                 case 'ç': buf.append( "&#231;" ); break;
618                                 case '¤': buf.append( "&#164;" ); break;
619                                 case 'ª': buf.append( "&#170;" ); break;
620                                 case 'Ç': buf.append( "&#199;" ); break;
621                                 case 'Ã': buf.append( "&#195;" ); break;
622                                 case 'Â': buf.append( "&#194;" ); break;
623                                 case 'Æ': buf.append( "&#198;" ); break;
624                                 case '©': buf.append( "&#169;" ); break;
625                                 case '®': buf.append( "&#174;" ); break;
626                                 case '¥': buf.append( "&#165;" ); break;
627                                 case 'Þ': buf.append( "&#254;" ); break;
628                                 case '¯': buf.append( "&#175;" ); break;
629                                 case 'Ð': buf.append( "&#208;" ); break;
630                                 case 'º': buf.append( "&#186;" ); break;
631                                 case '¡': buf.append( "&#161;" ); break;
632                                 case '£': buf.append( "&#163;" ); break;
633                                 case '±': buf.append( "&#177;" ); break;
634                                 case '¿': buf.append( "&#191;" ); break;
635                                 case 'Ø': buf.append( "&#216;" ); break;
636                                 case 'Á': buf.append( "&#192;" ); break;
637                                 case 'À': buf.append( "&#193;" ); break;
638                                 case 'É': buf.append( "&#200;" ); break;
639                                 case 'È': buf.append( "&#201;" ); break;
640                                 case 'ù': buf.append( "&#250;" ); break;
641                                 case 'ñ': buf.append( "&#241;" ); break;
642                                 case 'Ñ': buf.append( "&#209;" ); break;
643                                 case 'µ': buf.append( "&#181;" ); break;
644                                 case 'Í': buf.append( "&#204;" ); break;
645                                 case 'Ì': buf.append( "&#205;" ); break;
646                                 case 'í': buf.append( "&#236;" ); break;
647                                 case 'ì': buf.append( "&#237;" ); break;
648                                 case 'î': buf.append( "&#238;" ); break;
649                                 case 'Î': buf.append( "&#206;" ); break;
650                                 case 'ó': buf.append( "&#243;" ); break;
651                                 case 'Ó': buf.append( "&#210;" ); break;
652                                 case 'ò': buf.append( "&#206;" ); break;
653                                 case 'Ò': buf.append( "&#211;" ); break;
654                                 case 'ô': buf.append( "&#244;" ); break;
655                                 case 'Ô': buf.append( "&#212;" ); break;
656                                 case 'õ': buf.append( "&#245;" ); break;
657                                 case 'Õ': buf.append( "&#213;" ); break;
658                                 case 'ý': buf.append( "&#253;" ); break;
659                                 case 'Ý': buf.append( "&#221;" ); break;
660                                 case 'û': buf.append( "&#251;" ); break;
661                                 case 'Û': buf.append( "&#219;" ); break;
662                                 case 'ú': buf.append( "&#249;" ); break;
663                                 case 'Ú': buf.append( "&#217;" ); break;
664                                 case 'Ù': buf.append( "&#218;" ); break;
665                                 case 'Ê': buf.append( "&#202;" ); break;
666                                 case 'ê': buf.append( "&#234;" ); break;
667                                 case 'å': buf.append( "&#229;" ); break;
668                                 case 'Å': buf.append( "&#197;" ); break;
669                                 case 'ë': buf.append( "&#235;" ); break;
670                                 case 'Ë': buf.append( "&#203;" ); break;
671                                 case 'ÿ': buf.append( "&#255;" ); break;
672                                 case 'ï': buf.append( "&#239;" ); break;
673                                 case 'Ï': buf.append( "&#207;" ); break;
674                                 case '«': buf.append( "&#171;" ); break;
675                                 case '»': buf.append( "&#187;" ); break;
676                                 case '\'': buf.append( "&acute;" ); break;
677                                 case '\"': buf.append( "&quot;" ); break;
678                                 //case '\u8211': buf.append( "&#8211;" ); break;
679                                 //case '\u8212': buf.append( "&#8212;" ); break;
680                                 //case '\u8220': buf.append( "&#8220;" ); break;
681                                 //case '\u8221': buf.append( "&#8221;" ); break;
682                                 //case '\u8222': buf.append( "&#8222;" ); break;
683                                 //case '\"': buf.append( "&#34;" ); break;
684                                 default: buf.append( s.charAt(i) );
685                         }
686
687                 }
688                 return buf.toString();
689         }
690
691
692         public static String decodeHTMLinTags(String s){
693                 StringBuffer buffer = new StringBuffer();
694                 boolean start = false;
695                 boolean stop = false;
696                 int startIndex = 0;
697                 int stopIndex = 0;
698                 int temp = 0;
699
700                 for(int i=0;i<s.length();i++){
701                         if(s.charAt(i)=='<'){
702                                 start = true;
703                                 startIndex = i;
704                         } else if(s.charAt(i)=='>'){
705                                 stop = true;
706                                 stopIndex = i;
707
708                                 if(start && stop){
709                                         buffer.append(s.substring(temp,startIndex));
710                                         buffer.append(replaceQuot(s.substring(startIndex,stopIndex+1)));
711                                         i= temp= stopIndex+1;
712                                         start= stop= false;
713                                 }
714                         }
715                 }
716                 if(stopIndex>0){
717                         buffer.append(s.substring(stopIndex+1));
718                         return buffer.toString();
719                 } else {
720                         return s;
721                 }
722         }
723
724         public static String replaceQuot(String s) {
725                 StringBuffer buffer = new StringBuffer();
726                 for(int j = 0; j < s.length();j++){
727                         if(s.charAt(j)=='&'){
728                                 if(s.indexOf( "&quot;",j) == j) {
729                                         buffer.append( "\"" );
730                                         j += 5;
731                                 }//if
732                         } else {
733                                 buffer.append(s.charAt(j));
734                         }//else
735                 }//for
736                 return buffer.toString();
737         }
738
739         /** wandelt Quotes in Sonderzeichen um
740          */
741         /**
742         public static String decodeHtml(String s) {
743                 StringBuffer buf = new StringBuffer();
744                 for(int i=0;i < s.length(); i++ ) {
745                         if( s.indexOf( "&ouml;", i ) == i ) {
746                                 buf.append( "ö" ); i += 5;
747                                 continue;
748                         }
749                         if( s.indexOf( "&auml;", i ) == i ) {
750                                 buf.append( "ä" ); i += 5;
751                                 continue;
752                         }
753                         if( s.indexOf( "&uuml;", i ) == i ) {
754                                 buf.append( "ü" ); i += 5;
755                                 continue;
756                         }
757                         if( s.indexOf( "&Ouml;", i ) == i ) {
758                                 buf.append( "Ö" ); i += 5;
759                                 continue;
760                         }
761                         if( s.indexOf( "&Auml;", i ) == i ) {
762                                 buf.append( "Ä" ); i += 5;
763                                 continue;
764                         }
765                         if( s.indexOf( "&Uuml;", i ) == i ) {
766                                 buf.append( "Ü" ); i += 5;
767                                 continue;
768                         }
769                         if( s.indexOf( "&szlig;", i ) == i ) {
770                                 buf.append( "ß" ); i += 6;
771                                 continue;
772                         }
773                         if( s.indexOf( "&quot;", i ) == i ) {
774                                 buf.append( "\"" ); i += 5;
775                                 continue;
776                         }
777                         buf.append( s.charAt(i) );
778                 }
779                 return buf.toString();
780         }
781          */
782
783         /**
784          * schnellere Variante der String.toLowerCase()-Routine
785          *
786          * @return String in Kleinbuchsten
787          */
788         public static String toLowerCase(String s) {
789                 int l = s.length();
790                 char[] a = new char[l];
791                 for (int i = 0; i < l; i++)
792                         a[i] = Character.toLowerCase(s.charAt(i));
793                 return new String(a);
794         }
795
796                 /**
797          * Findet <code>element</code> im String-Array <code>array</code>
798          * @param array
799          * @param element
800          * @return Fundstelle als int oder -1
801          */
802         public static int indexOf(String[] array, String element) {
803                 if (array != null)
804                         for (int i = 0; i < array.length; i++)
805                                 if (array[i].equals(element))
806                                         return i;
807                 return -1;
808         }
809
810         /**
811          * Testet auf Vorkommen von <code>element</code> in <code>array</code>
812          * @param array String-Array
813          * @param element
814          * @return true wenn <code>element</code> vorkommt, sonst false
815          */
816         public static boolean contains(String[] array, String element) {
817                 return indexOf(array, element) >= 0;
818         }
819
820                 /**
821          * Ermittelt CRC-Prüfsumme von String <code>s</code>
822          * @param s
823          * @return CRC-Prüfsumme
824          */
825         public static int getCRC(String s) {
826                 int h = 0;
827                 char val[] = s.toCharArray();
828                 int len = val.length;
829
830                 for (int i = 0 ; i < len; i++) {
831                         h &= 0x7fffffff;
832                         h = (((h >> 30) | (h << 1)) ^ (val[i]+i));
833                 }
834
835                 return (h << 8) | (len & 0xff);
836         }
837
838                 /**
839          * Liefert Default-Wert def zurück, wenn String <code>s</code>
840          * kein Integer ist.
841          *
842          * @param s
843          * @param def
844          * @return geparster int aus s oder def
845          */
846         public static int parseInt(String s, int def) {
847                 if (s == null) return def;
848                 try {
849                         return Integer.parseInt(s);
850                 } catch (NumberFormatException e) {
851                         return def;
852                 }
853         }
854
855         /**
856          * Liefert Defaultwert def zurück, wenn s nicht zu einem float geparsed werden kann.
857          * @param s
858          * @param def
859          * @return geparster float oder def
860          */
861         public static float parseFloat(String s, float def) {
862                 if (s == null) return def;
863                 try {
864                         return new Float(s).floatValue();
865                 } catch (NumberFormatException e) {
866                         return def;
867                 }
868         }
869
870                 /**
871          * Findet Ende eines Satzes in String <code>text</code>
872          * @param text
873          * @param startIndex
874          * @return index des Satzendes, oder -1
875          */
876         public static int findEndOfSentence(String text, int startIndex) {
877                  while (true) {
878                          int i = text.indexOf('.', startIndex);
879                          if (i < 0) return -1;
880                          if (i > 0 && !Character.isDigit(text.charAt(i-1)) &&
881                                         (i+1 >= text.length()
882                                         || text.charAt(i+1) == ' '
883                                         || text.charAt(i+1) == '\n'
884                                         || text.charAt(i+1) == '\t'))
885                                         return i+1;
886                          startIndex = i+1;
887                  }
888         }
889
890                 /**
891          * Findet Wortende in String <code>text</code> ab <code>startIndex</code>
892          * @param text
893          * @param startIndex
894          * @return Index des Wortendes, oder -1
895          */
896         public static int findEndOfWord(String text, int startIndex) {
897                 int i = text.indexOf(' ', startIndex),
898                         j = text.indexOf('\n', startIndex);
899                 if (i < 0) i = text.length();
900                 if (j < 0) j = text.length();
901                 return Math.min(i, j);
902         }
903
904
905         /**
906          *  convertNewline2P ist eine regex-routine zum umwandeln von 2 oder mehr newlines (\n)
907          *  in den html-tag <p>
908          *  nur sinnvoll, wenn text nicht im html-format eingegeben
909          */
910         public static String convertNewline2P(String haystack) {
911                         return re_brbr2p.substituteAll(haystack,"\n</p><p>");
912         }
913
914         /**
915          *  convertNewline2Break ist eine regex-routine zum umwandeln von 1 newline (\n)
916          *  in den html-tag <br>
917          *  nur sinnvoll, wenn text nicht im html-format eingegeben
918          */
919         public static String convertNewline2Break(String haystack) {
920                 return re_newline2br.substituteAll(haystack,"$0<br>");
921         }
922
923         /**
924          *  createMailLinks wandelt text im email-adressenformat
925          *  in einen klickbaren link um
926          *  nur sinnvoll, wenn text nicht im html-format eingegeben
927          */
928         public static String createMailLinks(String haystack) {
929                         return re_mail.substituteAll(haystack,"<a href=\"mailto:$0\">$0</a>");
930         }
931
932
933         /**
934          *  createMailLinks wandelt text im email-adressenformat
935          *  in einen klickbaren link um
936          *  nur sinnvoll, wenn text nicht im html-format eingegeben
937          */
938         public static String createMailLinks(String haystack, String imageRoot, String mailImage) {
939                 return re_mail.substituteAll(haystack,"<img src=\""+imageRoot+"/"+mailImage+"\" border=\"0\"/>&#160;<a href=\"mailto:$0\">$0</a>");
940         }
941
942
943         /**
944          *  createURLLinks wandelt text im url-format
945          *  in einen klickbaren link um
946          *  nur sinnvoll, wenn text nicht im html-format eingegeben
947          */
948         public static String createURLLinks(String haystack) {
949                 return re_url.substituteAll(haystack,"<a href=\"$0\">$0</a>");
950         }
951
952         /**
953          * this routine takes text in url format and makes
954          * a clickaeble "<href>" link removing any "illegal" html tags
955          * @param haystack, the url
956          * @param title, the href link text
957          * @param imagRoot, the place to find icons
958          * @param extImage, the url of the icon to show next to the link
959          * @return a String containing the url
960          */
961         public static String createURLLinks(String haystack, String title, String imageRoot,String extImage) {
962                 if (title == null) {
963                         return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/>&#160;<a href=\"$0\">$0</a>");
964                 } else {
965                         title = removeHTMLTags(title);
966                         return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/>&#160;<a href=\"$0\">"+title+"</a>");
967                 }
968         }
969
970         /**
971          * this routine takes text in url format and makes
972          * a clickaeble "<href>" link removing any "illegal" html tags
973          * @param haystack, the url
974          * @param imageRoot, the place to find icons
975          * @param extImage, the url of the icon to show next to the link
976          * @param intImage, unused
977          * @return a String containing the url
978          */
979         public static String createURLLinks(String haystack, String title, String imageRoot,String extImage,String intImage) {
980                 return createURLLinks(haystack, title, imageRoot, extImage);
981         }
982
983         /**
984          * this routine takes text in url format and makes
985          * an image link removing any "illegal" html tags
986          * @param haystack, the url
987          * @param title, the image alt text, can be null
988          * @param height, height of the image
989          * @param width, width of the image
990          * @return a String containing the url
991          */
992         public static String createIMGLinks(String haystack, String title,
993                                         String height,String width) {
994         String wh="";
995         if ( (height != null) && (width != null) ) 
996         {
997             wh = "width=\""+width+"\" height=\""+height+"\""; 
998         }
999                 if (title != null) {
1000                         title = removeHTMLTags(title);
1001                         return re_url.substituteAll(haystack,
1002                                         "<img hspace=\"10\" vspace=\"6\" "+
1003                                         "align=\"left\" src=\"$0\" "+wh+
1004                                         " alt=\""+title+"\"/>&#160;<br><i>"+
1005                                         title+"</i>");
1006                 } else {
1007                         return re_url.substituteAll(haystack,
1008                                         "<img hspace=\"10\" vspace=\"6\" "+
1009                                         "align=\"left\" src=\"$0\" "+wh+
1010                                         " alt=\"\"/>&#160;");
1011                 }
1012         }
1013
1014
1015          /**
1016          *  deleteForbiddenTags
1017          *  this method deletes all <script>, <body> and <head>-tags
1018          */
1019         public static final String deleteForbiddenTags(String haystack) {
1020                 try {
1021                         RE regex = new RE("<[ \t\r\n](.*?)script(.*?)/script(.*?)>",RE.REG_ICASE);
1022                         haystack = regex.substituteAll(haystack,"");
1023                         regex = new RE("<head>(.*?)</head>");
1024                         haystack = regex.substituteAll(haystack,"");
1025                         regex = new RE("<[ \t\r\n/]*body(.*?)>");
1026                         haystack = regex.substituteAll(haystack,"");
1027                         return haystack;
1028                 } catch(REException ex){
1029                         return null;
1030                 }
1031         }
1032
1033         /**
1034          * this method deletes all html tags
1035          */
1036         public static final String removeHTMLTags(String haystack){
1037                         return re_tags.substituteAll(haystack,"");
1038         }
1039
1040
1041         /**
1042          * this method deletes all but the approved tags html tags
1043          * it also deletes approved tags which contain malicious-looking attributes and doesn't work at all
1044          */
1045         public static String approveHTMLTags(String haystack){
1046                 try {
1047                         String approvedTags="a|img|h1|h2|h3|h4|h5|h6|br|b|i|strong|p";
1048                         String badAttributes="onAbort|onBlur|onChange|onClick|onDblClick|onDragDrop|onError|onFocus|onKeyDown|onKeyPress|onKeyUp|onLoad|onMouseDown|onMouseMove|onMouseOut|onMouseOver|onMouseUp|onMove|onReset|onResize|onSelect|onSubmit|onUnload";
1049                         String approvedProtocols="rtsp|http|ftp|https|freenet|mailto";
1050
1051                         // kill all the bad tags that have attributes
1052                         String s = "<\\s*/?\\s*(?!(("+approvedTags+")\\s))\\w+\\s[^>]*>";
1053                         RE regex = new RE(s,RE.REG_ICASE);
1054                         haystack = regex.substituteAll(haystack,"");
1055
1056                         // kill all the bad tags that are attributeless
1057                         regex = new RE("<\\s*/?\\s*(?!(("+approvedTags+")\\s*>))\\w+\\s*>",RE.REG_ICASE);
1058                         haystack = regex.substituteAll(haystack,"");
1059
1060                         // kill all the tags which have a javascript attribute like onLoad
1061                         regex = new RE("<[^>]*("+badAttributes+")[^>]*>",RE.REG_ICASE);
1062                         haystack = regex.substituteAll(haystack,"");
1063
1064                         // kill all the tags which include a url to an unacceptable protocol
1065                         regex = new RE("<\\s*a\\s+[^>]*href=(?!(\'|\")?("+approvedProtocols+"))[^>]*>",RE.REG_ICASE);
1066                         haystack = regex.substituteAll(haystack,"");
1067
1068                         return haystack;
1069                 } catch(REException ex){
1070                         ex.printStackTrace();
1071                         return null;
1072                 }
1073         }
1074
1075
1076         /**
1077          *  createHTML ruft alle regex-methoden zum unwandeln eines nicht
1078          *  htmlcodierten string auf und returnt einen htmlcodierten String
1079          */
1080         public static String createHTML(String content){
1081                 content=convertNewline2Break(content);
1082                 content=convertNewline2P(content);
1083                 content=createMailLinks(content);
1084                 content=createURLLinks(content);
1085                 return content;
1086         }
1087
1088
1089         /**
1090          *  createHTML ruft alle regex-methoden zum unwandeln eines nicht
1091          *  htmlcodierten string auf und returnt einen htmlcodierten String
1092          */
1093         public static String createHTML(String content,String producerDocRoot,String mailImage,String extImage,String intImage){
1094                 content=convertNewline2Break(content);
1095                 content=convertNewline2P(content);
1096                 content=createMailLinks(content,producerDocRoot,mailImage);
1097                 content=createURLLinks(content,null,producerDocRoot,extImage,intImage);
1098                 return content;
1099         }
1100
1101 }
1102