bug fix...was far too late last night to be trying to count letters...
[mir.git] / source / mir / misc / StringUtil.java
1 /*
2  * put your module comment here
3  */
4
5
6 package  mir.misc;
7
8 import  java.io.*;
9 import  java.lang.*;
10 import  java.util.*;
11 import  gnu.regexp.*;
12
13 /**
14  * Statische Hilfsmethoden zur Stringbehandlung
15  *
16  * @version 29.6.99
17  * @author RK
18  */
19 public final class StringUtil {
20
21         private static RE   re_newline2br, re_brbr2p, re_mail, re_url, re_tags;
22
23         private StringUtil() { }  // this avoids contruction
24
25         static {
26                 try {
27                         //precompile regex
28                         re_newline2br = new RE("(\r?\n){1}");
29                         re_brbr2p     = new RE("(<br>\r?\n<br>){1,}");
30                         re_mail       = new RE("([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+).([a-zA-Z0-9_.-]+)");
31                         re_url        = new RE("((https://)|(http://)|(ftp://)){1}([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/?([^ \t\r\n<>\\)\\]]+[^ \t\r\n.,<>\\)\\]])");
32                         re_tags       = new RE("<[^>]*>",RE.REG_ICASE);
33                 }
34                 catch (REException e){
35                         System.err.println("FATAL: StringUtil: could not precompile REGEX: "+e.toString());
36                 }
37         }
38
39
40         /**
41          * Wandelt Datum in einen 8-ziffrigen String um (yyyymmdd)
42          * @param theDate
43          * @return 8-ziffriger String (yyyymmdd)
44          */
45
46         public static final String date2webdbDate (GregorianCalendar theDate) {
47                 StringBuffer webdbDate = new StringBuffer();
48                 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
49                 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
50                 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
51                 return  webdbDate.toString();
52         }
53
54         /**
55          * Wandelt Calendar in einen 12-ziffrigen String um (yyyymmddhhmm)
56          * @param theDate
57          * @return 12-ziffriger String (yyyymmdd)
58          */
59
60         public static final String date2webdbDateTime (GregorianCalendar theDate) {
61                 StringBuffer webdbDate = new StringBuffer();
62                 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
63                 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
64                 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
65                 webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
66                 webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
67                 return  webdbDate.toString();
68         }
69
70         /**
71          * Return a http://www.w3.org/TR/NOTE-datetime formatted date (yyyymmddThhmmssTZ)
72          * @param theDate
73          * @return w3approved datetime
74          */
75
76         public static final String date2w3DateTime (GregorianCalendar theDate) {
77                 StringBuffer webdbDate = new StringBuffer();
78                 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
79                 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
80                 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
81                 webdbDate.append("T");
82                 webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
83                 webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
84                 webdbDate.append(pad2(theDate.get(Calendar.SECOND)));
85                 //assumes you are an hour-multiple away from UTC.... 
86                 int offset=(theDate.get(Calendar.ZONE_OFFSET)/(60*60*1000));
87                 if (offset < 0){
88                 webdbDate.append("-");
89                 }
90                 else{
91                 webdbDate.append("+");
92                 }
93                 webdbDate.append(pad2(Math.abs(offset)));
94                 webdbDate.append("00");
95                 return  webdbDate.toString();
96         }
97
98         /**
99          * wandelt Calendar in dd.mm.yyyy / hh.mm um
100          * @param theDate
101          * @return String mit (dd.mm.yyyy / hh.mm um)
102          */
103         public static String date2readableDateTime (GregorianCalendar theDate) {
104                 String readable = "";
105                 int hour;
106                 readable += pad2(theDate.get(Calendar.DATE));
107                 readable += "." + pad2(theDate.get(Calendar.MONTH) + 1);
108                 readable += "." + String.valueOf(theDate.get(Calendar.YEAR));
109                 hour = theDate.get(Calendar.HOUR);
110                 if (theDate.get(Calendar.AM_PM) == Calendar.PM)
111                         hour += 12;
112                 readable += " / " + pad2(hour);
113                 readable += ":" + pad2(theDate.get(Calendar.MINUTE));
114                 return  readable;
115         }
116
117         /**
118          * wandelt eine Datum in einen 8-buchstabigen String, der durch <code>/</code>
119          * getrennt ist.
120          *
121          * @param webdbDate
122          * @return String mit <code>/yyyy/mm/dd</code>
123          */
124         public static final String webdbDate2path (String webdbDate) {
125                 StringBuffer path = new StringBuffer();
126                 path.append("/").append(webdbDate.substring(0, 4));
127                 path.append("/").append(webdbDate.substring(4, 6));
128                 path.append("/");
129                 //who did this?
130                 //path.append("/").append(webdbDate.substring(6, 8));
131                 return  path.toString();
132         }
133
134         /**
135          * wandelt Calendar in dd.mm.yyyy um
136          *
137          * @param theDate
138          * @return String mit  <code>dd.mm.yyyy</code>
139          */
140         public static final String webdbDate2readableDate (String webdbDate) {
141                 String date = "";
142                 date += webdbDate.substring(6, 8);
143                 date += "." + webdbDate.substring(4, 6);
144                 date += "." + webdbDate.substring(0, 4);
145                 return  date;
146         }
147
148
149         /**
150          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
151          * to dd.mm.yyyy hh:mm
152          */
153         public static String dateToReadableDate(String date) {
154                 StringBuffer returnDate = new StringBuffer();
155                 if (date!=null) {
156
157                         returnDate.append(date.substring(8,10)).append('.');
158                         returnDate.append(date.substring(5,7)).append('.');
159                         returnDate.append(date.substring(0,4)).append(' ');
160                         returnDate.append(date.substring(11,16));
161                 }
162                 return returnDate.toString();
163         }
164         
165         /**
166          * converts string from format: yyyy-mm-dd__hh:mm:ss.dddddd+TZ
167          * to yyyymmddThhmmss+TZ:00 (w3 format for Dublin Core)
168          */
169         public static String webdbdateToDCDate(String date) {
170                 StringBuffer returnDate = new StringBuffer();
171                 if (date!=null) {
172
173                         returnDate.append(date.substring(0,4));
174                         returnDate.append(date.substring(5,7));
175                         returnDate.append(date.substring(8,10));
176                         returnDate.append("T");
177                         returnDate.append(date.substring(11,13));
178                         returnDate.append(date.substring(14,16));
179                         returnDate.append(date.substring(17,19));
180                         returnDate.append(date.substring(20,22));
181                         returnDate.append(":00");
182                 }
183                 return returnDate.toString();
184         }
185
186
187         /**
188          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
189          * to yyyy
190          */
191         public static String dateToYear (String date) {
192                 StringBuffer returnDate = new StringBuffer();
193                 if (date!=null) {
194
195                         returnDate.append(date.substring(0,4));
196                 }
197                 return returnDate.toString();
198         }
199
200         /**
201          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
202          * to [m]m
203          */
204         public static String dateToMonth (String date) {
205                 StringBuffer returnDate = new StringBuffer();
206                 if (date!=null) {
207                         if (!date.substring(5,6).equalsIgnoreCase("0")) returnDate.append(date.substring(5,7));
208                         else returnDate.append(date.substring(6,7));
209                 }
210                 return returnDate.toString();
211         }
212
213         /**
214          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
215          * to [d]d
216          */
217         public static String dateToDayOfMonth (String date) {
218                 StringBuffer returnDate = new StringBuffer();
219                 if (date!=null) {
220                         if (!date.substring(8,9).equalsIgnoreCase("0")) returnDate.append(date.substring(8,10));
221                         else returnDate.append(date.substring(9,10));
222                 }
223                 return returnDate.toString();
224         }
225
226         /**
227          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
228          * to hh:mm
229          */
230         public static String dateToTime (String date) {
231                 StringBuffer returnDate = new StringBuffer();
232                 if (date!=null) {
233                         returnDate.append(date.substring(11,16));
234                 }
235                 return returnDate.toString();
236         }
237
238     /**
239      * Splits the provided CSV text into a list. stolen wholesale from 
240      * from Jakarta Turbine StrinUtils.java -mh
241      *
242      * @param text      The CSV list of values to split apart.
243      * @param separator The separator character.
244      * @return          The list of values.
245      */
246     public static String[] split(String text, String separator)
247     {
248         StringTokenizer st = new StringTokenizer(text, separator);
249         String[] values = new String[st.countTokens()];
250         int pos = 0;
251         while (st.hasMoreTokens())
252         {
253             values[pos++] = st.nextToken();
254         }
255         return values;
256     }
257
258     /**
259      * Joins the elements of the provided array into a single string
260      * containing a list of CSV elements. Stolen wholesale from Jakarta
261      * Turbine StringUtils.java. -mh
262      *
263      * @param list      The list of values to join together.
264      * @param separator The separator character.
265      * @return          The CSV text.
266      */
267     public static String join(String[] list, String separator)
268     {
269         StringBuffer csv = new StringBuffer();
270         for (int i = 0; i < list.length; i++)
271         {
272             if (i > 0)
273             {
274                 csv.append(separator);
275             }
276             csv.append(list[i]);
277         }
278         return csv.toString();
279     }
280
281
282         /**
283          * schließt einen String in Anführungsszeichen ein, falls er Leerzeichen o.ä. enthält
284          *
285          * @return gequoteter String
286          */
287          public static String quoteIfNecessary(String s) {
288                 for (int i = 0; i < s.length(); i++)
289                         if (!(Character.isLetterOrDigit(s.charAt(i)) || s.charAt(i) == '.'))
290                                 return quote(s, '"');
291                 return s;
292         }
293
294          /**
295          * schließt <code>s</code> in <code>'</code> ein und setzt Backslashes vor
296          * "gefährliche" Zeichen innerhalb des Strings
297          * Quotes special SQL-characters in <code>s</code>
298          *
299          * @return geqoteter String
300          */
301         public static String quote(String s)
302         {
303                 String s2 = quote(s, '\'');
304                 s2 = quote(s2, '\"');
305                 return s2;
306         }
307
308         /**
309          * schließt <code>s</code> in <code>'</code> ein und setzt Backslashes vor
310          * "gefährliche" Zeichen innerhalb des Strings
311          *
312          * @param s String, der gequoted werden soll
313          * @param quoteChar zu quotendes Zeichen
314          * @return gequoteter String
315          */
316         public static String quote(String s, char quoteChar)
317         {
318                 StringBuffer buf = new StringBuffer(s.length());
319                 int pos = 0;
320                 while (pos < s.length()) {
321                         int i = s.indexOf(quoteChar, pos);
322                         if (i < 0) i = s.length();
323                         buf.append(s.substring(pos, i));
324                         pos = i;
325                         if (pos < s.length()) {
326                                 buf.append('\\');
327                                 buf.append(quoteChar);
328                                 pos++;
329                         }
330                 }
331                 return buf.toString();
332         }
333
334         /**
335          * replaces dangerous characters in <code>s</code>
336          *
337          */
338
339         public static String unquote(String s)
340         {
341                 char quoteChar='\'';
342                 StringBuffer buf = new StringBuffer(s.length());
343                 int pos = 0;
344                 String searchString = "\\"+quoteChar;
345                 while (pos < s.length()) {
346                         int i = s.indexOf(searchString, pos);
347                         if (i < 0) i = s.length();
348                         buf.append(s.substring(pos, i));
349                         pos = i+1;
350                 }
351                 return buf.toString();
352         }
353
354         /**
355          * Wandelet String in byte[] um.
356          * @param s
357          * @return byte[] des String
358          */
359
360         public static byte[] stringToBytes(String s) {
361                 String crlf = System.getProperty("line.separator");
362                 if (!crlf.equals("\n"))
363                         s = replace(s, "\n", crlf);
364                 // byte[] buf = new byte[s.length()];
365                 byte[] buf = s.getBytes();
366                 return buf;
367         }
368
369                 /**
370          * Ersetzt in String <code>s</code> das <code>pattern</code> durch <code>substitute</code>
371          * @param s
372          * @param pattern
373          * @param substitute
374          * @return String mit den Ersetzungen
375          */
376         public static String replace(String s, String pattern, String substitute) {
377                 int i = 0, pLen = pattern.length(), sLen = substitute.length();
378                 StringBuffer buf = new StringBuffer(s.length());
379                 while (true) {
380                         int j = s.indexOf(pattern, i);
381                         if (j < 0) {
382                                 buf.append(s.substring(i));
383                                 break;
384                         } else {
385                                 buf.append(s.substring(i, j));
386                                 buf.append(substitute);
387                                 i = j+pLen;
388                         }
389                 }
390                 return buf.toString();
391         }
392
393         /**
394          * Ersetzt in String <code>s</code> das Regexp <code>pattern</code> durch <code>substitute</code>
395          * @param s
396          * @param pattern
397          * @param substitute
398          * @return String mit den Ersetzungen
399          */
400         public static String regexpReplace(String haystack, String pattern, String substitute) {
401                 try {
402                         RE regex = new RE(pattern);
403                         return regex.substituteAll(haystack,substitute);
404                 } catch(REException ex){
405                         return null;
406                 }
407         }
408
409
410
411
412         /**
413          * Fügt einen Separator an den Pfad an
414          * @param path
415          * @return Pfad mit Separator am Ende
416          */
417         public static final String addSeparator (String path) {
418                 return  path.length() == 0 || path.endsWith(File.separator) ? path : path
419                                 + File.separatorChar;
420         }
421
422         /**
423          * Fügt ein <code>/</code> ans ende des Strings and
424          * @param path
425          * @return Pfad mit <code>/</code> am Ende
426          */
427         public static final String addSlash (String path) {
428                 return  path.length() == 0 || path.endsWith("/") ? path : path + '/';
429         }
430
431         /**
432          * Löscht <code>/</code> am Ende des Strings, falls vorhanden
433          * @param path
434          * @return String ohne <code>/</code> am Ende
435          */
436         public static final String removeSlash (String path) {
437                 return  path.length() > 1 && path.endsWith("/") ? path.substring(0, path.length()
438                                 - 1) : path;
439         }
440
441         /**
442          * Checks to see if the path is absolute by looking for a leading file
443          * separater
444          * @param path
445          * @return
446          */
447         public static boolean isAbsolutePath (String path) {
448                 return  path.startsWith(File.separator);
449         }
450
451         /**
452          * Löscht Slash am Anfang des Strings
453          * @param path
454          * @return
455          */
456         public static String removeFirstSlash (String path) {
457                 return  path.startsWith("/") ? path.substring(1) : path;
458         }
459
460         /**
461          * formatiert eine Zahl (0-99) zweistellig (z.B. 5 -> 05)
462          * @return zwistellige Zahl
463          */
464         public static String pad2 (int number) {
465                 return  number < 10 ? "0" + number : String.valueOf(number);
466         }
467
468         /**
469          * formatiert eine Zahl (0-999) dreistellig (z.B. 7 -> 007)
470          *
471          * @return 3-stellige Zahl
472          */
473         public static String pad3 (int number) {
474                 return  number < 10 ? "00" + number : number < 100 ? "0" + number : String.valueOf(number);
475         }
476
477         /**
478          * Konvertiert Unix-Linefeeds in Win-Linefeeds
479          * @param s
480          * @return Konvertierter String
481          */
482         public static String unixLineFeedsToWin(String s) {
483                 int i = -1;
484                 while (true) {
485                         i = s.indexOf('\n', i+1);
486                         if (i < 0) break;
487                         if ((i == 0 || s.charAt(i-1) != '\r') &&
488                                 (i == s.length()-1 || s.charAt(i+1) != '\r')) {
489                                 s = s.substring(0, i)+'\r'+s.substring(i);
490                                 i++;
491                         }
492                 }
493                 return s;
494         }
495
496
497         /**
498          * verwandelt einen String in eine gültige Url, konvertiert Sonderzeichen
499          * und Spaces werden zu Underscores
500          *
501          * @return gültige Url
502          */
503         public static String convert2url(String s) {
504                 s = toLowerCase(s);
505                 StringBuffer buf = new StringBuffer();
506                 for(int i = 0; i < s.length(); i++ ) {
507                                 switch( s.charAt( i ) ) {
508                                 case 'ö':
509                         buf.append( "oe" ); break;
510                                 case 'ä':
511                         buf.append( "ae" ); break;
512                                 case 'ü':
513                         buf.append( "ue" ); break;
514                                 case 'ã':
515                         buf.append( "a" ); break;
516                                 case '´':
517                                 case '.':
518                         buf.append( "_" ); break;
519                                 case ' ':
520                         if( buf.charAt( buf.length() - 1 ) != '_' ) {
521                                         buf.append( "_" );
522                         }
523                         break;
524                                 default:
525                         buf.append( s.charAt( i ) );
526                                 }
527                 }
528                 return buf.toString();
529         }
530
531         /**
532          * wandelt Sonderzeichen in Quotes um
533          *
534          * @return Kovertierter String
535          */
536         public static String encodeHtml(String s) {
537                 StringBuffer buf = new StringBuffer();
538                 for(int i=0;i < s.length(); i++ ) {
539
540                         /** @todo looks inefficient, to ask for index of every char, in
541                          *  case of failure it runs to the end.*/
542                         if (s.charAt(i)=='&') {
543                                 // convert html to xml-parsable representation
544                                 if( s.indexOf( "&ouml;", i ) == i ) {
545                                         buf.append( "&#246;" ); i += 5;
546                                         continue;
547                                 }
548                                 if( s.indexOf( "&auml;", i ) == i ) {
549                                         buf.append( "&#228;" ); i += 5;
550                                         continue;
551                                 }
552                                 if( s.indexOf( "&uuml;", i ) == i ) {
553                                         buf.append( "&#252;" ); i += 5;
554                                         continue;
555                                 }
556                                 if( s.indexOf( "&Ouml;", i ) == i ) {
557                                         buf.append( "&#214;" ); i += 5;
558                                         continue;
559                                 }
560                                 if( s.indexOf( "&Auml;", i ) == i ) {
561                                         buf.append( "&#196;" ); i += 5;
562                                         continue;
563                                 }
564                                 if( s.indexOf( "&Uuml;", i ) == i ) {
565                                         buf.append( "&#220;" ); i += 5;
566                                         continue;
567                                 }
568                                 if( s.indexOf( "&szlig;", i ) == i ) {
569                                         buf.append( "&#223;" ); i += 6;
570                                         continue;
571                                 }
572
573                                 /** @todo should only escape outside of tags */
574
575                                 if( s.indexOf( "&quot;", i ) == i ) {
576                                         buf.append( "&#223;" ); i += 5;
577                                         continue;
578                                 }
579                                 if( s.indexOf( "&ndash;", i ) == i ) {
580                                         buf.append( "&#8211;" ); i += 6;
581                                         continue;
582                                 }
583                                 if( s.indexOf( "&mdash;", i ) == i ) {
584                                         buf.append( "&#8212;" ); i += 6;
585                                         continue;
586                                 }
587                                 if( s.indexOf( "&ldquo;", i ) == i ) {
588                                         buf.append( "&#8220;" ); i += 6;
589                                         continue;
590                                 }
591                                 if( s.indexOf( "&rdquo;", i ) == i ) {
592                                         buf.append( "&#8221;" ); i += 6;
593                                         continue;
594                                 }
595                                 if( s.indexOf( "&bdquo;", i ) == i ) {
596                                         buf.append( "&#8222;" ); i += 6;
597                                         continue;
598                                 }
599
600                                 //looks pretty stupid
601                                 if( s.indexOf( "&lt;", i ) == i ) {
602                                         buf.append( "&lt;" ); i += 3;
603                                         continue;
604                                 }
605                                 if( s.indexOf( "&gt;", i ) == i ) {
606                                         buf.append( "&gt;" ); i += 3;
607                                         continue;
608                                 }
609                                 if( s.indexOf( "&acute;", i ) == i ) {
610                                         buf.append( "&acute;" ); i += 6;
611                                         continue;
612                                 }
613                                 if( s.indexOf( "&nbsp;", i ) == i ) {
614                                         buf.append( "&nbsp;" ); i += 5;
615                                         continue;
616                                 }
617                                 //has to be the last
618                                 //if( s.indexOf( "&", i ) == i ) {
619                                 //  buf.append( "&#38;" ); i += 0;
620                                 //  continue;
621                                 //}
622                         }
623                         // convert umlauts an other special charakters
624                         switch( s.charAt(i) ) {
625                                 case 'ö': buf.append( "&#246;" ); break;
626                                 case 'ä': buf.append( "&#228;" ); break;
627                                 case 'ü': buf.append( "&#252;" ); break;
628                                 case 'Ö': buf.append( "&#214;" ); break;
629                                 case 'Ä': buf.append( "&#196;" ); break;
630                                 case 'Ü': buf.append( "&#220;" ); break;
631                                 case 'ß': buf.append( "&#223;" ); break;
632                                 case 'é': buf.append( "&#233;" ); break;
633                                 case 'è': buf.append( "&#232;" ); break;
634                                 case 'á': buf.append( "&#225;" ); break;
635                                 case 'à': buf.append( "&#224;" ); break;
636                                 case 'â': buf.append( "&#226;" ); break;
637                                 case 'ã': buf.append( "&#227;" ); break;
638                                 case '¬': buf.append( "&#172;" ); break;
639                                 case '¹': buf.append( "&#185;" ); break;
640                                 case '²': buf.append( "&#178;" ); break;
641                                 case '³': buf.append( "&#179;" ); break;
642                                 case '¼': buf.append( "&#188;" ); break;
643                                 case '½': buf.append( "&#189;" ); break;
644                                 case '¾': buf.append( "&#190;" ); break;
645                                 case '¶': buf.append( "&#182;" ); break;
646                                 case 'æ': buf.append( "&#230;" ); break;
647                                 case 'ð': buf.append( "&#240;" ); break;
648                                 case '|': buf.append( "&#166;" ); break;
649                                 case '·': buf.append( "&#183;" ); break;
650                                 case '°': buf.append( "&#176;" ); break;
651                                 case '§': buf.append( "&#167;" ); break;
652                                 case 'ø': buf.append( "&#248;" ); break;
653                                 case 'ç': buf.append( "&#231;" ); break;
654                                 case '¤': buf.append( "&#164;" ); break;
655                                 case 'ª': buf.append( "&#170;" ); break;
656                                 case 'Ç': buf.append( "&#199;" ); break;
657                                 case 'Ã': buf.append( "&#195;" ); break;
658                                 case 'Â': buf.append( "&#194;" ); break;
659                                 case 'Æ': buf.append( "&#198;" ); break;
660                                 case '©': buf.append( "&#169;" ); break;
661                                 case '®': buf.append( "&#174;" ); break;
662                                 case '¥': buf.append( "&#165;" ); break;
663                                 case 'Þ': buf.append( "&#254;" ); break;
664                                 case '¯': buf.append( "&#175;" ); break;
665                                 case 'Ð': buf.append( "&#208;" ); break;
666                                 case 'º': buf.append( "&#186;" ); break;
667                                 case '¡': buf.append( "&#161;" ); break;
668                                 case '£': buf.append( "&#163;" ); break;
669                                 case '±': buf.append( "&#177;" ); break;
670                                 case '¿': buf.append( "&#191;" ); break;
671                                 case 'Ø': buf.append( "&#216;" ); break;
672                                 case 'Á': buf.append( "&#192;" ); break;
673                                 case 'À': buf.append( "&#193;" ); break;
674                                 case 'É': buf.append( "&#200;" ); break;
675                                 case 'È': buf.append( "&#201;" ); break;
676                                 case 'ù': buf.append( "&#250;" ); break;
677                                 case 'ñ': buf.append( "&#241;" ); break;
678                                 case 'Ñ': buf.append( "&#209;" ); break;
679                                 case 'µ': buf.append( "&#181;" ); break;
680                                 case 'Í': buf.append( "&#204;" ); break;
681                                 case 'Ì': buf.append( "&#205;" ); break;
682                                 case 'í': buf.append( "&#236;" ); break;
683                                 case 'ì': buf.append( "&#237;" ); break;
684                                 case 'î': buf.append( "&#238;" ); break;
685                                 case 'Î': buf.append( "&#206;" ); break;
686                                 case 'ó': buf.append( "&#243;" ); break;
687                                 case 'Ó': buf.append( "&#210;" ); break;
688                                 case 'ò': buf.append( "&#206;" ); break;
689                                 case 'Ò': buf.append( "&#211;" ); break;
690                                 case 'ô': buf.append( "&#244;" ); break;
691                                 case 'Ô': buf.append( "&#212;" ); break;
692                                 case 'õ': buf.append( "&#245;" ); break;
693                                 case 'Õ': buf.append( "&#213;" ); break;
694                                 case 'ý': buf.append( "&#253;" ); break;
695                                 case 'Ý': buf.append( "&#221;" ); break;
696                                 case 'û': buf.append( "&#251;" ); break;
697                                 case 'Û': buf.append( "&#219;" ); break;
698                                 case 'ú': buf.append( "&#249;" ); break;
699                                 case 'Ú': buf.append( "&#217;" ); break;
700                                 case 'Ù': buf.append( "&#218;" ); break;
701                                 case 'Ê': buf.append( "&#202;" ); break;
702                                 case 'ê': buf.append( "&#234;" ); break;
703                                 case 'å': buf.append( "&#229;" ); break;
704                                 case 'Å': buf.append( "&#197;" ); break;
705                                 case 'ë': buf.append( "&#235;" ); break;
706                                 case 'Ë': buf.append( "&#203;" ); break;
707                                 case 'ÿ': buf.append( "&#255;" ); break;
708                                 case 'ï': buf.append( "&#239;" ); break;
709                                 case 'Ï': buf.append( "&#207;" ); break;
710                                 case '«': buf.append( "&#171;" ); break;
711                                 case '»': buf.append( "&#187;" ); break;
712                                 case '\'': buf.append( "&acute;" ); break;
713                                 case '\"': buf.append( "&quot;" ); break;
714                                 //case '\u8211': buf.append( "&#8211;" ); break;
715                                 //case '\u8212': buf.append( "&#8212;" ); break;
716                                 //case '\u8220': buf.append( "&#8220;" ); break;
717                                 //case '\u8221': buf.append( "&#8221;" ); break;
718                                 //case '\u8222': buf.append( "&#8222;" ); break;
719                                 //case '\"': buf.append( "&#34;" ); break;
720                                 default: buf.append( s.charAt(i) );
721                         }
722
723                 }
724                 return buf.toString();
725         }
726
727
728         public static String decodeHTMLinTags(String s){
729                 StringBuffer buffer = new StringBuffer();
730                 boolean start = false;
731                 boolean stop = false;
732                 int startIndex = 0;
733                 int stopIndex = 0;
734                 int temp = 0;
735
736                 for(int i=0;i<s.length();i++){
737                         if(s.charAt(i)=='<'){
738                                 start = true;
739                                 startIndex = i;
740                         } else if(s.charAt(i)=='>'){
741                                 stop = true;
742                                 stopIndex = i;
743
744                                 if(start && stop){
745                                         buffer.append(s.substring(temp,startIndex));
746                                         buffer.append(replaceQuot(s.substring(startIndex,stopIndex+1)));
747                                         i= temp= stopIndex+1;
748                                         start= stop= false;
749                                 }
750                         }
751                 }
752                 if(stopIndex>0){
753                         buffer.append(s.substring(stopIndex+1));
754                         return buffer.toString();
755                 } else {
756                         return s;
757                 }
758         }
759
760         public static String replaceQuot(String s) {
761                 StringBuffer buffer = new StringBuffer();
762                 for(int j = 0; j < s.length();j++){
763                         if(s.charAt(j)=='&'){
764                                 if(s.indexOf( "&quot;",j) == j) {
765                                         buffer.append( "\"" );
766                                         j += 5;
767                                 }//if
768                         } else {
769                                 buffer.append(s.charAt(j));
770                         }//else
771                 }//for
772                 return buffer.toString();
773         }
774
775         /** wandelt Quotes in Sonderzeichen um
776          */
777         /**
778         public static String decodeHtml(String s) {
779                 StringBuffer buf = new StringBuffer();
780                 for(int i=0;i < s.length(); i++ ) {
781                         if( s.indexOf( "&ouml;", i ) == i ) {
782                                 buf.append( "ö" ); i += 5;
783                                 continue;
784                         }
785                         if( s.indexOf( "&auml;", i ) == i ) {
786                                 buf.append( "ä" ); i += 5;
787                                 continue;
788                         }
789                         if( s.indexOf( "&uuml;", i ) == i ) {
790                                 buf.append( "ü" ); i += 5;
791                                 continue;
792                         }
793                         if( s.indexOf( "&Ouml;", i ) == i ) {
794                                 buf.append( "Ö" ); i += 5;
795                                 continue;
796                         }
797                         if( s.indexOf( "&Auml;", i ) == i ) {
798                                 buf.append( "Ä" ); i += 5;
799                                 continue;
800                         }
801                         if( s.indexOf( "&Uuml;", i ) == i ) {
802                                 buf.append( "Ü" ); i += 5;
803                                 continue;
804                         }
805                         if( s.indexOf( "&szlig;", i ) == i ) {
806                                 buf.append( "ß" ); i += 6;
807                                 continue;
808                         }
809                         if( s.indexOf( "&quot;", i ) == i ) {
810                                 buf.append( "\"" ); i += 5;
811                                 continue;
812                         }
813                         buf.append( s.charAt(i) );
814                 }
815                 return buf.toString();
816         }
817          */
818
819         /**
820          * schnellere Variante der String.toLowerCase()-Routine
821          *
822          * @return String in Kleinbuchsten
823          */
824         public static String toLowerCase(String s) {
825                 int l = s.length();
826                 char[] a = new char[l];
827                 for (int i = 0; i < l; i++)
828                         a[i] = Character.toLowerCase(s.charAt(i));
829                 return new String(a);
830         }
831
832                 /**
833          * Findet <code>element</code> im String-Array <code>array</code>
834          * @param array
835          * @param element
836          * @return Fundstelle als int oder -1
837          */
838         public static int indexOf(String[] array, String element) {
839                 if (array != null)
840                         for (int i = 0; i < array.length; i++)
841                                 if (array[i].equals(element))
842                                         return i;
843                 return -1;
844         }
845
846         /**
847          * Testet auf Vorkommen von <code>element</code> in <code>array</code>
848          * @param array String-Array
849          * @param element
850          * @return true wenn <code>element</code> vorkommt, sonst false
851          */
852         public static boolean contains(String[] array, String element) {
853                 return indexOf(array, element) >= 0;
854         }
855
856                 /**
857          * Ermittelt CRC-Prüfsumme von String <code>s</code>
858          * @param s
859          * @return CRC-Prüfsumme
860          */
861         public static int getCRC(String s) {
862                 int h = 0;
863                 char val[] = s.toCharArray();
864                 int len = val.length;
865
866                 for (int i = 0 ; i < len; i++) {
867                         h &= 0x7fffffff;
868                         h = (((h >> 30) | (h << 1)) ^ (val[i]+i));
869                 }
870
871                 return (h << 8) | (len & 0xff);
872         }
873
874                 /**
875          * Liefert Default-Wert def zurück, wenn String <code>s</code>
876          * kein Integer ist.
877          *
878          * @param s
879          * @param def
880          * @return geparster int aus s oder def
881          */
882         public static int parseInt(String s, int def) {
883                 if (s == null) return def;
884                 try {
885                         return Integer.parseInt(s);
886                 } catch (NumberFormatException e) {
887                         return def;
888                 }
889         }
890
891         /**
892          * Liefert Defaultwert def zurück, wenn s nicht zu einem float geparsed werden kann.
893          * @param s
894          * @param def
895          * @return geparster float oder def
896          */
897         public static float parseFloat(String s, float def) {
898                 if (s == null) return def;
899                 try {
900                         return new Float(s).floatValue();
901                 } catch (NumberFormatException e) {
902                         return def;
903                 }
904         }
905
906                 /**
907          * Findet Ende eines Satzes in String <code>text</code>
908          * @param text
909          * @param startIndex
910          * @return index des Satzendes, oder -1
911          */
912         public static int findEndOfSentence(String text, int startIndex) {
913                  while (true) {
914                          int i = text.indexOf('.', startIndex);
915                          if (i < 0) return -1;
916                          if (i > 0 && !Character.isDigit(text.charAt(i-1)) &&
917                                         (i+1 >= text.length()
918                                         || text.charAt(i+1) == ' '
919                                         || text.charAt(i+1) == '\n'
920                                         || text.charAt(i+1) == '\t'))
921                                         return i+1;
922                          startIndex = i+1;
923                  }
924         }
925
926                 /**
927          * Findet Wortende in String <code>text</code> ab <code>startIndex</code>
928          * @param text
929          * @param startIndex
930          * @return Index des Wortendes, oder -1
931          */
932         public static int findEndOfWord(String text, int startIndex) {
933                 int i = text.indexOf(' ', startIndex),
934                         j = text.indexOf('\n', startIndex);
935                 if (i < 0) i = text.length();
936                 if (j < 0) j = text.length();
937                 return Math.min(i, j);
938         }
939
940
941         /**
942          *  convertNewline2P ist eine regex-routine zum umwandeln von 2 oder mehr newlines (\n)
943          *  in den html-tag <p>
944          *  nur sinnvoll, wenn text nicht im html-format eingegeben
945          */
946         public static String convertNewline2P(String haystack) {
947                         return re_brbr2p.substituteAll(haystack,"\n</p><p>");
948         }
949
950         /**
951          *  convertNewline2Break ist eine regex-routine zum umwandeln von 1 newline (\n)
952          *  in den html-tag <br>
953          *  nur sinnvoll, wenn text nicht im html-format eingegeben
954          */
955         public static String convertNewline2Break(String haystack) {
956                 return re_newline2br.substituteAll(haystack,"$0<br />");
957         }
958
959         /**
960          *  createMailLinks wandelt text im email-adressenformat
961          *  in einen klickbaren link um
962          *  nur sinnvoll, wenn text nicht im html-format eingegeben
963          */
964         public static String createMailLinks(String haystack) {
965                         return re_mail.substituteAll(haystack,"<a href=\"mailto:$0\">$0</a>");
966         }
967
968
969         /**
970          *  createMailLinks wandelt text im email-adressenformat
971          *  in einen klickbaren link um
972          *  nur sinnvoll, wenn text nicht im html-format eingegeben
973          */
974         public static String createMailLinks(String haystack, String imageRoot, String mailImage) {
975                 return re_mail.substituteAll(haystack,"<img src=\""+imageRoot+"/"+mailImage+"\" border=\"0\"/>&#160;<a href=\"mailto:$0\">$0</a>");
976         }
977
978
979         /**
980          *  createURLLinks wandelt text im url-format
981          *  in einen klickbaren link um
982          *  nur sinnvoll, wenn text nicht im html-format eingegeben
983          */
984         public static String createURLLinks(String haystack) {
985                 return re_url.substituteAll(haystack,"<a href=\"$0\">$0</a>");
986         }
987
988         /**
989          * this routine takes text in url format and makes
990          * a clickaeble "<href>" link removing any "illegal" html tags
991          * @param haystack, the url
992          * @param title, the href link text
993          * @param imagRoot, the place to find icons
994          * @param extImage, the url of the icon to show next to the link
995          * @return a String containing the url
996          */
997         public static String createURLLinks(String haystack, String title, String imageRoot,String extImage) {
998                 if (title == null) {
999                         return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/>&#160;<a href=\"$0\">$0</a>");
1000                 } else {
1001                         title = removeHTMLTags(title);
1002                         return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/>&#160;<a href=\"$0\">"+title+"</a>");
1003                 }
1004         }
1005
1006         /**
1007          * this routine takes text in url format and makes
1008          * a clickaeble "<href>" link removing any "illegal" html tags
1009          * @param haystack, the url
1010          * @param imageRoot, the place to find icons
1011          * @param extImage, the url of the icon to show next to the link
1012          * @param intImage, unused
1013          * @return a String containing the url
1014          */
1015         public static String createURLLinks(String haystack, String title, String imageRoot,String extImage,String intImage) {
1016                 return createURLLinks(haystack, title, imageRoot, extImage);
1017         }
1018
1019          /**
1020          *  deleteForbiddenTags
1021          *  this method deletes all <script>, <body> and <head>-tags
1022          */
1023         public static final String deleteForbiddenTags(String haystack) {
1024                 try {
1025                         RE regex = new RE("<[ \t\r\n](.*?)script(.*?)/script(.*?)>",RE.REG_ICASE);
1026                         haystack = regex.substituteAll(haystack,"");
1027                         regex = new RE("<head>(.*?)</head>");
1028                         haystack = regex.substituteAll(haystack,"");
1029                         regex = new RE("<[ \t\r\n/]*body(.*?)>");
1030                         haystack = regex.substituteAll(haystack,"");
1031                         return haystack;
1032                 } catch(REException ex){
1033                         return null;
1034                 }
1035         }
1036
1037         /**
1038          * this method deletes all html tags
1039          */
1040         public static final String removeHTMLTags(String haystack){
1041                         return re_tags.substituteAll(haystack,"");
1042         }
1043
1044
1045         /**
1046          * this method deletes all but the approved tags html tags
1047          * it also deletes approved tags which contain malicious-looking attributes and doesn't work at all
1048          */
1049         public static String approveHTMLTags(String haystack){
1050                 try {
1051                         String approvedTags="a|img|h1|h2|h3|h4|h5|h6|br|b|i|strong|p";
1052                         String badAttributes="onAbort|onBlur|onChange|onClick|onDblClick|onDragDrop|onError|onFocus|onKeyDown|onKeyPress|onKeyUp|onLoad|onMouseDown|onMouseMove|onMouseOut|onMouseOver|onMouseUp|onMove|onReset|onResize|onSelect|onSubmit|onUnload";
1053                         String approvedProtocols="rtsp|http|ftp|https|freenet|mailto";
1054
1055                         // kill all the bad tags that have attributes
1056                         String s = "<\\s*/?\\s*(?!(("+approvedTags+")\\s))\\w+\\s[^>]*>";
1057                         RE regex = new RE(s,RE.REG_ICASE);
1058                         haystack = regex.substituteAll(haystack,"");
1059
1060                         // kill all the bad tags that are attributeless
1061                         regex = new RE("<\\s*/?\\s*(?!(("+approvedTags+")\\s*>))\\w+\\s*>",RE.REG_ICASE);
1062                         haystack = regex.substituteAll(haystack,"");
1063
1064                         // kill all the tags which have a javascript attribute like onLoad
1065                         regex = new RE("<[^>]*("+badAttributes+")[^>]*>",RE.REG_ICASE);
1066                         haystack = regex.substituteAll(haystack,"");
1067
1068                         // kill all the tags which include a url to an unacceptable protocol
1069                         regex = new RE("<\\s*a\\s+[^>]*href=(?!(\'|\")?("+approvedProtocols+"))[^>]*>",RE.REG_ICASE);
1070                         haystack = regex.substituteAll(haystack,"");
1071
1072                         return haystack;
1073                 } catch(REException ex){
1074                         ex.printStackTrace();
1075                         return null;
1076                 }
1077         }
1078
1079
1080         /**
1081          *  createHTML ruft alle regex-methoden zum unwandeln eines nicht
1082          *  htmlcodierten string auf und returnt einen htmlcodierten String
1083          */
1084         public static String createHTML(String content){
1085                 content=convertNewline2Break(content);
1086                 content=convertNewline2P(content);
1087                 content=createMailLinks(content);
1088                 content=createURLLinks(content);
1089                 return content;
1090         }
1091
1092
1093         /**
1094          *  createHTML ruft alle regex-methoden zum unwandeln eines nicht
1095          *  htmlcodierten string auf und returnt einen htmlcodierten String
1096          */
1097         public static String createHTML(String content,String producerDocRoot,String mailImage,String extImage,String intImage){
1098                 content=convertNewline2Break(content);
1099                 content=convertNewline2P(content);
1100                 content=createMailLinks(content,producerDocRoot,mailImage);
1101                 content=createURLLinks(content,null,producerDocRoot,extImage,intImage);
1102                 return content;
1103         }
1104
1105 }
1106