fixed the method which takes a webdb_date to a dublin core date to include
[mir.git] / source / mir / misc / StringUtil.java
1 /*
2  * put your module comment here
3  */
4
5
6 package  mir.misc;
7
8 import  java.io.*;
9 import  java.lang.*;
10 import  java.util.*;
11 import  gnu.regexp.*;
12
13 /**
14  * Statische Hilfsmethoden zur Stringbehandlung
15  *
16  * @version 29.6.99
17  * @author RK
18  */
19 public final class StringUtil {
20
        // Precompiled gnu.regexp patterns; each is assigned exactly once by the static initializer below.
        private static RE   re_newline2br, re_brbr2p, re_mail, re_url, re_tags;

        private StringUtil() { }  // private constructor: this class only has static members and is never instantiated

        // Compiles the shared patterns once, when the class is loaded.
        static {
                try {
                        //precompile regex
                        // a single line break, with or without a preceding carriage return
                        re_newline2br = new RE("(\r?\n){1}");
                        // one or more "<br>, line break, <br>" sequences
                        re_brbr2p     = new RE("(<br>\r?\n<br>){1,}");
                        // NOTE(review): the '.' between the two host groups here and in re_url is unescaped,
                        // so it matches any character, not only a literal dot -- confirm whether that is intended
                        re_mail       = new RE("([a-zA-Z0-9_.-]+)@([a-zA-Z0-9_-]+).([a-zA-Z0-9_.-]+)");
                        re_url        = new RE("((https://)|(http://)|(ftp://)){1}([a-zA-Z0-9_-]+).([a-zA-Z0-9_.:-]+)/?([^ \t\r\n<>\\)\\]]+[^ \t\r\n.,<>\\)\\]])");
                        // anything between '<' and the next '>'
                        re_tags       = new RE("<[^>]*>",RE.REG_ICASE);
                }
                catch (REException e){
                        // The failure is only reported on stderr; the pattern that failed and all patterns
                        // after it keep their default value of null.
                        System.err.println("FATAL: StringUtil: could not precompile REGEX: "+e.toString());
                }
        }
38
39
40         /**
41          * Wandelt Datum in einen 8-ziffrigen String um (yyyymmdd)
42          * @param theDate
43          * @return 8-ziffriger String (yyyymmdd)
44          */
45
46         public static final String date2webdbDate (GregorianCalendar theDate) {
47                 StringBuffer webdbDate = new StringBuffer();
48                 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
49                 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
50                 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
51                 return  webdbDate.toString();
52         }
53
54         /**
55          * Wandelt Calendar in einen 12-ziffrigen String um (yyyymmddhhmm)
56          * @param theDate
57          * @return 12-ziffriger String (yyyymmdd)
58          */
59
60         public static final String date2webdbDateTime (GregorianCalendar theDate) {
61                 StringBuffer webdbDate = new StringBuffer();
62                 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
63                 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
64                 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
65                 webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
66                 webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
67                 return  webdbDate.toString();
68         }
69
70         /**
71          * Return a http://www.w3.org/TR/NOTE-datetime formatted date (yyyy-mm-ddThh:mm:ssTZ)
72          * @param theDate
73          * @return w3approved datetime
74          */
75
76         public static final String date2w3DateTime (GregorianCalendar theDate) {
77                 StringBuffer webdbDate = new StringBuffer();
78                 webdbDate.append(String.valueOf(theDate.get(Calendar.YEAR)));
79                 webdbDate.append("-");
80                 webdbDate.append(pad2(theDate.get(Calendar.MONTH) + 1));
81                 webdbDate.append("-");
82                 webdbDate.append(pad2(theDate.get(Calendar.DATE)));
83                 webdbDate.append("T");
84                 webdbDate.append(pad2(theDate.get(Calendar.HOUR)));
85                 webdbDate.append(":");
86                 webdbDate.append(pad2(theDate.get(Calendar.MINUTE)));
87                 webdbDate.append(":");
88                 webdbDate.append(pad2(theDate.get(Calendar.SECOND)));
89                 //assumes you are an hour-multiple away from UTC.... 
90                 int offset=(theDate.get(Calendar.ZONE_OFFSET)/(60*60*1000));
91                 if (offset < 0){
92                 webdbDate.append("-");
93                 }
94                 else{
95                 webdbDate.append("+");
96                 }
97                 webdbDate.append(pad2(Math.abs(offset)));
98                 webdbDate.append(":00");
99                 return  webdbDate.toString();
100         }
101
102         /**
103          * wandelt Calendar in dd.mm.yyyy / hh.mm um
104          * @param theDate
105          * @return String mit (dd.mm.yyyy / hh.mm um)
106          */
107         public static String date2readableDateTime (GregorianCalendar theDate) {
108                 String readable = "";
109                 int hour;
110                 readable += pad2(theDate.get(Calendar.DATE));
111                 readable += "." + pad2(theDate.get(Calendar.MONTH) + 1);
112                 readable += "." + String.valueOf(theDate.get(Calendar.YEAR));
113                 hour = theDate.get(Calendar.HOUR);
114                 if (theDate.get(Calendar.AM_PM) == Calendar.PM)
115                         hour += 12;
116                 readable += " / " + pad2(hour);
117                 readable += ":" + pad2(theDate.get(Calendar.MINUTE));
118                 return  readable;
119         }
120
121         /**
122          * wandelt eine Datum in einen 8-buchstabigen String, der durch <code>/</code>
123          * getrennt ist.
124          *
125          * @param webdbDate
126          * @return String mit <code>/yyyy/mm/dd</code>
127          */
128         public static final String webdbDate2path (String webdbDate) {
129                 StringBuffer path = new StringBuffer();
130                 path.append("/").append(webdbDate.substring(0, 4));
131                 path.append("/").append(webdbDate.substring(4, 6));
132                 path.append("/");
133                 //who did this?
134                 //path.append("/").append(webdbDate.substring(6, 8));
135                 return  path.toString();
136         }
137
138         /**
139          * wandelt Calendar in dd.mm.yyyy um
140          *
141          * @param theDate
142          * @return String mit  <code>dd.mm.yyyy</code>
143          */
144         public static final String webdbDate2readableDate (String webdbDate) {
145                 String date = "";
146                 date += webdbDate.substring(6, 8);
147                 date += "." + webdbDate.substring(4, 6);
148                 date += "." + webdbDate.substring(0, 4);
149                 return  date;
150         }
151
152
153         /**
154          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
155          * to dd.mm.yyyy hh:mm
156          */
157         public static String dateToReadableDate(String date) {
158                 StringBuffer returnDate = new StringBuffer();
159                 if (date!=null) {
160
161                         returnDate.append(date.substring(8,10)).append('.');
162                         returnDate.append(date.substring(5,7)).append('.');
163                         returnDate.append(date.substring(0,4)).append(' ');
164                         returnDate.append(date.substring(11,16));
165                 }
166                 return returnDate.toString();
167         }
168         
169         /**
170          * converts string from format: yyyy-mm-dd__hh:mm:ss.dddddd+TZ
171          * to yyyy-mm-ddThh:mm:ss+TZ:00 (w3 format for Dublin Core)
172          */
173         public static String webdbdateToDCDate(String date) {
174                 StringBuffer returnDate = new StringBuffer();
175                 if (date!=null) {
176
177                         returnDate.append(date.substring(0,10));
178                         returnDate.append("T");
179                         returnDate.append(date.substring(11,19));
180                         String tzInfo=date.substring(26,29);
181                         if (tzInfo.equals("+00")){
182                             //UTC gets a special code in w3 dates
183                             returnDate.append("Z");
184                         }
185                         else{
186                             //need to see what a newfoundland postgres 
187                             //timestamp looks like before making this robust
188                             returnDate.append(tzInfo);
189                             returnDate.append(":00");
190                         }
191                         
192                 }
193                 return returnDate.toString();
194         }
195
196
197         /**
198          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
199          * to yyyy
200          */
201         public static String dateToYear (String date) {
202                 StringBuffer returnDate = new StringBuffer();
203                 if (date!=null) {
204
205                         returnDate.append(date.substring(0,4));
206                 }
207                 return returnDate.toString();
208         }
209
210         /**
211          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
212          * to [m]m
213          */
214         public static String dateToMonth (String date) {
215                 StringBuffer returnDate = new StringBuffer();
216                 if (date!=null) {
217                         if (!date.substring(5,6).equalsIgnoreCase("0")) returnDate.append(date.substring(5,7));
218                         else returnDate.append(date.substring(6,7));
219                 }
220                 return returnDate.toString();
221         }
222
223         /**
224          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
225          * to [d]d
226          */
227         public static String dateToDayOfMonth (String date) {
228                 StringBuffer returnDate = new StringBuffer();
229                 if (date!=null) {
230                         if (!date.substring(8,9).equalsIgnoreCase("0")) returnDate.append(date.substring(8,10));
231                         else returnDate.append(date.substring(9,10));
232                 }
233                 return returnDate.toString();
234         }
235
236         /**
237          * converts string from format: yyyy-mm-dd__hh:mm:ss.d
238          * to hh:mm
239          */
240         public static String dateToTime (String date) {
241                 StringBuffer returnDate = new StringBuffer();
242                 if (date!=null) {
243                         returnDate.append(date.substring(11,16));
244                 }
245                 return returnDate.toString();
246         }
247
248     /**
249      * Splits the provided CSV text into a list. stolen wholesale from 
250      * from Jakarta Turbine StrinUtils.java -mh
251      *
252      * @param text      The CSV list of values to split apart.
253      * @param separator The separator character.
254      * @return          The list of values.
255      */
256     public static String[] split(String text, String separator)
257     {
258         StringTokenizer st = new StringTokenizer(text, separator);
259         String[] values = new String[st.countTokens()];
260         int pos = 0;
261         while (st.hasMoreTokens())
262         {
263             values[pos++] = st.nextToken();
264         }
265         return values;
266     }
267
268     /**
269      * Joins the elements of the provided array into a single string
270      * containing a list of CSV elements. Stolen wholesale from Jakarta
271      * Turbine StringUtils.java. -mh
272      *
273      * @param list      The list of values to join together.
274      * @param separator The separator character.
275      * @return          The CSV text.
276      */
277     public static String join(String[] list, String separator)
278     {
279         StringBuffer csv = new StringBuffer();
280         for (int i = 0; i < list.length; i++)
281         {
282             if (i > 0)
283             {
284                 csv.append(separator);
285             }
286             csv.append(list[i]);
287         }
288         return csv.toString();
289     }
290
291
292         /**
293          * schließt einen String in Anführungsszeichen ein, falls er Leerzeichen o.ä. enthält
294          *
295          * @return gequoteter String
296          */
297          public static String quoteIfNecessary(String s) {
298                 for (int i = 0; i < s.length(); i++)
299                         if (!(Character.isLetterOrDigit(s.charAt(i)) || s.charAt(i) == '.'))
300                                 return quote(s, '"');
301                 return s;
302         }
303
304          /**
305          * schließt <code>s</code> in <code>'</code> ein und setzt Backslashes vor
306          * "gefährliche" Zeichen innerhalb des Strings
307          * Quotes special SQL-characters in <code>s</code>
308          *
309          * @return geqoteter String
310          */
311         public static String quote(String s)
312         {
313                 String s2 = quote(s, '\'');
314                 s2 = quote(s2, '\"');
315                 return s2;
316         }
317
318         /**
319          * schließt <code>s</code> in <code>'</code> ein und setzt Backslashes vor
320          * "gefährliche" Zeichen innerhalb des Strings
321          *
322          * @param s String, der gequoted werden soll
323          * @param quoteChar zu quotendes Zeichen
324          * @return gequoteter String
325          */
326         public static String quote(String s, char quoteChar)
327         {
328                 StringBuffer buf = new StringBuffer(s.length());
329                 int pos = 0;
330                 while (pos < s.length()) {
331                         int i = s.indexOf(quoteChar, pos);
332                         if (i < 0) i = s.length();
333                         buf.append(s.substring(pos, i));
334                         pos = i;
335                         if (pos < s.length()) {
336                                 buf.append('\\');
337                                 buf.append(quoteChar);
338                                 pos++;
339                         }
340                 }
341                 return buf.toString();
342         }
343
344         /**
345          * replaces dangerous characters in <code>s</code>
346          *
347          */
348
349         public static String unquote(String s)
350         {
351                 char quoteChar='\'';
352                 StringBuffer buf = new StringBuffer(s.length());
353                 int pos = 0;
354                 String searchString = "\\"+quoteChar;
355                 while (pos < s.length()) {
356                         int i = s.indexOf(searchString, pos);
357                         if (i < 0) i = s.length();
358                         buf.append(s.substring(pos, i));
359                         pos = i+1;
360                 }
361                 return buf.toString();
362         }
363
364         /**
365          * Wandelet String in byte[] um.
366          * @param s
367          * @return byte[] des String
368          */
369
370         public static byte[] stringToBytes(String s) {
371                 String crlf = System.getProperty("line.separator");
372                 if (!crlf.equals("\n"))
373                         s = replace(s, "\n", crlf);
374                 // byte[] buf = new byte[s.length()];
375                 byte[] buf = s.getBytes();
376                 return buf;
377         }
378
379                 /**
380          * Ersetzt in String <code>s</code> das <code>pattern</code> durch <code>substitute</code>
381          * @param s
382          * @param pattern
383          * @param substitute
384          * @return String mit den Ersetzungen
385          */
386         public static String replace(String s, String pattern, String substitute) {
387                 int i = 0, pLen = pattern.length(), sLen = substitute.length();
388                 StringBuffer buf = new StringBuffer(s.length());
389                 while (true) {
390                         int j = s.indexOf(pattern, i);
391                         if (j < 0) {
392                                 buf.append(s.substring(i));
393                                 break;
394                         } else {
395                                 buf.append(s.substring(i, j));
396                                 buf.append(substitute);
397                                 i = j+pLen;
398                         }
399                 }
400                 return buf.toString();
401         }
402
403         /**
404          * Ersetzt in String <code>s</code> das Regexp <code>pattern</code> durch <code>substitute</code>
405          * @param s
406          * @param pattern
407          * @param substitute
408          * @return String mit den Ersetzungen
409          */
410         public static String regexpReplace(String haystack, String pattern, String substitute) {
411                 try {
412                         RE regex = new RE(pattern);
413                         return regex.substituteAll(haystack,substitute);
414                 } catch(REException ex){
415                         return null;
416                 }
417         }
418
419
420
421
422         /**
423          * Fügt einen Separator an den Pfad an
424          * @param path
425          * @return Pfad mit Separator am Ende
426          */
427         public static final String addSeparator (String path) {
428                 return  path.length() == 0 || path.endsWith(File.separator) ? path : path
429                                 + File.separatorChar;
430         }
431
432         /**
433          * Fügt ein <code>/</code> ans ende des Strings and
434          * @param path
435          * @return Pfad mit <code>/</code> am Ende
436          */
437         public static final String addSlash (String path) {
438                 return  path.length() == 0 || path.endsWith("/") ? path : path + '/';
439         }
440
441         /**
442          * Löscht <code>/</code> am Ende des Strings, falls vorhanden
443          * @param path
444          * @return String ohne <code>/</code> am Ende
445          */
446         public static final String removeSlash (String path) {
447                 return  path.length() > 1 && path.endsWith("/") ? path.substring(0, path.length()
448                                 - 1) : path;
449         }
450
451         /**
452          * Checks to see if the path is absolute by looking for a leading file
453          * separater
454          * @param path
455          * @return
456          */
457         public static boolean isAbsolutePath (String path) {
458                 return  path.startsWith(File.separator);
459         }
460
461         /**
462          * Löscht Slash am Anfang des Strings
463          * @param path
464          * @return
465          */
466         public static String removeFirstSlash (String path) {
467                 return  path.startsWith("/") ? path.substring(1) : path;
468         }
469
470         /**
471          * formatiert eine Zahl (0-99) zweistellig (z.B. 5 -> 05)
472          * @return zwistellige Zahl
473          */
474         public static String pad2 (int number) {
475                 return  number < 10 ? "0" + number : String.valueOf(number);
476         }
477
478         /**
479          * formatiert eine Zahl (0-999) dreistellig (z.B. 7 -> 007)
480          *
481          * @return 3-stellige Zahl
482          */
483         public static String pad3 (int number) {
484                 return  number < 10 ? "00" + number : number < 100 ? "0" + number : String.valueOf(number);
485         }
486
487         /**
488          * Konvertiert Unix-Linefeeds in Win-Linefeeds
489          * @param s
490          * @return Konvertierter String
491          */
492         public static String unixLineFeedsToWin(String s) {
493                 int i = -1;
494                 while (true) {
495                         i = s.indexOf('\n', i+1);
496                         if (i < 0) break;
497                         if ((i == 0 || s.charAt(i-1) != '\r') &&
498                                 (i == s.length()-1 || s.charAt(i+1) != '\r')) {
499                                 s = s.substring(0, i)+'\r'+s.substring(i);
500                                 i++;
501                         }
502                 }
503                 return s;
504         }
505
506
507         /**
508          * verwandelt einen String in eine gültige Url, konvertiert Sonderzeichen
509          * und Spaces werden zu Underscores
510          *
511          * @return gültige Url
512          */
513         public static String convert2url(String s) {
514                 s = toLowerCase(s);
515                 StringBuffer buf = new StringBuffer();
516                 for(int i = 0; i < s.length(); i++ ) {
517                                 switch( s.charAt( i ) ) {
518                                 case 'ö':
519                         buf.append( "oe" ); break;
520                                 case 'ä':
521                         buf.append( "ae" ); break;
522                                 case 'ü':
523                         buf.append( "ue" ); break;
524                                 case 'ã':
525                         buf.append( "a" ); break;
526                                 case '´':
527                                 case '.':
528                         buf.append( "_" ); break;
529                                 case ' ':
530                         if( buf.charAt( buf.length() - 1 ) != '_' ) {
531                                         buf.append( "_" );
532                         }
533                         break;
534                                 default:
535                         buf.append( s.charAt( i ) );
536                                 }
537                 }
538                 return buf.toString();
539         }
540
541         /**
542          * wandelt Sonderzeichen in Quotes um
543          *
544          * @return Kovertierter String
545          */
546         public static String encodeHtml(String s) {
547                 StringBuffer buf = new StringBuffer();
548                 for(int i=0;i < s.length(); i++ ) {
549
550                         /** @todo looks inefficient, to ask for index of every char, in
551                          *  case of failure it runs to the end.*/
552                         if (s.charAt(i)=='&') {
553                                 // convert html to xml-parsable representation
554                                 if( s.indexOf( "&ouml;", i ) == i ) {
555                                         buf.append( "&#246;" ); i += 5;
556                                         continue;
557                                 }
558                                 if( s.indexOf( "&auml;", i ) == i ) {
559                                         buf.append( "&#228;" ); i += 5;
560                                         continue;
561                                 }
562                                 if( s.indexOf( "&uuml;", i ) == i ) {
563                                         buf.append( "&#252;" ); i += 5;
564                                         continue;
565                                 }
566                                 if( s.indexOf( "&Ouml;", i ) == i ) {
567                                         buf.append( "&#214;" ); i += 5;
568                                         continue;
569                                 }
570                                 if( s.indexOf( "&Auml;", i ) == i ) {
571                                         buf.append( "&#196;" ); i += 5;
572                                         continue;
573                                 }
574                                 if( s.indexOf( "&Uuml;", i ) == i ) {
575                                         buf.append( "&#220;" ); i += 5;
576                                         continue;
577                                 }
578                                 if( s.indexOf( "&szlig;", i ) == i ) {
579                                         buf.append( "&#223;" ); i += 6;
580                                         continue;
581                                 }
582
583                                 /** @todo should only escape outside of tags */
584
585                                 if( s.indexOf( "&quot;", i ) == i ) {
586                                         buf.append( "&#223;" ); i += 5;
587                                         continue;
588                                 }
589                                 if( s.indexOf( "&ndash;", i ) == i ) {
590                                         buf.append( "&#8211;" ); i += 6;
591                                         continue;
592                                 }
593                                 if( s.indexOf( "&mdash;", i ) == i ) {
594                                         buf.append( "&#8212;" ); i += 6;
595                                         continue;
596                                 }
597                                 if( s.indexOf( "&ldquo;", i ) == i ) {
598                                         buf.append( "&#8220;" ); i += 6;
599                                         continue;
600                                 }
601                                 if( s.indexOf( "&rdquo;", i ) == i ) {
602                                         buf.append( "&#8221;" ); i += 6;
603                                         continue;
604                                 }
605                                 if( s.indexOf( "&bdquo;", i ) == i ) {
606                                         buf.append( "&#8222;" ); i += 6;
607                                         continue;
608                                 }
609
610                                 //looks pretty stupid
611                                 if( s.indexOf( "&lt;", i ) == i ) {
612                                         buf.append( "&lt;" ); i += 3;
613                                         continue;
614                                 }
615                                 if( s.indexOf( "&gt;", i ) == i ) {
616                                         buf.append( "&gt;" ); i += 3;
617                                         continue;
618                                 }
619                                 if( s.indexOf( "&acute;", i ) == i ) {
620                                         buf.append( "&acute;" ); i += 6;
621                                         continue;
622                                 }
623                                 if( s.indexOf( "&nbsp;", i ) == i ) {
624                                         buf.append( "&nbsp;" ); i += 5;
625                                         continue;
626                                 }
627                                 //has to be the last
628                                 //if( s.indexOf( "&", i ) == i ) {
629                                 //  buf.append( "&#38;" ); i += 0;
630                                 //  continue;
631                                 //}
632                         }
633                         // convert umlauts an other special charakters
634                         switch( s.charAt(i) ) {
635                                 case 'ö': buf.append( "&#246;" ); break;
636                                 case 'ä': buf.append( "&#228;" ); break;
637                                 case 'ü': buf.append( "&#252;" ); break;
638                                 case 'Ö': buf.append( "&#214;" ); break;
639                                 case 'Ä': buf.append( "&#196;" ); break;
640                                 case 'Ü': buf.append( "&#220;" ); break;
641                                 case 'ß': buf.append( "&#223;" ); break;
642                                 case 'é': buf.append( "&#233;" ); break;
643                                 case 'è': buf.append( "&#232;" ); break;
644                                 case 'á': buf.append( "&#225;" ); break;
645                                 case 'à': buf.append( "&#224;" ); break;
646                                 case 'â': buf.append( "&#226;" ); break;
647                                 case 'ã': buf.append( "&#227;" ); break;
648                                 case '¬': buf.append( "&#172;" ); break;
649                                 case '¹': buf.append( "&#185;" ); break;
650                                 case '²': buf.append( "&#178;" ); break;
651                                 case '³': buf.append( "&#179;" ); break;
652                                 case '¼': buf.append( "&#188;" ); break;
653                                 case '½': buf.append( "&#189;" ); break;
654                                 case '¾': buf.append( "&#190;" ); break;
655                                 case '¶': buf.append( "&#182;" ); break;
656                                 case 'æ': buf.append( "&#230;" ); break;
657                                 case 'ð': buf.append( "&#240;" ); break;
658                                 case '|': buf.append( "&#166;" ); break;
659                                 case '·': buf.append( "&#183;" ); break;
660                                 case '°': buf.append( "&#176;" ); break;
661                                 case '§': buf.append( "&#167;" ); break;
662                                 case 'ø': buf.append( "&#248;" ); break;
663                                 case 'ç': buf.append( "&#231;" ); break;
664                                 case '¤': buf.append( "&#164;" ); break;
665                                 case 'ª': buf.append( "&#170;" ); break;
666                                 case 'Ç': buf.append( "&#199;" ); break;
667                                 case 'Ã': buf.append( "&#195;" ); break;
668                                 case 'Â': buf.append( "&#194;" ); break;
669                                 case 'Æ': buf.append( "&#198;" ); break;
670                                 case '©': buf.append( "&#169;" ); break;
671                                 case '®': buf.append( "&#174;" ); break;
672                                 case '¥': buf.append( "&#165;" ); break;
673                                 case 'Þ': buf.append( "&#254;" ); break;
674                                 case '¯': buf.append( "&#175;" ); break;
675                                 case 'Ð': buf.append( "&#208;" ); break;
676                                 case 'º': buf.append( "&#186;" ); break;
677                                 case '¡': buf.append( "&#161;" ); break;
678                                 case '£': buf.append( "&#163;" ); break;
679                                 case '±': buf.append( "&#177;" ); break;
680                                 case '¿': buf.append( "&#191;" ); break;
681                                 case 'Ø': buf.append( "&#216;" ); break;
682                                 case 'Á': buf.append( "&#192;" ); break;
683                                 case 'À': buf.append( "&#193;" ); break;
684                                 case 'É': buf.append( "&#200;" ); break;
685                                 case 'È': buf.append( "&#201;" ); break;
686                                 case 'ù': buf.append( "&#250;" ); break;
687                                 case 'ñ': buf.append( "&#241;" ); break;
688                                 case 'Ñ': buf.append( "&#209;" ); break;
689                                 case 'µ': buf.append( "&#181;" ); break;
690                                 case 'Í': buf.append( "&#204;" ); break;
691                                 case 'Ì': buf.append( "&#205;" ); break;
692                                 case 'í': buf.append( "&#236;" ); break;
693                                 case 'ì': buf.append( "&#237;" ); break;
694                                 case 'î': buf.append( "&#238;" ); break;
695                                 case 'Î': buf.append( "&#206;" ); break;
696                                 case 'ó': buf.append( "&#243;" ); break;
697                                 case 'Ó': buf.append( "&#210;" ); break;
698                                 case 'ò': buf.append( "&#206;" ); break;
699                                 case 'Ò': buf.append( "&#211;" ); break;
700                                 case 'ô': buf.append( "&#244;" ); break;
701                                 case 'Ô': buf.append( "&#212;" ); break;
702                                 case 'õ': buf.append( "&#245;" ); break;
703                                 case 'Õ': buf.append( "&#213;" ); break;
704                                 case 'ý': buf.append( "&#253;" ); break;
705                                 case 'Ý': buf.append( "&#221;" ); break;
706                                 case 'û': buf.append( "&#251;" ); break;
707                                 case 'Û': buf.append( "&#219;" ); break;
708                                 case 'ú': buf.append( "&#249;" ); break;
709                                 case 'Ú': buf.append( "&#217;" ); break;
710                                 case 'Ù': buf.append( "&#218;" ); break;
711                                 case 'Ê': buf.append( "&#202;" ); break;
712                                 case 'ê': buf.append( "&#234;" ); break;
713                                 case 'å': buf.append( "&#229;" ); break;
714                                 case 'Å': buf.append( "&#197;" ); break;
715                                 case 'ë': buf.append( "&#235;" ); break;
716                                 case 'Ë': buf.append( "&#203;" ); break;
717                                 case 'ÿ': buf.append( "&#255;" ); break;
718                                 case 'ï': buf.append( "&#239;" ); break;
719                                 case 'Ï': buf.append( "&#207;" ); break;
720                                 case '«': buf.append( "&#171;" ); break;
721                                 case '»': buf.append( "&#187;" ); break;
722                                 case '\'': buf.append( "&acute;" ); break;
723                                 case '\"': buf.append( "&quot;" ); break;
724                                 //case '\u8211': buf.append( "&#8211;" ); break;
725                                 //case '\u8212': buf.append( "&#8212;" ); break;
726                                 //case '\u8220': buf.append( "&#8220;" ); break;
727                                 //case '\u8221': buf.append( "&#8221;" ); break;
728                                 //case '\u8222': buf.append( "&#8222;" ); break;
729                                 //case '\"': buf.append( "&#34;" ); break;
730                                 default: buf.append( s.charAt(i) );
731                         }
732
733                 }
734                 return buf.toString();
735         }
736
737
738         public static String decodeHTMLinTags(String s){
739                 StringBuffer buffer = new StringBuffer();
740                 boolean start = false;
741                 boolean stop = false;
742                 int startIndex = 0;
743                 int stopIndex = 0;
744                 int temp = 0;
745
746                 for(int i=0;i<s.length();i++){
747                         if(s.charAt(i)=='<'){
748                                 start = true;
749                                 startIndex = i;
750                         } else if(s.charAt(i)=='>'){
751                                 stop = true;
752                                 stopIndex = i;
753
754                                 if(start && stop){
755                                         buffer.append(s.substring(temp,startIndex));
756                                         buffer.append(replaceQuot(s.substring(startIndex,stopIndex+1)));
757                                         i= temp= stopIndex+1;
758                                         start= stop= false;
759                                 }
760                         }
761                 }
762                 if(stopIndex>0){
763                         buffer.append(s.substring(stopIndex+1));
764                         return buffer.toString();
765                 } else {
766                         return s;
767                 }
768         }
769
770         public static String replaceQuot(String s) {
771                 StringBuffer buffer = new StringBuffer();
772                 for(int j = 0; j < s.length();j++){
773                         if(s.charAt(j)=='&'){
774                                 if(s.indexOf( "&quot;",j) == j) {
775                                         buffer.append( "\"" );
776                                         j += 5;
777                                 }//if
778                         } else {
779                                 buffer.append(s.charAt(j));
780                         }//else
781                 }//for
782                 return buffer.toString();
783         }
784
785         /** wandelt Quotes in Sonderzeichen um
786          */
787         /**
788         public static String decodeHtml(String s) {
789                 StringBuffer buf = new StringBuffer();
790                 for(int i=0;i < s.length(); i++ ) {
791                         if( s.indexOf( "&ouml;", i ) == i ) {
792                                 buf.append( "ö" ); i += 5;
793                                 continue;
794                         }
795                         if( s.indexOf( "&auml;", i ) == i ) {
796                                 buf.append( "ä" ); i += 5;
797                                 continue;
798                         }
799                         if( s.indexOf( "&uuml;", i ) == i ) {
800                                 buf.append( "ü" ); i += 5;
801                                 continue;
802                         }
803                         if( s.indexOf( "&Ouml;", i ) == i ) {
804                                 buf.append( "Ö" ); i += 5;
805                                 continue;
806                         }
807                         if( s.indexOf( "&Auml;", i ) == i ) {
808                                 buf.append( "Ä" ); i += 5;
809                                 continue;
810                         }
811                         if( s.indexOf( "&Uuml;", i ) == i ) {
812                                 buf.append( "Ü" ); i += 5;
813                                 continue;
814                         }
815                         if( s.indexOf( "&szlig;", i ) == i ) {
816                                 buf.append( "ß" ); i += 6;
817                                 continue;
818                         }
819                         if( s.indexOf( "&quot;", i ) == i ) {
820                                 buf.append( "\"" ); i += 5;
821                                 continue;
822                         }
823                         buf.append( s.charAt(i) );
824                 }
825                 return buf.toString();
826         }
827          */
828
829         /**
830          * schnellere Variante der String.toLowerCase()-Routine
831          *
832          * @return String in Kleinbuchsten
833          */
834         public static String toLowerCase(String s) {
835                 int l = s.length();
836                 char[] a = new char[l];
837                 for (int i = 0; i < l; i++)
838                         a[i] = Character.toLowerCase(s.charAt(i));
839                 return new String(a);
840         }
841
842                 /**
843          * Findet <code>element</code> im String-Array <code>array</code>
844          * @param array
845          * @param element
846          * @return Fundstelle als int oder -1
847          */
848         public static int indexOf(String[] array, String element) {
849                 if (array != null)
850                         for (int i = 0; i < array.length; i++)
851                                 if (array[i].equals(element))
852                                         return i;
853                 return -1;
854         }
855
856         /**
857          * Testet auf Vorkommen von <code>element</code> in <code>array</code>
858          * @param array String-Array
859          * @param element
860          * @return true wenn <code>element</code> vorkommt, sonst false
861          */
862         public static boolean contains(String[] array, String element) {
863                 return indexOf(array, element) >= 0;
864         }
865
866                 /**
867          * Ermittelt CRC-Prüfsumme von String <code>s</code>
868          * @param s
869          * @return CRC-Prüfsumme
870          */
871         public static int getCRC(String s) {
872                 int h = 0;
873                 char val[] = s.toCharArray();
874                 int len = val.length;
875
876                 for (int i = 0 ; i < len; i++) {
877                         h &= 0x7fffffff;
878                         h = (((h >> 30) | (h << 1)) ^ (val[i]+i));
879                 }
880
881                 return (h << 8) | (len & 0xff);
882         }
883
884                 /**
885          * Liefert Default-Wert def zurück, wenn String <code>s</code>
886          * kein Integer ist.
887          *
888          * @param s
889          * @param def
890          * @return geparster int aus s oder def
891          */
892         public static int parseInt(String s, int def) {
893                 if (s == null) return def;
894                 try {
895                         return Integer.parseInt(s);
896                 } catch (NumberFormatException e) {
897                         return def;
898                 }
899         }
900
901         /**
902          * Liefert Defaultwert def zurück, wenn s nicht zu einem float geparsed werden kann.
903          * @param s
904          * @param def
905          * @return geparster float oder def
906          */
907         public static float parseFloat(String s, float def) {
908                 if (s == null) return def;
909                 try {
910                         return new Float(s).floatValue();
911                 } catch (NumberFormatException e) {
912                         return def;
913                 }
914         }
915
916                 /**
917          * Findet Ende eines Satzes in String <code>text</code>
918          * @param text
919          * @param startIndex
920          * @return index des Satzendes, oder -1
921          */
922         public static int findEndOfSentence(String text, int startIndex) {
923                  while (true) {
924                          int i = text.indexOf('.', startIndex);
925                          if (i < 0) return -1;
926                          if (i > 0 && !Character.isDigit(text.charAt(i-1)) &&
927                                         (i+1 >= text.length()
928                                         || text.charAt(i+1) == ' '
929                                         || text.charAt(i+1) == '\n'
930                                         || text.charAt(i+1) == '\t'))
931                                         return i+1;
932                          startIndex = i+1;
933                  }
934         }
935
936                 /**
937          * Findet Wortende in String <code>text</code> ab <code>startIndex</code>
938          * @param text
939          * @param startIndex
940          * @return Index des Wortendes, oder -1
941          */
942         public static int findEndOfWord(String text, int startIndex) {
943                 int i = text.indexOf(' ', startIndex),
944                         j = text.indexOf('\n', startIndex);
945                 if (i < 0) i = text.length();
946                 if (j < 0) j = text.length();
947                 return Math.min(i, j);
948         }
949
950
951         /**
952          *  convertNewline2P ist eine regex-routine zum umwandeln von 2 oder mehr newlines (\n)
953          *  in den html-tag <p>
954          *  nur sinnvoll, wenn text nicht im html-format eingegeben
955          */
956         public static String convertNewline2P(String haystack) {
957                         return re_brbr2p.substituteAll(haystack,"\n</p><p>");
958         }
959
960         /**
961          *  convertNewline2Break ist eine regex-routine zum umwandeln von 1 newline (\n)
962          *  in den html-tag <br>
963          *  nur sinnvoll, wenn text nicht im html-format eingegeben
964          */
965         public static String convertNewline2Break(String haystack) {
966                 return re_newline2br.substituteAll(haystack,"$0<br />");
967         }
968
969         /**
970          *  createMailLinks wandelt text im email-adressenformat
971          *  in einen klickbaren link um
972          *  nur sinnvoll, wenn text nicht im html-format eingegeben
973          */
974         public static String createMailLinks(String haystack) {
975                         return re_mail.substituteAll(haystack,"<a href=\"mailto:$0\">$0</a>");
976         }
977
978
979         /**
980          *  createMailLinks wandelt text im email-adressenformat
981          *  in einen klickbaren link um
982          *  nur sinnvoll, wenn text nicht im html-format eingegeben
983          */
984         public static String createMailLinks(String haystack, String imageRoot, String mailImage) {
985                 return re_mail.substituteAll(haystack,"<img src=\""+imageRoot+"/"+mailImage+"\" border=\"0\"/>&#160;<a href=\"mailto:$0\">$0</a>");
986         }
987
988
989         /**
990          *  createURLLinks wandelt text im url-format
991          *  in einen klickbaren link um
992          *  nur sinnvoll, wenn text nicht im html-format eingegeben
993          */
994         public static String createURLLinks(String haystack) {
995                 return re_url.substituteAll(haystack,"<a href=\"$0\">$0</a>");
996         }
997
998         /**
999          * this routine takes text in url format and makes
1000          * a clickaeble "<href>" link removing any "illegal" html tags
1001          * @param haystack, the url
1002          * @param title, the href link text
1003          * @param imagRoot, the place to find icons
1004          * @param extImage, the url of the icon to show next to the link
1005          * @return a String containing the url
1006          */
1007         public static String createURLLinks(String haystack, String title, String imageRoot,String extImage) {
1008                 if (title == null) {
1009                         return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/>&#160;<a href=\"$0\">$0</a>");
1010                 } else {
1011                         title = removeHTMLTags(title);
1012                         return re_url.substituteAll(haystack,"<img src=\""+imageRoot+"/"+extImage+"\" border=\"0\"/>&#160;<a href=\"$0\">"+title+"</a>");
1013                 }
1014         }
1015
1016         /**
1017          * this routine takes text in url format and makes
1018          * a clickaeble "<href>" link removing any "illegal" html tags
1019          * @param haystack, the url
1020          * @param imageRoot, the place to find icons
1021          * @param extImage, the url of the icon to show next to the link
1022          * @param intImage, unused
1023          * @return a String containing the url
1024          */
1025         public static String createURLLinks(String haystack, String title, String imageRoot,String extImage,String intImage) {
1026                 return createURLLinks(haystack, title, imageRoot, extImage);
1027         }
1028
1029          /**
1030          *  deleteForbiddenTags
1031          *  this method deletes all <script>, <body> and <head>-tags
1032          */
1033         public static final String deleteForbiddenTags(String haystack) {
1034                 try {
1035                         RE regex = new RE("<[ \t\r\n](.*?)script(.*?)/script(.*?)>",RE.REG_ICASE);
1036                         haystack = regex.substituteAll(haystack,"");
1037                         regex = new RE("<head>(.*?)</head>");
1038                         haystack = regex.substituteAll(haystack,"");
1039                         regex = new RE("<[ \t\r\n/]*body(.*?)>");
1040                         haystack = regex.substituteAll(haystack,"");
1041                         return haystack;
1042                 } catch(REException ex){
1043                         return null;
1044                 }
1045         }
1046
1047         /**
1048          * this method deletes all html tags
1049          */
1050         public static final String removeHTMLTags(String haystack){
1051                         return re_tags.substituteAll(haystack,"");
1052         }
1053
1054
1055         /**
1056          * this method deletes all but the approved tags html tags
1057          * it also deletes approved tags which contain malicious-looking attributes and doesn't work at all
1058          */
1059         public static String approveHTMLTags(String haystack){
1060                 try {
1061                         String approvedTags="a|img|h1|h2|h3|h4|h5|h6|br|b|i|strong|p";
1062                         String badAttributes="onAbort|onBlur|onChange|onClick|onDblClick|onDragDrop|onError|onFocus|onKeyDown|onKeyPress|onKeyUp|onLoad|onMouseDown|onMouseMove|onMouseOut|onMouseOver|onMouseUp|onMove|onReset|onResize|onSelect|onSubmit|onUnload";
1063                         String approvedProtocols="rtsp|http|ftp|https|freenet|mailto";
1064
1065                         // kill all the bad tags that have attributes
1066                         String s = "<\\s*/?\\s*(?!(("+approvedTags+")\\s))\\w+\\s[^>]*>";
1067                         RE regex = new RE(s,RE.REG_ICASE);
1068                         haystack = regex.substituteAll(haystack,"");
1069
1070                         // kill all the bad tags that are attributeless
1071                         regex = new RE("<\\s*/?\\s*(?!(("+approvedTags+")\\s*>))\\w+\\s*>",RE.REG_ICASE);
1072                         haystack = regex.substituteAll(haystack,"");
1073
1074                         // kill all the tags which have a javascript attribute like onLoad
1075                         regex = new RE("<[^>]*("+badAttributes+")[^>]*>",RE.REG_ICASE);
1076                         haystack = regex.substituteAll(haystack,"");
1077
1078                         // kill all the tags which include a url to an unacceptable protocol
1079                         regex = new RE("<\\s*a\\s+[^>]*href=(?!(\'|\")?("+approvedProtocols+"))[^>]*>",RE.REG_ICASE);
1080                         haystack = regex.substituteAll(haystack,"");
1081
1082                         return haystack;
1083                 } catch(REException ex){
1084                         ex.printStackTrace();
1085                         return null;
1086                 }
1087         }
1088
1089
1090         /**
1091          *  createHTML ruft alle regex-methoden zum unwandeln eines nicht
1092          *  htmlcodierten string auf und returnt einen htmlcodierten String
1093          */
1094         public static String createHTML(String content){
1095                 content=convertNewline2Break(content);
1096                 content=convertNewline2P(content);
1097                 content=createMailLinks(content);
1098                 content=createURLLinks(content);
1099                 return content;
1100         }
1101
1102
1103         /**
1104          *  createHTML ruft alle regex-methoden zum unwandeln eines nicht
1105          *  htmlcodierten string auf und returnt einen htmlcodierten String
1106          */
1107         public static String createHTML(String content,String producerDocRoot,String mailImage,String extImage,String intImage){
1108                 content=convertNewline2Break(content);
1109                 content=convertNewline2P(content);
1110                 content=createMailLinks(content,producerDocRoot,mailImage);
1111                 content=createURLLinks(content,null,producerDocRoot,extImage,intImage);
1112                 return content;
1113         }
1114
1115 }
1116