2 * Copyright (C) 2001, 2002 The Mir-coders group
4 * This file is part of Mir.
6 * Mir is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * Mir is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with Mir; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * In addition, as a special exception, The Mir-coders gives permission to link
21 * the code of this program with any library licensed under the Apache Software License,
22 * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library
23 * (or with modified versions of the above that use the same license as the above),
24 * and distribute linked combinations including the two. You must obey the
25 * GNU General Public License in all respects for all of the code used other than
26 * the above mentioned libraries. If you modify this file, you may extend this
27 * exception to your version of the file, but you are not obligated to do so.
28 * If you do not wish to do so, delete this exception statement from your version.
import java.io.UnsupportedEncodingException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Map;
39 public class HTMLRoutines {
/**
 * Hidden constructor: the class is used through its static members only
 * and is not meant to be instantiated.
 */
private HTMLRoutines() {
}
43 private static Method encodeURLMethod;
46 encodeURLMethod = URLEncoder.class.getMethod("encode", new Class[] {String.class});
48 catch (NoSuchMethodException e) {
49 throw new RuntimeException(e.getMessage());
54 * Encodes a URL: escapes reserved URL characters like &, = into % escape
57 public static String encodeURL(String aString) {
59 return (String) encodeURLMethod.invoke(URLEncoder.class, new Object[] {aString});
61 catch (IllegalAccessException e) {
62 throw new RuntimeException(e.getMessage());
64 catch (InvocationTargetException e) {
65 throw new RuntimeException(e.getMessage());
69 public static String encodeURL(String aString, String anEncoding) {
70 return encodeURL(aString);
73 private static final char[] CHARACTERS_TO_ESCAPE = { '&', '<', '>', '"' };
74 private static final String[] ESCAPE_CODES = { "&", "<", ">", """ };
76 public static String encodeHTML(String aText) {
77 return StringRoutines.replaceStringCharacters(aText, CHARACTERS_TO_ESCAPE, ESCAPE_CODES);
80 public static String prettyEncodeHTML(String aText) throws UtilExc {
81 return StringRoutines.performRegularExpressionReplacement(
82 encodeHTML(aText), "\\n", "<br>\n");
85 public static String encodeXML(String aText) {
86 //#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
87 final char[] CHARACTERS_TO_ESCAPE = { '&', '<', '>', '"', '\'',
88 '\u0000', '\u0001', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007', '\u0008', '\u0000', '\u000B',
89 '\u000C', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012', '\u0013', '\u0014', '\u0015', '\u0016',
90 '\u0017', '\u0018', '\u0019', '\u001A', '\u001B', '\u001C', '\u001D', '\u001E' };
91 final String[] ESCAPE_CODES = { "&", "<", ">", """, "'",
92 "", "", "", "", "", "", "", "", "", "",
93 "", "", "", "", "", "", "", "", "", "",
94 "", "", "", "", "", "", "", ""};
96 return StringRoutines.replaceStringCharacters(aText, CHARACTERS_TO_ESCAPE, ESCAPE_CODES);
/**
 * Lookup from an entity name (the text between the ampersand and the
 * semicolon) to the string it stands for. Filled once by the static
 * initializer below from HTML_ENTITY_TABLE.
 */
private static final Map htmlEntities = new HashMap();

/**
 * Flat list of entity definitions: every even index holds an entity name,
 * the following odd index the text it resolves to.
 */
private static final String[] HTML_ENTITY_TABLE = {
  "nbsp", "\u00a0", "iexcl", "\u00a1", "cent", "\u00a2", "pound", "\u00a3", "curren", "\u00a4", "yen", "\u00a5",
  "brvbar", "\u00a6", "sect", "\u00a7", "uml", "\u00a8", "copy", "\u00a9", "ordf", "\u00aa", "laquo", "\u00ab",
  "not", "\u00ac", "shy", "\u00ad", "reg", "\u00ae", "macr", "\u00af", "deg", "\u00b0", "plusmn", "\u00b1",
  "sup2", "\u00b2", "sup3", "\u00b3", "acute", "\u00b4", "micro", "\u00b5", "para", "\u00b6", "middot", "\u00b7",
  "cedil", "\u00b8", "sup1", "\u00b9", "ordm", "\u00ba", "raquo", "\u00bb", "frac14", "\u00bc", "frac12", "\u00bd",
  "frac34", "\u00be", "iquest", "\u00bf", "Agrave", "\u00c0", "Aacute", "\u00c1", "Acirc", "\u00c2", "Atilde", "\u00c3",
  "Auml", "\u00c4", "Aring", "\u00c5", "AElig", "\u00c6", "Ccedil", "\u00c7", "Egrave", "\u00c8", "Eacute", "\u00c9",
  "Ecirc", "\u00ca", "Euml", "\u00cb", "Igrave", "\u00cc", "Iacute", "\u00cd", "Icirc", "\u00ce", "Iuml", "\u00cf",
  "ETH", "\u00d0", "Ntilde", "\u00d1", "Ograve", "\u00d2", "Oacute", "\u00d3", "Ocirc", "\u00d4", "Otilde", "\u00d5",
  "Ouml", "\u00d6", "times", "\u00d7", "Oslash", "\u00d8", "Ugrave", "\u00d9", "Uacute", "\u00da", "Ucirc", "\u00db",
  "Uuml", "\u00dc", "Yacute", "\u00dd", "THORN", "\u00de", "szlig", "\u00df", "agrave", "\u00e0", "aacute", "\u00e1",
  "acirc", "\u00e2", "atilde", "\u00e3", "auml", "\u00e4", "aring", "\u00e5", "aelig", "\u00e6", "ccedil", "\u00e7",
  "egrave", "\u00e8", "eacute", "\u00e9", "ecirc", "\u00ea", "euml", "\u00eb", "igrave", "\u00ec", "iacute", "\u00ed",
  "icirc", "\u00ee", "iuml", "\u00ef", "eth", "\u00f0", "ntilde", "\u00f1", "ograve", "\u00f2", "oacute", "\u00f3",
  "ocirc", "\u00f4", "otilde", "\u00f5", "ouml", "\u00f6", "divide", "\u00f7", "oslash", "\u00f8", "ugrave", "\u00f9",
  "uacute", "\u00fa", "ucirc", "\u00fb", "uuml", "\u00fc", "yacute", "\u00fd", "thorn", "\u00fe", "yuml", "\u00ff",
  "fnof", "\u0192", "Alpha", "\u0391", "Beta", "\u0392", "Gamma", "\u0393", "Delta", "\u0394", "Epsilon", "\u0395",
  "Zeta", "\u0396", "Eta", "\u0397", "Theta", "\u0398", "Iota", "\u0399", "Kappa", "\u039a", "Lambda", "\u039b",
  "Mu", "\u039c", "Nu", "\u039d", "Xi", "\u039e", "Omicron", "\u039f", "Pi", "\u03a0", "Rho", "\u03a1",
  "Sigma", "\u03a3", "Tau", "\u03a4", "Upsilon", "\u03a5", "Phi", "\u03a6", "Chi", "\u03a7", "Psi", "\u03a8",
  "Omega", "\u03a9", "alpha", "\u03b1", "beta", "\u03b2", "gamma", "\u03b3", "delta", "\u03b4", "epsilon", "\u03b5",
  "zeta", "\u03b6", "eta", "\u03b7", "theta", "\u03b8", "iota", "\u03b9", "kappa", "\u03ba", "lambda", "\u03bb",
  "mu", "\u03bc", "nu", "\u03bd", "xi", "\u03be", "omicron", "\u03bf", "pi", "\u03c0", "rho", "\u03c1",
  "sigmaf", "\u03c2", "sigma", "\u03c3", "tau", "\u03c4", "upsilon", "\u03c5", "phi", "\u03c6", "chi", "\u03c7",
  "psi", "\u03c8", "omega", "\u03c9", "thetasym","\u03d1", "upsih", "\u03d2", "piv", "\u03d6", "bull", "\u2022",
  "hellip", "\u2026", "prime", "\u2032", "Prime", "\u2033", "oline", "\u203e", "frasl", "\u2044", "weierp", "\u2118",
  "image", "\u2111", "real", "\u211c", "trade", "\u2122", "alefsym", "\u2135", "larr", "\u2190", "uarr", "\u2191",
  "rarr", "\u2192", "darr", "\u2193", "harr", "\u2194", "crarr", "\u21b5", "lArr", "\u21d0", "uArr", "\u21d1",
  "rArr", "\u21d2", "dArr", "\u21d3", "hArr", "\u21d4", "forall", "\u2200", "part", "\u2202", "exist", "\u2203",
  "empty", "\u2205", "nabla", "\u2207", "isin", "\u2208", "notin", "\u2209", "ni", "\u220b", "prod", "\u220f",
  "sum", "\u2211", "minus", "\u2212", "lowast", "\u2217", "radic", "\u221a", "prop", "\u221d", "infin", "\u221e",
  "ang", "\u2220", "and", "\u2227", "or", "\u2228", "cap", "\u2229", "cup", "\u222a", "int", "\u222b",
  "there4", "\u2234", "sim", "\u223c", "cong", "\u2245", "asymp", "\u2248", "ne", "\u2260", "equiv", "\u2261",
  "le", "\u2264", "ge", "\u2265", "sub", "\u2282", "sup", "\u2283", "nsub", "\u2284", "sube", "\u2286",
  "supe", "\u2287", "oplus", "\u2295", "otimes", "\u2297", "perp", "\u22a5", "sdot", "\u22c5", "lceil", "\u2308",
  "rceil", "\u2309", "lfloor", "\u230a", "rfloor", "\u230b", "lang", "\u2329", "rang", "\u232a", "loz", "\u25ca",
  "spades", "\u2660", "clubs", "\u2663", "hearts", "\u2665", "diams", "\u2666", "quot", "\"", "amp", "\u0026",
  "lt", "\u003c", "gt", "\u003e", "OElig", "\u0152", "oelig", "\u0153", "Scaron", "\u0160", "scaron", "\u0161",
  "Yuml", "\u0178", "circ", "\u02c6", "tilde", "\u02dc", "ensp", "\u2002", "emsp", "\u2003", "thinsp", "\u2009",
  "zwnj", "\u200c", "zwj", "\u200d", "lrm", "\u200e", "rlm", "\u200f", "ndash", "\u2013", "mdash", "\u2014",
  "lsquo", "\u2018", "rsquo", "\u2019", "sbquo", "\u201a", "ldquo", "\u201c", "rdquo", "\u201d", "bdquo", "\u201e",
  "dagger", "\u2020", "Dagger", "\u2021", "permil", "\u2030", "lsaquo", "\u2039", "rsaquo", "\u203a", "euro", "\u20ac"
};

static {
  // Walk the table pair by pair; a trailing name without a value is skipped.
  final int pairCount = HTML_ENTITY_TABLE.length / 2;

  for (int pair = 0; pair < pairCount; pair++) {
    final String entityName = HTML_ENTITY_TABLE[2 * pair];
    final String entityText = HTML_ENTITY_TABLE[2 * pair + 1];

    htmlEntities.put(entityName, entityText);
  }
}
153 * Resolves an html entity if possible, returns the unresolved entity otherwise
156 * &#<decimal number>;
157 * &#x<hexadecimal number>;
160 public static String resolveHTMLEntity(String anEntity) {
161 if (anEntity.length()<3 || anEntity.length()>10 ||
162 anEntity.charAt(0)!='&' ||
163 anEntity.charAt(anEntity.length()-1)!=';')
166 if (anEntity.charAt(1)=='#') {
170 if (anEntity.charAt(2)=='x') {
171 number = Integer.parseInt(anEntity.substring(3,anEntity.length()-1), 16);
174 number = Integer.parseInt(anEntity.substring(2,anEntity.length()-1), 10);
177 if (number>=Character.MIN_VALUE && number<=Character.MAX_VALUE &&
178 Character.isDefined((char) number)) {
179 return new String(new char[]{(char) number});
182 catch (NumberFormatException e) {
186 String name = anEntity.substring(1,anEntity.length()-1);
188 String result = (String) htmlEntities.get(name);
198 * Resolve all HTML entities (&....;) in a text
200 public static String resolveHTMLEntites(String aText) {
201 StringBuffer result = new StringBuffer();
207 position = aText.indexOf("&", oldPosition);
209 position = aText.length();
211 result.append(aText.substring(oldPosition,position));
213 if (position<aText.length()) {
214 int position2 = aText.indexOf(";", position);
216 if (position2>position+1) {
217 result.append(resolveHTMLEntity(aText.substring(position, position2+1)));
218 oldPosition=position2+1;
221 result.append(aText.charAt(position));
222 oldPosition = position+1;
225 } while (position<aText.length());
227 return result.toString();