X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=source%2Fmir%2Futil%2FHTMLRoutines.java;h=dd4882fb165a69cdadb4090f8a4f33ee3a289f23;hb=b7ea95152eaddbf069564a5f2f117774165d36e6;hp=076f4cef4e1a36994ed68f5b188f58620e0c1df1;hpb=97a33333e6920e0b2578e506475f04c6d1d45a38;p=mir.git diff --git a/source/mir/util/HTMLRoutines.java b/source/mir/util/HTMLRoutines.java index 076f4cef..dd4882fb 100755 --- a/source/mir/util/HTMLRoutines.java +++ b/source/mir/util/HTMLRoutines.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2001, 2002 The Mir-coders group + * Copyright (C) 2001, 2002 The Mir-coders group * * This file is part of Mir. * @@ -18,55 +18,211 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * In addition, as a special exception, The Mir-coders gives permission to link - * the code of this program with the com.oreilly.servlet library, any library - * licensed under the Apache Software License, The Sun (tm) Java Advanced - * Imaging library (JAI), The Sun JIMI library (or with modified versions of - * the above that use the same license as the above), and distribute linked - * combinations including the two. You must obey the GNU General Public - * License in all respects for all of the code used other than the above - * mentioned libraries. If you modify this file, you may extend this exception - * to your version of the file, but you are not obligated to do so. If you do - * not wish to do so, delete this exception statement from your version. + * the code of this program with any library licensed under the Apache Software License, + * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library + * (or with modified versions of the above that use the same license as the above), + * and distribute linked combinations including the two. You must obey the + * GNU General Public License in all respects for all of the code used other than + * the above mentioned libraries. If you modify this file, you may extend this + * exception to your version of the file, but you are not obligated to do so. + * If you do not wish to do so, delete this exception statement from your version. */ package mir.util; -import java.net.*; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.net.URLEncoder; +import java.util.HashMap; +import java.util.Map; public class HTMLRoutines { + private HTMLRoutines() { + } + + private static Method encodeURLMethod; + static { + try { + encodeURLMethod = URLEncoder.class.getMethod("encode", new Class[] {String.class}); + } + catch (NoSuchMethodException e) { + throw new RuntimeException(e.getMessage()); + } + } + /** + * Encodes a URL: escapes reserved URL characters like &, = into % escape + * constructions. + */ public static String encodeURL(String aString) { - return URLEncoder.encode(aString); + try { + return (String) encodeURLMethod.invoke(URLEncoder.class, new Object[] {aString}); + } + catch (IllegalAccessException e) { + throw new RuntimeException(e.getMessage()); + } + catch (InvocationTargetException e) { + throw new RuntimeException(e.getMessage()); + } + } + + public static String encodeURL(String aString, String anEncoding) { + return encodeURL(aString); } + private static final char[] CHARACTERS_TO_ESCAPE = { '&', '<', '>', '"' }; + private static final String[] ESCAPE_CODES = { "&", "<", ">", """ }; + public static String encodeHTML(String aText) { - final char[] CHARACTERS_TO_ESCAPE = { '&', '<', '>', '"', '\'' }; - final String[] ESCAPE_CODES = { "&", "<", ">", """, "'" }; + return StringRoutines.replaceStringCharacters(aText, CHARACTERS_TO_ESCAPE, ESCAPE_CODES); + } + + public static String prettyEncodeHTML(String aText) throws UtilExc { + return StringRoutines.performRegularExpressionReplacement( + encodeHTML(aText), "\\n", "
\n"); + } + + public static String encodeXML(String aText) { + //#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] + final char[] CHARACTERS_TO_ESCAPE = { '&', '<', '>', '"', '\'', + '\u0000', '\u0001', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007', '\u0008', '\u0000', '\u000B', + '\u000C', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012', '\u0013', '\u0014', '\u0015', '\u0016', + '\u0017', '\u0018', '\u0019', '\u001A', '\u001B', '\u001C', '\u001D', '\u001E' }; + final String[] ESCAPE_CODES = { "&", "<", ">", """, "'", + "", "", "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", "", "", "", + "", "", "", "", "", "", "", ""}; + + return StringRoutines.replaceStringCharacters(aText, CHARACTERS_TO_ESCAPE, ESCAPE_CODES); + } + + private static final Map htmlEntities = new HashMap(); + + private static final String[] HTML_ENTITY_TABLE = { + "nbsp", "\u00a0", "iexcl", "\u00a1", "cent", "\u00a2", "pound", "\u00a3", "curren", "\u00a4", "yen", "\u00a5", + "brvbar", "\u00a6", "sect", "\u00a7", "uml", "\u00a8", "copy", "\u00a9", "ordf", "\u00aa", "laquo", "\u00ab", + "not", "\u00ac", "shy", "\u00ad", "reg", "\u00ae", "macr", "\u00af", "deg", "\u00b0", "plusmn", "\u00b1", + "sup2", "\u00b2", "sup3", "\u00b3", "acute", "\u00b4", "micro", "\u00b5", "para", "\u00b6", "middot", "\u00b7", + "cedil", "\u00b8", "sup1", "\u00b9", "ordm", "\u00ba", "raquo", "\u00bb", "frac14", "\u00bc", "frac12", "\u00bd", + "frac34", "\u00be", "iquest", "\u00bf", "Agrave", "\u00c0", "Aacute", "\u00c1", "Acirc", "\u00c2", "Atilde", "\u00c3", + "Auml", "\u00c4", "Aring", "\u00c5", "AElig", "\u00c6", "Ccedil", "\u00c7", "Egrave", "\u00c8", "Eacute", "\u00c9", + "Ecirc", "\u00ca", "Euml", "\u00cb", "Igrave", "\u00cc", "Iacute", "\u00cd", "Icirc", "\u00ce", "Iuml", "\u00cf", + "ETH", "\u00d0", "Ntilde", "\u00d1", "Ograve", "\u00d2", "Oacute", "\u00d3", "Ocirc", "\u00d4", "Otilde", "\u00d5", + "Ouml", "\u00d6", "times", "\u00d7", "Oslash", "\u00d8", "Ugrave", "\u00d9", "Uacute", "\u00da", "Ucirc", "\u00db", + "Uuml", "\u00dc", "Yacute", "\u00dd", "THORN", "\u00de", "szlig", "\u00df", "agrave", "\u00e0", "aacute", "\u00e1", + "acirc", "\u00e2", "atilde", "\u00e3", "auml", "\u00e4", "aring", "\u00e5", "aelig", "\u00e6", "ccedil", "\u00e7", + "egrave", "\u00e8", "eacute", "\u00e9", "ecirc", "\u00ea", "euml", "\u00eb", "igrave", "\u00ec", "iacute", "\u00ed", + "icirc", "\u00ee", "iuml", "\u00ef", "eth", "\u00f0", "ntilde", "\u00f1", "ograve", "\u00f2", "oacute", "\u00f3", + "ocirc", "\u00f4", "otilde", "\u00f5", "ouml", "\u00f6", "divide", "\u00f7", "oslash", "\u00f8", "ugrave", "\u00f9", + "uacute", "\u00fa", "ucirc", "\u00fb", "uuml", "\u00fc", "yacute", "\u00fd", "thorn", "\u00fe", "yuml", "\u00ff", + "fnof", "\u0192", "Alpha", "\u0391", "Beta", "\u0392", "Gamma", "\u0393", "Delta", "\u0394", "Epsilon", "\u0395", + "Zeta", "\u0396", "Eta", "\u0397", "Theta", "\u0398", "Iota", "\u0399", "Kappa", "\u039a", "Lambda", "\u039b", + "Mu", "\u039c", "Nu", "\u039d", "Xi", "\u039e", "Omicron", "\u039f", "Pi", "\u03a0", "Rho", "\u03a1", + "Sigma", "\u03a3", "Tau", "\u03a4", "Upsilon", "\u03a5", "Phi", "\u03a6", "Chi", "\u03a7", "Psi", "\u03a8", + "Omega", "\u03a9", "alpha", "\u03b1", "beta", "\u03b2", "gamma", "\u03b3", "delta", "\u03b4", "epsilon", "\u03b5", + "zeta", "\u03b6", "eta", "\u03b7", "theta", "\u03b8", "iota", "\u03b9", "kappa", "\u03ba", "lambda", "\u03bb", + "mu", "\u03bc", "nu", "\u03bd", "xi", "\u03be", "omicron", "\u03bf", "pi", "\u03c0", "rho", "\u03c1", + "sigmaf", "\u03c2", "sigma", "\u03c3", "tau", "\u03c4", "upsilon", "\u03c5", "phi", "\u03c6", "chi", "\u03c7", + "psi", "\u03c8", "omega", "\u03c9", "thetasym","\u03d1", "upsih", "\u03d2", "piv", "\u03d6", "bull", "\u2022", + "hellip", "\u2026", "prime", "\u2032", "Prime", "\u2033", "oline", "\u203e", "frasl", "\u2044", "weierp", "\u2118", + "image", "\u2111", "real", "\u211c", "trade", "\u2122", "alefsym", "\u2135", "larr", "\u2190", "uarr", "\u2191", + "rarr", "\u2192", "darr", "\u2193", "harr", "\u2194", "crarr", "\u21b5", "lArr", "\u21d0", "uArr", "\u21d1", + "rArr", "\u21d2", "dArr", "\u21d3", "hArr", "\u21d4", "forall", "\u2200", "part", "\u2202", "exist", "\u2203", + "empty", "\u2205", "nabla", "\u2207", "isin", "\u2208", "notin", "\u2209", "ni", "\u220b", "prod", "\u220f", + "sum", "\u2211", "minus", "\u2212", "lowast", "\u2217", "radic", "\u221a", "prop", "\u221d", "infin", "\u221e", + "ang", "\u2220", "and", "\u2227", "or", "\u2228", "cap", "\u2229", "cup", "\u222a", "int", "\u222b", + "there4", "\u2234", "sim", "\u223c", "cong", "\u2245", "asymp", "\u2248", "ne", "\u2260", "equiv", "\u2261", + "le", "\u2264", "ge", "\u2265", "sub", "\u2282", "sup", "\u2283", "nsub", "\u2284", "sube", "\u2286", + "supe", "\u2287", "oplus", "\u2295", "otimes", "\u2297", "perp", "\u22a5", "sdot", "\u22c5", "lceil", "\u2308", + "rceil", "\u2309", "lfloor", "\u230a", "rfloor", "\u230b", "lang", "\u2329", "rang", "\u232a", "loz", "\u25ca", + "spades", "\u2660", "clubs", "\u2663", "hearts", "\u2665", "diams", "\u2666", "quot", "\"", "amp", "\u0026", + "lt", "\u003c", "gt", "\u003e", "OElig", "\u0152", "oelig", "\u0153", "Scaron", "\u0160", "scaron", "\u0161", + "Yuml", "\u0178", "circ", "\u02c6", "tilde", "\u02dc", "ensp", "\u2002", "emsp", "\u2003", "thinsp", "\u2009", + "zwnj", "\u200c", "zwj", "\u200d", "lrm", "\u200e", "rlm", "\u200f", "ndash", "\u2013", "mdash", "\u2014", + "lsquo", "\u2018", "rsquo", "\u2019", "sbquo", "\u201a", "ldquo", "\u201c", "rdquo", "\u201d", "bdquo", "\u201e", + "dagger", "\u2020", "Dagger", "\u2021", "permil", "\u2030", "lsaquo", "\u2039", "rsaquo", "\u203a", "euro", "\u20ac" + }; - int position, nextPosition; - int i; + static { + for (int i=0; i+1; + * &#x; + * &; + */ + public static String resolveHTMLEntity(String anEntity) { + if (anEntity.length()<3 || anEntity.length()>10 || + anEntity.charAt(0)!='&' || + anEntity.charAt(anEntity.length()-1)!=';') + return anEntity; + + if (anEntity.charAt(1)=='#') { + try { + int number=-1; + + if (anEntity.charAt(2)=='x') { + number = Integer.parseInt(anEntity.substring(3,anEntity.length()-1), 16); + } + else { + number = Integer.parseInt(anEntity.substring(2,anEntity.length()-1), 10); + } + + if (number>=Character.MIN_VALUE && number<=Character.MAX_VALUE && + Character.isDefined((char) number)) { + return new String(new char[]{(char) number}); + } + } + catch (NumberFormatException e) { + } + } + else { + String name = anEntity.substring(1,anEntity.length()-1); + + String result = (String) htmlEntities.get(name); + + if (result!=null) + return result; + } + + return anEntity; + } + + /** + * Resolve all HTML entities (&....;) in a text + */ + public static String resolveHTMLEntites(String aText) { StringBuffer result = new StringBuffer(); - position=0; + int oldPosition = 0; + int position; do { - nextPosition = StringRoutines.indexOfCharacters(aText, CHARACTERS_TO_ESCAPE, position); + position = aText.indexOf("&", oldPosition); + if (position<0) + position = aText.length(); - if (nextPosition<0) - nextPosition = aText.length(); + result.append(aText.substring(oldPosition,position)); - result.append(aText.substring(position, nextPosition)); + if (positionposition+1) { + result.append(resolveHTMLEntity(aText.substring(position, position2+1))); + oldPosition=position2+1; } - position=nextPosition+1; - } - while (nextPosition