- /**
- * this method deletes all but the approved tags html tags
- * it also deletes approved tags which contain malicious-looking attributes and doesn't work at all
- */
- public static String approveHTMLTags(String haystack){
- try {
- String approvedTags="a|img|h1|h2|h3|h4|h5|h6|br|b|i|strong|p";
- String badAttributes="onAbort|onBlur|onChange|onClick|onDblClick|onDragDrop|onError|onFocus|onKeyDown|onKeyPress|onKeyUp|onLoad|onMouseDown|onMouseMove|onMouseOut|onMouseOver|onMouseUp|onMove|onReset|onResize|onSelect|onSubmit|onUnload";
- String approvedProtocols="rtsp|http|ftp|https|freenet|mailto";
-
- // kill all the bad tags that have attributes
- String s = "<\\s*/?\\s*(?!(("+approvedTags+")\\s))\\w+\\s[^>]*>";
- RE regex = new RE(s,RE.REG_ICASE);
- haystack = regex.substituteAll(haystack,"");
-
- // kill all the bad tags that are attributeless
- regex = new RE("<\\s*/?\\s*(?!(("+approvedTags+")\\s*>))\\w+\\s*>",RE.REG_ICASE);
- haystack = regex.substituteAll(haystack,"");
-
- // kill all the tags which have a javascript attribute like onLoad
- regex = new RE("<[^>]*("+badAttributes+")[^>]*>",RE.REG_ICASE);
- haystack = regex.substituteAll(haystack,"");
-
- // kill all the tags which include a url to an unacceptable protocol
- regex = new RE("<\\s*a\\s+[^>]*href=(?!(\'|\")?("+approvedProtocols+"))[^>]*>",RE.REG_ICASE);
- haystack = regex.substituteAll(haystack,"");
-
- return haystack;
- } catch(REException ex){
- ex.printStackTrace();
- return null;
- }
- }
-