package mircoders.pdf;
import gnu.regexp.RE;
+import gnu.regexp.REMatch;
+import gnu.regexp.REMatchEnumeration;
import gnu.regexp.REException;
import java.io.ByteArrayOutputStream;
*/
EntityList images=DatabaseContentToMedia.getInstance().getImages(entityContent);
+ String isHTML = entityContent.getValue("is_html");
String theTitle = entityContent.getValue("title");
String theCreator = entityContent.getValue("creator");
String theDate = entityContent.getValue("webdb_create_formatted");
String theContent = "";
String theDescription = "";
- try {
- RE re1 = new RE("\r?\n\r?\n");
- String theContent1 = re1.substituteAll(theContentRaw,"BREAKHERE");
- String theDescription1 = re1.substituteAll(theDescriptionRaw,"BREAKHERE");
+ // Literal-first comparison: evaluates to false instead of throwing a
+ // NullPointerException should getValue("is_html") hand back null.
+ if ("1".equals(isHTML)){
- RE re2 = new RE("\r?\n");
- String theContent2 = re2.substituteAll(theContent1," ");
- String theDescription2 = re2.substituteAll(theDescription1," ");
- RE re3 = new RE("BREAKHERE");
- theContent = " " + re3.substituteAll(theContent2,"\n ");
- theDescription = re3.substituteAll(theDescription2,"\n ");
+
+ // HTML content: map structural tags to line breaks, rewrite links as
+ // "text [href]", then replace every remaining tag with a space.
+ try {
+ // remove carriage returns so only \n line breaks remain
+ RE nobackslashr = new RE("\r");
+ theContent= nobackslashr.substituteAll(theContentRaw,"");
+
+ // opening and closing headings become a blank line
+ RE HxTag = new RE("</?h[1-6][^>]*>",RE.REG_ICASE);
+ theContent = HxTag.substituteAll(theContent,"\n\n");
+
+ // each list item starts a new line with a " * " bullet
+ RE ListItemTag = new RE("<li[^>]*>",RE.REG_ICASE);
+ theContent = ListItemTag.substituteAll(theContent,"\n * ");
+ RE ListTag = new RE("<(u|o)l[^>]*>",RE.REG_ICASE);
+ theContent = ListTag.substituteAll(theContent,"\n");
+
+ RE DivTag = new RE("</?div[^>]*>",RE.REG_ICASE);
+ theContent= DivTag.substituteAll(theContent,"\n");
+
+ // [[:space:]] is the POSIX whitespace class. A bare [:space:] is only a
+ // bracket list of the characters ':', 's', 'p', 'a', 'c', 'e', which fails
+ // on tags carrying attributes (<p class="x">) and wrongly accepts tags
+ // such as <param>.
+ RE PTag = new RE("<(p|P)([[:space:]]+[^>]*)?>");
+ theContent= PTag.substituteAll(theContent,"\n ");
+
+ RE PTagClose = new RE("</(p|P)([[:space:]]+[^>]*)?>");
+ theContent= PTagClose.substituteAll(theContent,"\n");
+
+ RE BRTag = new RE("<(br|BR)([[:space:]]+[^>]*)?>");
+ theContent= BRTag.substituteAll(theContent,"\n");
+
+ // Links: group 1 is the href (hrefs starting with '#' are not matched),
+ // group 2 is the link text. Each anchor is replaced by "text [href] ".
+ RE ATagAll = new RE("<a[^>]*href=(?:\"|\')([^#\"\'][^\'\"]+)(?:\"|\')[^>]*>(.*?)</a>",RE.REG_ICASE);
+ REMatchEnumeration atags= ATagAll.getMatchEnumeration(theContent);
+ // The enumeration reads the untouched theContent; the edits are applied
+ // to a separate copy that replaces theContent after the loop.
+ String theContentCopy=theContent;
+ while (atags.hasMoreMatches()){
+ REMatch atag = atags.nextMatch();
+ String atagString=atag.toString();
+ String atagStringHref=atag.toString(1);
+ String atagStringText=atag.toString(2);
+ int begin=theContentCopy.indexOf(atagString);
+ theContentCopy=theContentCopy.substring(0,begin) + atagStringText + " ["+ atagStringHref + "] " + theContentCopy.substring(begin+atagString.length());
+ }
+ theContent=theContentCopy;
+
+ // whatever tags are still left are replaced by a single space
+ RE noTags = new RE("<[^>]*>");
+ theContent= noTags.substituteAll(theContent," ");
+
+ // NOTE(review): presumably resolves HTML entities - confirm against mir.util.Translate
+ theContent=mir.util.Translate.decode(theContent);
+
+ // The description gets the plain-text treatment: blank lines are kept as
+ // paragraph breaks, single line breaks become spaces.
+ RE re1 = new RE("\r?\n\r?\n");
+ String theDescription1 = re1.substituteAll(theDescriptionRaw,"BREAKHERE");
+
+ RE re2 = new RE("\r?\n");
+ String theDescription2 = re2.substituteAll(theDescription1," ");
+
+ RE re3 = new RE("BREAKHERE");
+ theDescription = re3.substituteAll(theDescription2,"\n ");
+
+
+ }
+ catch(REException ree){
+ logger.error(ree.getMessage());
+ }
}
- catch(REException ree){
- logger.error(ree.getMessage());
- }
+ else {
+ // Plain-text content: blank lines are kept as paragraph breaks (marked with
+ // BREAKHERE while single line breaks are turned into spaces).
+ try {
+ RE re1 = new RE("\r?\n\r?\n");
+ String theContent1 = re1.substituteAll(theContentRaw,"BREAKHERE");
+ String theDescription1 = re1.substituteAll(theDescriptionRaw,"BREAKHERE");
+
+ RE re2 = new RE("\r?\n");
+ String theContent2 = re2.substituteAll(theContent1," ");
+ String theDescription2 = re2.substituteAll(theDescription1," ");
+
+ RE re3 = new RE("BREAKHERE");
+ theContent = " " + re3.substituteAll(theContent2,"\n ");
+ theDescription = re3.substituteAll(theDescription2,"\n ");
+ }
+ catch(REException ree){
+ logger.error(ree.getMessage());
+ }
+ }
addArticleSeparator();