put some regular expressions in which turn...

author john <john>

Sat, 17 May 2003 16:01:12 +0000 (16:01 +0000)

committer john <john>

Sat, 17 May 2003 16:01:12 +0000 (16:01 +0000)
author john <john>
Sat, 17 May 2003 16:01:12 +0000 (16:01 +0000)
committer john <john>
Sat, 17 May 2003 16:01:12 +0000 (16:01 +0000)
diff --git a/source/mircoders/pdf/PDFGenerator.java b/source/mircoders/pdf/PDFGenerator.java

index 2176f34..7679103 100755 (executable)
--- a/source/mircoders/pdf/PDFGenerator.java
+++ b/source/mircoders/pdf/PDFGenerator.java
@@ -30,6 +30,8 @@
  package mircoders.pdf;
  
  import gnu.regexp.RE;
+import gnu.regexp.REMatch;
+import gnu.regexp.REMatchEnumeration;
  import gnu.regexp.REException;
  
  import java.io.ByteArrayOutputStream;
@@ -616,6 +618,7 @@ public class PDFGenerator{
      */
      
      EntityList images=DatabaseContentToMedia.getInstance().getImages(entityContent);
+    String isHTML  = entityContent.getValue("is_html");
      String theTitle = entityContent.getValue("title");
      String theCreator = entityContent.getValue("creator");
      String theDate = entityContent.getValue("webdb_create_formatted");
@@ -628,24 +631,87 @@ public class PDFGenerator{
      String theContent = "";
      String theDescription = "";
      
-    try { 
-      RE re1 = new RE("\r?\n\r?\n");
-      String theContent1 = re1.substituteAll(theContentRaw,"BREAKHERE");
-      String theDescription1 = re1.substituteAll(theDescriptionRaw,"BREAKHERE");
+    if (isHTML.equals("1")){
        
-      RE re2 = new RE("\r?\n");
-      String theContent2 = re2.substituteAll(theContent1," ");
-      String theDescription2 = re2.substituteAll(theDescription1," ");
        
-      RE re3 = new RE("BREAKHERE");
-      theContent = "    " + re3.substituteAll(theContent2,"\n    ");
-      theDescription = re3.substituteAll(theDescription2,"\n    ");
+            
+      try { 
+       RE nobackslashr = new RE("\r");
+       theContent= nobackslashr.substituteAll(theContentRaw,"");
+       
+       RE HxTag = new RE("</?h[1-6][^>]*>",RE.REG_ICASE);
+       theContent = HxTag.substituteAll(theContent,"\n\n");
+       
+       RE ListItemTag = new RE("<li[^>]*>",RE.REG_ICASE);
+       theContent = ListItemTag.substituteAll(theContent,"\n * ");
  
+       RE ListTag = new RE("<(u|o)l[^>]*>",RE.REG_ICASE);
+       theContent = ListTag.substituteAll(theContent,"\n");
+       
+       RE DivTag = new RE("</?div[^>]*>",RE.REG_ICASE);
+       theContent= DivTag.substituteAll(theContent,"\n");
+
+       RE PTag = new RE("<(p|P)([:space:]+[^>]*)?>");
+       theContent= PTag.substituteAll(theContent,"\n    ");
+
+       RE PTagClose = new RE("</(p|P)([:space:]+[^>]*)?>");
+       theContent= PTagClose.substituteAll(theContent,"\n");
+
+       RE BRTag = new RE("<(br|BR)([:space:]+[^>]*)?>");
+       theContent= BRTag.substituteAll(theContent,"\n");
+       
+       RE ATagAll = new RE("<a[^>]*href=(?:\"|\')([^#\"\'][^\'\"]+)(?:\"|\')[^>]*>(.*?)</a>",RE.REG_ICASE);
+       REMatchEnumeration atags= ATagAll.getMatchEnumeration(theContent);
+       String theContentCopy=theContent;
+       while (atags.hasMoreMatches()){
+         REMatch atag = atags.nextMatch();
+         String atagString=atag.toString();
+         String atagStringHref=atag.toString(1);
+         String atagStringText=atag.toString(2);
+         int begin=theContentCopy.indexOf(atagString);
+         theContentCopy=theContentCopy.substring(0,begin) + atagStringText + " ["+ atagStringHref + "] " + theContentCopy.substring(begin+atagString.length());
+       }
+       theContent=theContentCopy;
+       
+       RE noTags = new RE("<[^>]*>");
+       theContent= noTags.substituteAll(theContent," ");
+       
+       theContent=mir.util.Translate.decode(theContent);
+
+       RE re1 = new RE("\r?\n\r?\n");
+       String theDescription1 = re1.substituteAll(theDescriptionRaw,"BREAKHERE");
+       
+       RE re2 = new RE("\r?\n");
+       String theDescription2 = re2.substituteAll(theDescription1," ");
+       
+       RE re3 = new RE("BREAKHERE");
+       theDescription = re3.substituteAll(theDescription2,"\n    ");
+       
+
+      }
+      catch(REException ree){
+       logger.error(ree.getMessage());
+      }
      }
-    catch(REException ree){
-      logger.error(ree.getMessage());
-    }
+    else {
+      try { 
+       RE re1 = new RE("\r?\n\r?\n");
+       String theContent1 = re1.substituteAll(theContentRaw,"BREAKHERE");
+       String theDescription1 = re1.substituteAll(theDescriptionRaw,"BREAKHERE");
+       
+       RE re2 = new RE("\r?\n");
+       String theContent2 = re2.substituteAll(theContent1," ");
+       String theDescription2 = re2.substituteAll(theDescription1," ");
+       
+       RE re3 = new RE("BREAKHERE");
+       theContent = "    " + re3.substituteAll(theContent2,"\n    ");
+       theDescription = re3.substituteAll(theDescription2,"\n    ");
  
+      }
+      catch(REException ree){
+       logger.error(ree.getMessage());
+      }
+    }
  
      addArticleSeparator();
author	john <john>
	Sat, 17 May 2003 16:01:12 +0000 (16:01 +0000)
committer	john <john>
	Sat, 17 May 2003 16:01:12 +0000 (16:01 +0000)