support for CAPTCHAs
[mir.git] / source / mircoders / pdf / PDFGenerator.java
index 8405ae0..4514d00 100755 (executable)
  */
 package mircoders.pdf;
 
-import gnu.regexp.RE;
-import gnu.regexp.REException;
-import gnu.regexp.REMatch;
-import gnu.regexp.REMatchEnumeration;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.util.*;
-
+import com.lowagie.text.*;
+import com.lowagie.text.pdf.*;
 import mir.config.MirPropertiesConfiguration;
 import mir.entity.EntityBrowser;
 import mir.entity.adapter.EntityAdapter;
+import mir.generator.GeneratorExc;
+import mir.generator.GeneratorHelper;
 import mir.log.LoggerWrapper;
 import mir.misc.StringUtil;
 import mir.util.DateTimeRoutines;
 import mir.util.HTMLRoutines;
 import mir.util.ParameterExpander;
-import mir.generator.GeneratorHelper;
-import mir.generator.GeneratorExc;
 import mircoders.entity.EntityContent;
 import mircoders.entity.EntityImages;
-import mircoders.storage.DatabaseImages;
 import mircoders.global.MirGlobal;
 import mircoders.localizer.MirLocalizerExc;
-
-import com.lowagie.text.BadElementException;
-import com.lowagie.text.Document;
-import com.lowagie.text.DocumentException;
-import com.lowagie.text.Element;
-import com.lowagie.text.Font;
-import com.lowagie.text.Image;
-import com.lowagie.text.PageSize;
-import com.lowagie.text.Paragraph;
-import com.lowagie.text.Phrase;
-import com.lowagie.text.pdf.BaseFont;
-import com.lowagie.text.pdf.ColumnText;
-import com.lowagie.text.pdf.PdfContentByte;
-import com.lowagie.text.pdf.PdfTemplate;
-import com.lowagie.text.pdf.PdfWriter;
+import mircoders.storage.DatabaseImages;
 import multex.Failure;
+import org.apache.oro.text.regex.*;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.util.*;
+import java.util.List;
 
 public class PDFGenerator {
   public Document document;
@@ -120,6 +104,46 @@ public class PDFGenerator {
   public int bigImageCaptionFontFamily;
 
   protected MirPropertiesConfiguration configuration;
+  // Regular expressions used when flattening HTML/plain text for PDF output.
+  // They are compiled once in the static initializer below and shared by all
+  // instances, so every one of them is compiled with READ_ONLY_MASK, which is
+  // what ORO requires for a Perl5Pattern that is shared between threads.
+  private static Pattern nobackslashr;
+  private static Pattern HxTag;
+  private static Pattern ListItemTag;
+  private static Pattern ListTag;
+  private static Pattern DivTag;
+  private static Pattern PTag;
+  private static Pattern PTagClose;
+  private static Pattern BRTag;
+  private static Pattern ATagAll;
+  private static Pattern noTags;
+  private static Pattern re1;
+  private static Pattern re2;
+  private static Pattern re3;
+
+  static {
+    Perl5Compiler compiler = new Perl5Compiler();
+
+    try {
+      // Assign the static field. Declaring a local variable of the same name
+      // here would shadow the field and leave it null for every later use.
+      nobackslashr = compiler.compile("\r", Perl5Compiler.READ_ONLY_MASK);
+
+      HxTag = compiler.compile("</?h[1-6][^>]*>",
+              Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.CASE_INSENSITIVE_MASK);
+      ListItemTag = compiler.compile("<li[^>]*>",
+              Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.CASE_INSENSITIVE_MASK);
+      ListTag = compiler.compile("<(u|o)l[^>]*>",
+              Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.CASE_INSENSITIVE_MASK);
+      DivTag = compiler.compile("</?div[^>]*>",
+              Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.CASE_INSENSITIVE_MASK);
+
+      // "\s" instead of a bare "[:space:]": outside a bracket expression the
+      // latter is just the character class {':', 's', 'p', 'a', 'c', 'e'}, so
+      // tags with attributes (<p class="x">, <br />) were never matched while
+      // unrelated tags such as <param ...> were.
+      PTag = compiler.compile("<(p|P)(\\s+[^>]*)?>", Perl5Compiler.READ_ONLY_MASK);
+      PTagClose = compiler.compile("</(p|P)(\\s+[^>]*)?>", Perl5Compiler.READ_ONLY_MASK);
+      BRTag = compiler.compile("<(br|BR)(\\s+[^>]*)?>", Perl5Compiler.READ_ONLY_MASK);
+
+      // Group 1 is the href value (fragment-only links starting with '#' are
+      // excluded), group 2 is the link text.
+      ATagAll = compiler.compile("<a[^>]*href=(?:\"|\')([^#\"\'][^\'\"]+)(?:\"|\')[^>]*>(.*?)</a>",
+              Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.CASE_INSENSITIVE_MASK);
+      noTags = compiler.compile("<[^>]*>", Perl5Compiler.READ_ONLY_MASK);
+
+      // re1: blank line (paragraph break); re2: single line break;
+      // re3: the temporary marker substituted for paragraph breaks.
+      re1 = compiler.compile("\r?\n\r?\n", Perl5Compiler.READ_ONLY_MASK);
+      re2 = compiler.compile("\r?\n", Perl5Compiler.READ_ONLY_MASK);
+      re3 = compiler.compile("BREAKHERE", Perl5Compiler.READ_ONLY_MASK);
+    }
+    catch (MalformedPatternException e) {
+      // Keep the original exception as the cause so the offending pattern
+      // can be identified from the stack trace.
+      throw new RuntimeException(e.getMessage(), e);
+    }
+  }
 
 
   public PDFGenerator(ByteArrayOutputStream out) {
@@ -612,9 +636,7 @@ public class PDFGenerator {
 
 
   private boolean enoughY(int heightOfBlockToAdd) {
-    if ((currentYPosition - heightOfBlockToAdd - footerHeight) < bottomEdge)
-      return false;
-    return true;
+    return (currentYPosition - heightOfBlockToAdd - footerHeight) >= bottomEdge;
   }
 
 
@@ -668,106 +690,88 @@ public class PDFGenerator {
     String theDescription = "";
 
     if (isHTML.equals("1")) {
+        theContent =
+              Util.substitute(new Perl5Matcher(), nobackslashr, new Perl5Substitution(""), theContentRaw, Util.SUBSTITUTE_ALL);
+        theDescription =
+                Util.substitute(new Perl5Matcher(), nobackslashr, new Perl5Substitution(""), theDescriptionRaw, Util.SUBSTITUTE_ALL);
 
+        theContent = Util.substitute(new Perl5Matcher(), HxTag, new Perl5Substitution("\n\n"), theContent, Util.SUBSTITUTE_ALL);
+        theDescription = Util.substitute(new Perl5Matcher(), HxTag, new Perl5Substitution("\n\n"), theDescription, Util.SUBSTITUTE_ALL);
+        theContent  = Util.substitute(new Perl5Matcher(), ListItemTag, new Perl5Substitution("\n * "), theContent, Util.SUBSTITUTE_ALL);
+        theDescription  = Util.substitute(new Perl5Matcher(), ListItemTag, new Perl5Substitution("\n * "), theDescription, Util.SUBSTITUTE_ALL);
 
-      try {
-        RE nobackslashr = new RE("\r");
-        theContent = nobackslashr.substituteAll(theContentRaw, "");
-        theDescription = nobackslashr.substituteAll(theDescriptionRaw, "");
-
-        RE HxTag = new RE("</?h[1-6][^>]*>", RE.REG_ICASE);
-        theContent = HxTag.substituteAll(theContent, "\n\n");
-        theDescription = HxTag.substituteAll(theDescription, "\n\n");
-
-        RE ListItemTag = new RE("<li[^>]*>", RE.REG_ICASE);
-        theContent = ListItemTag.substituteAll(theContent, "\n * ");
-        theDescription = ListItemTag.substituteAll(theDescription, "\n * ");
+        theContent  = Util.substitute(new Perl5Matcher(), ListTag, new Perl5Substitution("\n"), theContent, Util.SUBSTITUTE_ALL);
+        theDescription  = Util.substitute(new Perl5Matcher(), ListTag, new Perl5Substitution("\n"), theDescription, Util.SUBSTITUTE_ALL);
 
-        RE ListTag = new RE("<(u|o)l[^>]*>", RE.REG_ICASE);
-        theContent = ListTag.substituteAll(theContent, "\n");
-        theDescription = ListTag.substituteAll(theDescription, "\n");
+        theContent = Util.substitute(new Perl5Matcher(), DivTag, new Perl5Substitution("\n"), theContent, Util.SUBSTITUTE_ALL);
+        theDescription = Util.substitute(new Perl5Matcher(), DivTag, new Perl5Substitution("\n"), theDescription, Util.SUBSTITUTE_ALL);
 
-        RE DivTag = new RE("</?div[^>]*>", RE.REG_ICASE);
-        theContent = DivTag.substituteAll(theContent, "\n");
-        theDescription = DivTag.substituteAll(theDescription, "\n");
+        theContent = Util.substitute(new Perl5Matcher(), PTag, new Perl5Substitution("\n    "), theContent, Util.SUBSTITUTE_ALL);
+        theDescription = Util.substitute(new Perl5Matcher(), PTag, new Perl5Substitution("\n    "), theDescription, Util.SUBSTITUTE_ALL);
 
-        RE PTag = new RE("<(p|P)([:space:]+[^>]*)?>");
-        theContent = PTag.substituteAll(theContent, "\n    ");
-        theDescription = PTag.substituteAll(theDescription, "\n    ");
+        theContent = Util.substitute(new Perl5Matcher(), PTagClose, new Perl5Substitution("\n"), theContent, Util.SUBSTITUTE_ALL);
+        theDescription = Util.substitute(new Perl5Matcher(), PTagClose, new Perl5Substitution("\n"), theDescription, Util.SUBSTITUTE_ALL);
 
-        RE PTagClose = new RE("</(p|P)([:space:]+[^>]*)?>");
-        theContent = PTagClose.substituteAll(theContent, "\n");
-        theDescription = PTagClose.substituteAll(theDescription, "\n");
+        theContent = Util.substitute(new Perl5Matcher(), BRTag, new Perl5Substitution("\n"), theContent, Util.SUBSTITUTE_ALL);
+        theDescription = Util.substitute(new Perl5Matcher(), BRTag, new Perl5Substitution("\n"), theDescription, Util.SUBSTITUTE_ALL);
 
-        RE BRTag = new RE("<(br|BR)([:space:]+[^>]*)?>");
-        theContent = BRTag.substituteAll(theContent, "\n");
-        theDescription = BRTag.substituteAll(theDescription, "\n");
 
-        RE ATagAll = new RE("<a[^>]*href=(?:\"|\')([^#\"\'][^\'\"]+)(?:\"|\')[^>]*>(.*?)</a>", RE.REG_ICASE);
-        REMatchEnumeration atags = ATagAll.getMatchEnumeration(theContent);
+        // Rewrite every <a href="url">text</a> as "text [url] ", first in the
+        // content and then in the description.
+        // The matcher reports offsets relative to the ORIGINAL input string,
+        // while the copy being edited changes length with every replacement,
+        // so the accumulated length difference is added to each offset before
+        // it is used to splice the copy.
+        Perl5Matcher matcher = new Perl5Matcher();
+        PatternMatcherInput input = new PatternMatcherInput(theContent);
+        int contentShift = 0;
         String theContentCopy = theContent;
-        while (atags.hasMoreMatches()) {
-          REMatch atag = atags.nextMatch();
-          String atagString = atag.toString();
-          String atagStringHref = atag.toString(1);
-          String atagStringText = atag.toString(2);
-          int begin = theContentCopy.indexOf(atagString);
-          theContentCopy = theContentCopy.substring(0, begin) + atagStringText + " [" + atagStringHref + "] " + theContentCopy.substring(begin + atagString.length());
+        while (matcher.contains(input, ATagAll)) {
+          MatchResult atag = matcher.getMatch();
+          // group(1) is the href, group(2) the link text
+          String linkAsText = atag.group(2) + " [" + atag.group(1) + "] ";
+          int begin = atag.beginOffset(0) + contentShift;
+          int end = atag.endOffset(0) + contentShift;
+          theContentCopy =
+                  theContentCopy.substring(0, begin) + linkAsText + theContentCopy.substring(end);
+          contentShift += linkAsText.length() - (end - begin);
         }
         theContent = theContentCopy;
 
-        REMatchEnumeration atags2 = ATagAll.getMatchEnumeration(theDescription);
+
+        input = new PatternMatcherInput(theDescription);
+        int descriptionShift = 0;
+
         String theDescriptionCopy = theDescription;
-        while (atags2.hasMoreMatches()) {
-          REMatch atag = atags2.nextMatch();
-          String atagString = atag.toString();
-          String atagStringHref = atag.toString(1);
-          String atagStringText = atag.toString(2);
-          int begin = theDescriptionCopy.indexOf(atagString);
-          theDescriptionCopy = theDescriptionCopy.substring(0, begin) + atagStringText + " [" + atagStringHref + "] " + theDescriptionCopy.substring(begin + atagString.length());
+        while (matcher.contains(input, ATagAll)) {
+          MatchResult atag = matcher.getMatch();
+          String linkAsText = atag.group(2) + " [" + atag.group(1) + "] ";
+          int begin = atag.beginOffset(0) + descriptionShift;
+          int end = atag.endOffset(0) + descriptionShift;
+          theDescriptionCopy =
+                  theDescriptionCopy.substring(0, begin) + linkAsText + theDescriptionCopy.substring(end);
+          descriptionShift += linkAsText.length() - (end - begin);
         }
         theDescription = theDescriptionCopy;
 
 
-        RE noTags = new RE("<[^>]*>");
-        theContent = noTags.substituteAll(theContent, " ");
-        theDescription = noTags.substituteAll(theDescription, " ");
+        theContent = Util.substitute(new Perl5Matcher(), noTags, new Perl5Substitution(" "), theContent, Util.SUBSTITUTE_ALL);
+        theDescription = Util.substitute(new Perl5Matcher(), noTags, new Perl5Substitution(" "), theDescription, Util.SUBSTITUTE_ALL);
 
         theContent = HTMLRoutines.resolveHTMLEntites(theContent);
         theDescription = HTMLRoutines.resolveHTMLEntites(theDescription);
 
-        RE re1 = new RE("\r?\n\r?\n");
-        String theDescription1 = re1.substituteAll(theDescription, "BREAKHERE");
-
-        RE re2 = new RE("\r?\n");
-        String theDescription2 = re2.substituteAll(theDescription1, " ");
+        String theDescription1 = Util.substitute(new Perl5Matcher(), re1, new Perl5Substitution("BREAKHERE"), theDescription, Util.SUBSTITUTE_ALL);
 
-        RE re3 = new RE("BREAKHERE");
-        theDescription = re3.substituteAll(theDescription2, "\n    ");
+        String theDescription2 = Util.substitute(new Perl5Matcher(), re2, new Perl5Substitution(" "), theDescription1, Util.SUBSTITUTE_ALL);
 
-
-      }
-      catch (REException ree) {
-        logger.error(ree.getMessage());
-      }
-    } else {
-      try {
-        RE re1 = new RE("\r?\n\r?\n");
-        String theContent1 = re1.substituteAll(theContentRaw, "BREAKHERE");
-        String theDescription1 = re1.substituteAll(theDescriptionRaw, "BREAKHERE");
-
-        RE re2 = new RE("\r?\n");
-        String theContent2 = re2.substituteAll(theContent1, " ");
-        String theDescription2 = re2.substituteAll(theDescription1, " ");
-
-        RE re3 = new RE("BREAKHERE");
-        theContent = "    " + re3.substituteAll(theContent2, "\n    ");
-        theDescription = re3.substituteAll(theDescription2, "\n    ");
-
-      }
-      catch (REException ree) {
-        logger.error(ree.getMessage());
-      }
+        theDescription = Util.substitute(new Perl5Matcher(), re3, new Perl5Substitution("\n    "), theDescription2, Util.SUBSTITUTE_ALL);
+    }
+    else {
+      String theContent1 = Util.substitute(new Perl5Matcher(), re1,
+              new Perl5Substitution("BREAKHERE"), theContentRaw, Util.SUBSTITUTE_ALL);
+      String theDescription1 = Util.substitute(new Perl5Matcher(), re1,
+              new Perl5Substitution("BREAKHERE"), theDescriptionRaw, Util.SUBSTITUTE_ALL);
+
+      String theContent2 = Util.substitute(new Perl5Matcher(), re2,
+              new Perl5Substitution(" "), theContent1, Util.SUBSTITUTE_ALL);
+      String theDescription2 = Util.substitute(new Perl5Matcher(), re2,
+              new Perl5Substitution(" "), theDescription1, Util.SUBSTITUTE_ALL);
+
+      theContent = "    " + Util.substitute(new Perl5Matcher(), re3,
+              new Perl5Substitution("\n    "), theContent2, Util.SUBSTITUTE_ALL);
+      theDescription = Util.substitute(new Perl5Matcher(), re3,
+              new Perl5Substitution("\n    "), theDescription2, Util.SUBSTITUTE_ALL);
     }
 
     addArticleSeparator();