user agent filter now works with substrings
author zapata <zapata>
Wed, 15 Aug 2007 22:25:17 +0000 (22:25 +0000)
committer zapata <zapata>
Wed, 15 Aug 2007 22:25:17 +0000 (22:25 +0000)
source/mircoders/abuse/FilterEngine.java
source/mircoders/abuse/RegularExpressionFilterType.java
source/mircoders/abuse/SubStringFilterType.java [new file with mode: 0644]
source/mircoders/abuse/URLBlacklistFilterType.java

index 73da851..f21885c 100755 (executable)
@@ -154,7 +154,8 @@ public class FilterEngine {
           Filter filter = new Filter(entity);
           introduceFilter(filter);
         }
-        catch (AbuseExc e) {
+        catch (Throwable e) {
+          logger.debug("Misbehaving filer: " + entity.toString() + ": " + e);
         }
       }
     }
@@ -264,7 +265,7 @@ public class FilterEngine {
           int lastPriority = Integer.parseInt(lastPriorityString);
           priority = Integer.toString(lastPriority + 1);
         }
-        catch (Exception e) {
+        catch (Throwable e) {
         }
       }
 
index 8eceafa..c62abbe 100755 (executable)
@@ -28,7 +28,6 @@
 
 package mircoders.abuse;
 
-import gnu.regexp.RE;
 import mir.entity.Entity;
 import mir.session.Request;
 
@@ -36,6 +35,8 @@ import java.util.Arrays;
 import java.util.Iterator;
 import java.util.List;
 
+import org.apache.oro.text.regex.*;
+
 
 /**
   * A description of a regular expression filter.
@@ -75,69 +76,66 @@ import java.util.List;
 
   public FilterInstance constructFilterInstance(final String anExpression) throws AbuseExc {
     try {
-      new RE(anExpression);
-
-      return new FilterInstance() {
+      int flags = 0;
 
-        public boolean test(Entity anEntity, Request aRequest) {
+      if (!caseSensitive) {
+        flags |= Perl5Compiler.CASE_INSENSITIVE_MASK;
+      }
 
-          try {
-            Iterator j;
-            int flags = 0;
+      final Pattern pattern = (new Perl5Compiler()).compile(anExpression, flags);
 
-            if (!caseSensitive) {
-              flags |= RE.REG_ICASE;
-            }
+      return new FilterInstance() {
 
-            RE regularExpression = new RE(anExpression, flags);
+        public boolean test(Entity anEntity, Request aRequest) {
+          PatternMatcher matcher = new Perl5Matcher();
 
-            switch (fieldKind) {
-              case REQUEST_HEADERS:
-                if (selectedFields != null) {
-                  j = selectedFields.iterator();
+          Iterator j;
 
-                  while (j.hasNext()) {
-                    String fieldName = (String) j.next();
-                    String field = aRequest.getHeader(fieldName);
-
-                    if (exactMatch) {
-                      if (field != null && regularExpression.isMatch(field)) {
-                        return true;
-                      }
-                    }
-                    else {
-                      if (field != null && regularExpression.getMatch(field) != null) {
-                        return true;
-                      }
-                    }
-                  }
-                }
-                break;
-              case ENTITY_FIELDS:
-                if (selectedFields != null) {
-                  j = selectedFields.iterator();
-                }
-                else {
-                  j = anEntity.getFieldNames().iterator();
-                }
+          switch (fieldKind) {
+            case REQUEST_HEADERS:
+              if (selectedFields != null) {
+                j = selectedFields.iterator();
 
                 while (j.hasNext()) {
-                  String field = anEntity.getFieldValue( (String) j.next());
+                  String fieldName = (String) j.next();
+                  String field = aRequest.getHeader(fieldName);
 
                   if (exactMatch) {
-                    if (field != null && regularExpression.isMatch(field)) {
+                    if (field != null && matcher.matches(field, pattern)) {
                       return true;
                     }
                   }
                   else {
-                    if (field != null && regularExpression.getMatch(field) != null) {
+                    if (field != null && matcher.contains(field, pattern)) {
                       return true;
                     }
                   }
                 }
-            }
-          }
-          catch (Throwable t) {
+              }
+              break;
+
+            case ENTITY_FIELDS:
+              if (selectedFields != null) {
+                j = selectedFields.iterator();
+              }
+              else {
+                j = anEntity.getFieldNames().iterator();
+              }
+
+              while (j.hasNext()) {
+                String field = anEntity.getFieldValue( (String) j.next());
+
+                if (exactMatch) {
+                  if (field != null && matcher.matches(field, pattern)) {
+                    return true;
+                  }
+                }
+                else {
+                  if (field != null && matcher.contains(field, pattern)) {
+                    return true;
+                  }
+                }
+              }
           }
           return false;
         }
@@ -147,7 +145,7 @@ import java.util.List;
         }
       };
     }
-    catch (Throwable t) {
+    catch (MalformedPatternException t) {
       throw new AbuseExc("Invalid expression: " + t.getMessage());
     }
   }
diff --git a/source/mircoders/abuse/SubStringFilterType.java b/source/mircoders/abuse/SubStringFilterType.java
new file mode 100644 (file)
index 0000000..709d5ef
--- /dev/null
@@ -0,0 +1,114 @@
+package mircoders.abuse;\r
+\r
+import mir.entity.Entity;\r
+import mir.session.Request;\r
+\r
+import java.util.Arrays;\r
+import java.util.Iterator;\r
+import java.util.List;\r
+\r
+/**\r
+ * A filter type whose instances match when the filter expression is a substring\r
+ * of (or, for exact matching, equal to) one of the values tested against.\r
+ * The values tested are either entity field values or request header values.\r
+ */\r
+public class SubStringFilterType extends AbstractFilterType {\r
+    private boolean exactMatch;\r
+    private boolean caseSensitive;\r
+    private int fieldKind;\r
+    private List selectedFields;\r
+\r
+    /** Field kind: test the values of the entity's fields. */\r
+    public static final int ENTITY_FIELDS = 0;\r
+    /** Field kind: test the request's headers named by the selected fields. */\r
+    public static final int REQUEST_HEADERS = 2;\r
+\r
+    /** Creates a case insensitive, non-exact filter type testing all entity fields. */\r
+    public SubStringFilterType(String aName) {\r
+      this(aName, false, false, null);\r
+    }\r
+\r
+    /** Creates a filter type of kind {@link #ENTITY_FIELDS}. */\r
+    public SubStringFilterType(String aName, boolean aCaseSensitive, boolean anExactMatch, String[] aSelectedFields) {\r
+      this (aName, aCaseSensitive, anExactMatch, ENTITY_FIELDS, aSelectedFields);\r
+    }\r
+\r
+    /**\r
+     * @param aName passed on to the superclass constructor\r
+     * @param aCaseSensitive if false, values and expression are upper-cased before comparing\r
+     * @param anExactMatch if true, a value must equal the expression instead of containing it\r
+     * @param aFieldKind {@link #ENTITY_FIELDS} or {@link #REQUEST_HEADERS}\r
+     * @param aSelectedFields names of the fields or headers to test; null means all\r
+     *        entity fields, and no headers at all for {@link #REQUEST_HEADERS}\r
+     */\r
+    public SubStringFilterType(String aName, boolean aCaseSensitive, boolean anExactMatch, int aFieldKind, String[] aSelectedFields) {\r
+      super(aName);\r
+      fieldKind = aFieldKind;\r
+      caseSensitive = aCaseSensitive;\r
+      exactMatch = anExactMatch;\r
+      selectedFields = aSelectedFields == null ? null : Arrays.asList(aSelectedFields);\r
+    }\r
+\r
+  /**\r
+   * Creates a filter instance matching values against the given expression.\r
+   * Null values (the tested field or header yields null) never match.\r
+   */\r
+  public FilterInstance constructFilterInstance(final String anExpression) throws AbuseExc {\r
+    return new FilterInstance() {\r
+      public boolean test(Entity anEntity, Request aRequest) {\r
+        Iterator j;\r
+        String expression = caseSensitive ? anExpression : anExpression.toUpperCase();\r
+\r
+        switch (fieldKind) {\r
+          case REQUEST_HEADERS:\r
+            if (selectedFields != null) {\r
+              j = selectedFields.iterator();\r
+              while (j.hasNext()) {\r
+                String fieldName = (String) j.next();\r
+                String field = aRequest.getHeader(fieldName);\r
+                if (matches(field, expression)) {\r
+                  return true;\r
+                }\r
+              }\r
+            }\r
+            break;\r
+\r
+          case ENTITY_FIELDS:\r
+            if (selectedFields != null) {\r
+              j = selectedFields.iterator();\r
+            }\r
+            else {\r
+              j = anEntity.getFieldNames().iterator();\r
+            }\r
+\r
+            while (j.hasNext()) {\r
+              String field = anEntity.getFieldValue( (String) j.next());\r
+\r
+              if (matches(field, expression)) {\r
+                return true;\r
+              }\r
+            }\r
+        }\r
+        return false;\r
+      }\r
+\r
+      /** Tests one value; aValue may be null, anExpression is already case-normalised. */\r
+      private boolean matches(String aValue, String anExpression) {\r
+        // Check for null before upper-casing: dereferencing first would throw\r
+        if (aValue == null) {\r
+          return false;\r
+        }\r
+        String value = caseSensitive ? aValue : aValue.toUpperCase();\r
+\r
+        if (exactMatch) {\r
+          return value.equals(anExpression);\r
+        }\r
+        return value.indexOf(anExpression) >= 0;\r
+      }\r
+\r
+      public String status() {\r
+        return null;\r
+      }\r
+    };\r
+  }\r
+}\r
index 1bb8b03..74c1199 100644 (file)
@@ -75,16 +75,6 @@ public class URLBlacklistFilterType extends AbstractFilterType {
     };\r
   }\r
 \r
-  public static void main(String[] someArguments) {\r
-    HashSet set = new HashSet();\r
-    harvestURLDomains("[URL]https://www.nel9la41.org/informatica[/URL] [URL]http://www.ceud.org/liberi[/URL] [URL]http://www.e6tate.org/danni[/URL] [URL]http://www.e6tate.org/shakira[/URL] [URL]http://www.ceud.org/musica[/URL] [URL]http://www.nel9la41.org/troie[/URL] [URL]http://www.nel9la41.org/corriere-della-sera[/URL] [URL]http://www.e6tate.org/serie-a[/URL] [URL]http://www.nel9la41.org/calciomercato[/URL] [URL]http://www.e6tate.org/vita[/URL] [URL]http://www.e6tate.org/pene[/URL] [URL]http://www.e6tate.org/barzellette[/URL] [URL]http://www.ceud.org/pagine-bianche[/URL] [URL]http://www.e6tate.org/playboy[/URL] [URL]http://www.nel9la41.org/totti[/URL] [URL]http://www.e6tate.org/trenitalia[/URL] [URL]http://www.ceud.org/bambini[/URL] [URL]http://www.ceud.org/pornografia[/URL] [URL]http://www.e6tate.org/corriere-della-sera[/URL] [URL]http://www.nel9la41.org/musica[/URL] [URL]http://www.ceud.org/serie-a[/URL] [URL]http://www.nel9la41.org/barzellette[/URL] [URL]http://www.ceud.org/turismo[/URL] [URL]http://www.ceud.org/del-piero[/URL] [URL]http://www.nel9la41.org/calcio[/URL] [URL]http://www.nel9la41.org/google[/URL] [URL]http://www.e6tate.org/sesso[/URL] [URL]http://www.nel9la41.org/jesse-mccartney[/URL]", set);\r
-    Iterator i = set.iterator();\r
-    while (i.hasNext()) {\r
-      System.out.println(i.next());\r
-    }\r
-  }\r
-\r
-\r
   private static void harvestURLDomains(String someText, Set someResult) {\r
     Perl5Matcher matcher = new Perl5Matcher();\r
 \r
@@ -135,7 +125,6 @@ public class URLBlacklistFilterType extends AbstractFilterType {
     }\r
   }\r
 \r
-\r
   private Set whiteList;\r
 \r
   private static Pattern URL_EXPRESSION;\r