proof of concept producer which takes content and indexes it using lucene
authorjohn <john>
Sat, 6 Jul 2002 19:24:30 +0000 (19:24 +0000)
committerjohn <john>
Sat, 6 Jul 2002 19:24:30 +0000 (19:24 +0000)
Not fancy, and not recommended for use yet: it still needs to be tied into config.properties
and to do some cleanup (deleting index locks) when exceptions happen.

But it more or less works to index the content, in case anyone was curious about Lucene or how
I'm planning to use it.

source/mircoders/producer/IndexingProducerNode.java [new file with mode: 0755]

diff --git a/source/mircoders/producer/IndexingProducerNode.java b/source/mircoders/producer/IndexingProducerNode.java
new file mode 100755 (executable)
index 0000000..d4c8dba
--- /dev/null
@@ -0,0 +1,144 @@
+package mircoders.producer;
+
+import java.util.*;
+import java.io.*;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.index.*;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+
+import freemarker.template.*;
+
+
+import mir.util.*;
+import mir.producer.*;
+//import mir.generator.*;
+import mircoders.global.*;
+import mircoders.localizer.*;
+import mir.entity.*;
+import mir.entity.adapter.*;
+import mircoders.entity.*;
+import mircoders.storage.*;
+
+
+public class IndexingProducerNode implements ProducerNode {
+  private String contentKey;
+  private String indexPath;
+    
+
+  public IndexingProducerNode(String aContentKey, String pathToIndex) {
+    contentKey = aContentKey;
+    indexPath=pathToIndex;
+  }
+
+  public void produce(Map aValueMap, String aVerb, PrintWriter aLogger) throws ProducerFailure {
+    IndexWriter indexWriter;
+    Object data;
+    Entity entity;
+
+    long startTime;
+    long endTime;
+
+    startTime = System.currentTimeMillis();
+    
+    try {
+      data = ParameterExpander.findValueForKey( aValueMap, contentKey );
+
+      if (! (data instanceof EntityAdapter)) {
+        throw new ProducerFailure("IndexingProducerNode: value of '"+contentKey+"' is not an EntityAdapter, but an " + data.getClass().getName(), null);
+      }
+      
+      entity = ((EntityAdapter) data).getEntity();
+      if (! (entity instanceof EntityContent)) {
+        throw new ProducerFailure("IndexingProducerNode: value of '"+contentKey+"' is not a content EntityAdapter, but a " + entity.getClass().getName() + " adapter", null);
+      }
+      aLogger.println("Indexing " + (String) entity.getValue("id") + " into " + indexPath);
+      aLogger.flush();
+      
+      IndexReader indexReader = IndexReader.open(indexPath);
+      indexReader.delete(new Term("id",entity.getValue("id")));
+      indexReader.close();
+      
+      indexWriter = new IndexWriter(indexPath, new StandardAnalyzer(), false);
+      Document theDoc =  new Document();
+      
+      // Keyword is stored and indexed, but not tokenized
+      // Text is tokenized,stored, indexed 
+      // Unindexed is not tokenized or indexed, only stored
+      // Unstored is tokenized and indexed, but not stored
+      theDoc.add(Field.Keyword("id",entity.getValue("id")));
+      theDoc.add(Field.Keyword("where",entity.getValue("publish_path")+entity.getValue("id")+".shtml"));
+      theDoc.add(Field.Text("creator",entity.getValue("creator")));
+      theDoc.add(Field.Text("title",entity.getValue("title")));
+      theDoc.add(Field.Keyword("webdb_create",entity.getValue("webdb_create_formatted")));
+      theDoc.add(Field.UnStored("content_and_description",entity.getValue("description")+entity.getValue("content_data")));
+      
+      //topics
+      TemplateModel topics=entity.get("to_topics");
+      aLogger.println("THE CLASS NAME WAS: "+entity.get("to_topics").getClass().getName());
+      while (((TemplateListModel)topics).hasNext()){
+         theDoc.add(Field.UnStored("topic",((SimpleHash)((TemplateListModel)topics).next()).get("title").toString()));
+      }
+
+      
+      //media
+      
+      //images
+      TemplateModel images=entity.get("to_media_images");
+      if (images != null){
+         theDoc.add(Field.UnStored("media","images"));
+      }
+      //audio
+      TemplateModel audio=entity.get("to_media_audio");
+      if (audio != null){
+         theDoc.add(Field.UnStored("media","audio"));
+      }
+      //video
+      TemplateModel video=entity.get("to_media_video");
+      if (video != null){
+         theDoc.add(Field.UnStored("media","video"));
+      }
+
+      //comments-just aggregate all relevant fields
+      String commentsAggregate = "";
+      TemplateModel comments=entity.get("to_comments");
+      if (comments != null){
+       while (((TemplateListModel)comments).hasNext()){
+         SimpleHash aComment = (SimpleHash)((TemplateListModel)comments).next();
+         commentsAggregate = commentsAggregate + " " + aComment.get("title").toString() 
+           + " " + aComment.get("creator").toString() 
+           + " " + aComment.get("text").toString();
+       }
+      }
+      theDoc.add(Field.UnStored("comments",commentsAggregate));
+
+      indexWriter.addDocument(theDoc);
+      indexWriter.close();
+      
+    }
+    catch (Throwable t) {
+      aLogger.println("Error while indexing content: " + t.getMessage());
+      t.printStackTrace(aLogger);
+      //should remove index lock here.....jd
+      throw new ProducerFailure(t.getMessage(), t);
+    }
+      
+      
+  
+      
+    endTime = System.currentTimeMillis();
+    
+    aLogger.println("  IndexTime: " + (endTime-startTime) + " ms<br>");
+    aLogger.flush();
+  }
+
+  public Set buildVerbSet() {
+    return new HashSet();
+  }
+}
+
+
+