X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=source%2Fmircoders%2Fproducer%2FIndexingProducerNode.java;h=3641ec5edc9b75d7da226e628edfc9d112f796a6;hb=a178c6c1739363851d03935b2b6fdebea1d664b5;hp=051f59766f811580de2dedcec022da557efcb23c;hpb=b4c3cd9c13e761f8dcb803b5e6778e223b35c387;p=mir.git
diff --git a/source/mircoders/producer/IndexingProducerNode.java b/source/mircoders/producer/IndexingProducerNode.java
index 051f5976..3641ec5e 100755
--- a/source/mircoders/producer/IndexingProducerNode.java
+++ b/source/mircoders/producer/IndexingProducerNode.java
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2001, 2002 The Mir-coders group
+ * Copyright (C) 2001, 2002 The Mir-coders group
*
* This file is part of Mir.
*
@@ -18,197 +18,205 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* In addition, as a special exception, The Mir-coders gives permission to link
- * the code of this program with the com.oreilly.servlet library, any library
- * licensed under the Apache Software License, The Sun (tm) Java Advanced
- * Imaging library (JAI), The Sun JIMI library (or with modified versions of
- * the above that use the same license as the above), and distribute linked
- * combinations including the two. You must obey the GNU General Public
- * License in all respects for all of the code used other than the above
- * mentioned libraries. If you modify this file, you may extend this exception
- * to your version of the file, but you are not obligated to do so. If you do
- * not wish to do so, delete this exception statement from your version.
+ * the code of this program with any library licensed under the Apache Software License,
+ * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library
+ * (or with modified versions of the above that use the same license as the above),
+ * and distribute linked combinations including the two. You must obey the
+ * GNU General Public License in all respects for all of the code used other than
+ * the above mentioned libraries. If you modify this file, you may extend this
+ * exception to your version of the file, but you are not obligated to do so.
+ * If you do not wish to do so, delete this exception statement from your version.
*/
-
package mircoders.producer;
-import java.util.*;
-import java.io.*;
-
+import mir.entity.Entity;
+import mir.entity.adapter.EntityAdapter;
+import mir.log.LoggerWrapper;
+import mir.misc.StringUtil;
+import mir.producer.AbstractProducerNode;
+import mir.producer.ProducerFailure;
+import mir.util.FileRoutines;
+import mir.util.ParameterExpander;
+import mircoders.entity.EntityContent;
+import mircoders.search.*;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.index.*;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
-import freemarker.template.*;
-
-
-import mir.util.*;
-import mir.producer.*;
-//import mir.generator.*;
-import mircoders.global.*;
-import mircoders.localizer.*;
-import mir.entity.*;
-import mir.entity.adapter.*;
-import mircoders.entity.*;
-import mircoders.storage.*;
+import java.io.File;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.Map;
-public class IndexingProducerNode implements ProducerNode {
+public class IndexingProducerNode extends AbstractProducerNode {
private String contentKey;
private String indexPath;
-
-
- public IndexingProducerNode(String aContentKey, String pathToIndex) {
+ private File indexBasePath;
+
+ public IndexingProducerNode(File anIndexBasePath, String aContentKey, String pathToIndex) {
contentKey = aContentKey;
- indexPath=pathToIndex;
+ indexPath = pathToIndex;
+ indexBasePath = anIndexBasePath;
}
-
- public void produce(Map aValueMap, String aVerb, PrintWriter aLogger) throws ProducerFailure {
- IndexReader indexReader = null;
+
+ public void produce(Map aValueMap, String aVerb, LoggerWrapper aLogger)
+ throws ProducerFailure {
IndexWriter indexWriter = null;
Object data;
Entity entity;
-
long startTime;
long endTime;
-
+ File indexFile = null;
+
startTime = System.currentTimeMillis();
-
+
try {
- data = ParameterExpander.findValueForKey( aValueMap, contentKey );
-
- if (! (data instanceof EntityAdapter)) {
- throw new ProducerFailure("IndexingProducerNode: value of '"+contentKey+"' is not an EntityAdapter, but an " + data.getClass().getName(), null);
+ indexFile = FileRoutines.getAbsoluteOrRelativeFile(indexBasePath, ParameterExpander.expandExpression(aValueMap, indexPath));
+ data = ParameterExpander.findValueForKey(aValueMap, contentKey);
+
+ if (!(data instanceof EntityAdapter)) {
+ throw new ProducerFailure("IndexingProducerNode: value of '" +
+ contentKey + "' is not an EntityAdapter, but an " +
+ data.getClass().getName(), null);
}
-
+
entity = ((EntityAdapter) data).getEntity();
- if (! (entity instanceof EntityContent)) {
- throw new ProducerFailure("IndexingProducerNode: value of '"+contentKey+"' is not a content EntityAdapter, but a " + entity.getClass().getName() + " adapter", null);
+
+ if (!(entity instanceof EntityContent)) {
+ throw new ProducerFailure("IndexingProducerNode: value of '" +
+ contentKey + "' is not a content EntityAdapter, but a " +
+ entity.getClass().getName() + " adapter", null);
}
- aLogger.println("Indexing " + (String) entity.getValue("id") + " into " + indexPath);
- aLogger.flush();
-
- indexReader = IndexReader.open(indexPath);
- indexReader.delete(new Term("id",entity.getValue("id")));
- indexReader.close();
-
- indexWriter = new IndexWriter(indexPath, new StandardAnalyzer(), false);
- Document theDoc = new Document();
-
+
+ aLogger.debug("Indexing " + entity.getFieldValue("id") + " into " + indexFile.getAbsolutePath());
+
+ // create an index here if one does not already exist
+ if (!(IndexReader.indexExists(indexFile))) {
+ aLogger.warn("Didn't find existing index, so I'm making one in " + indexFile.getAbsolutePath());
+
+ IndexWriter indexCreator = new IndexWriter(indexFile, new StandardAnalyzer(), true);
+ indexCreator.close();
+ }
+
+ IndexUtil.unindexEntity((EntityContent) entity, indexFile);
+
+ indexWriter = new IndexWriter(indexFile, new StandardAnalyzer(), false);
+
+ Document theDoc = new Document();
+
// Keyword is stored and indexed, but not tokenized
// Text is tokenized,stored, indexed
// Unindexed is not tokenized or indexed, only stored
// Unstored is tokenized and indexed, but not stored
-
- theDoc.add(Field.Keyword("id",entity.getValue("id")));
- theDoc.add(Field.Keyword("where",entity.getValue("publish_path")+entity.getValue("id")+".shtml"));
- theDoc.add(Field.Text("creator",entity.getValue("creator")));
- theDoc.add(Field.Text("title",entity.getValue("title")));
- theDoc.add(Field.Keyword("webdb_create",entity.getValue("webdb_create_formatted")));
- theDoc.add(Field.UnStored("content_and_description",entity.getValue("description")+entity.getValue("content_data")));
-
- //topics
- TemplateModel topics=entity.get("to_topics");
- aLogger.println("THE CLASS NAME WAS: "+entity.get("to_topics").getClass().getName());
- while (((TemplateListModel)topics).hasNext()){
- theDoc.add(Field.UnStored("topic",((TemplateHashModel)((TemplateListModel)topics).next()).get("title").toString()));
- }
-
-
- //media
-
- //images
- TemplateModel images=entity.get("to_media_images");
- if (images != null){
- //here we should really store a list of urls instead,
- //so we can thumbnail from another server
- theDoc.add(Field.UnStored("media","images"));
-
- }
- //audio
- TemplateModel audio=entity.get("to_media_audio");
- if (audio != null){
- theDoc.add(Field.UnStored("media","audio"));
- }
- //video
- TemplateModel video=entity.get("to_media_video");
- if (video != null){
- theDoc.add(Field.UnStored("media","video"));
+ // this initialization should go somewhere global, like an XML file
+ (new KeywordSearchTerm("id", "", "id", "", "id")).index(theDoc, entity);
+
+ String textValue = entity.getFieldValue("webdb_create");
+ Calendar calendar = GregorianCalendar.getInstance();
+ int year;
+ int month;
+ int day;
+ int hours;
+ int minutes;
+ Date date;
+ String formattedDate="";
+
+ if (textValue!=null) {
+ try {
+ year = Integer.parseInt(textValue.substring(0, 4));
+ month = Integer.parseInt(textValue.substring(5, 7));
+ day = Integer.parseInt(textValue.substring(8, 10));
+ hours = Integer.parseInt(textValue.substring(11, 13));
+ minutes = Integer.parseInt(textValue.substring(14, 16));
+
+ calendar.set(year, month - 1, day, hours, minutes);
+ date = calendar.getTime();
+ SimpleDateFormat formatter = new SimpleDateFormat("yyyy.MM.dd hh:mm");
+ formattedDate = formatter.format(date);
+ }
+ catch (Throwable t) {
+ aLogger.warn("Error while generating content date to index", t);
+ }
}
+ (new KeywordSearchTerm("webdb_create_formatted", "search_date",
+ "webdb_create_formatted", "webdb_create_formatted",
+ "webdb_create_formatted")).indexValue(theDoc,formattedDate);
- //comments-just aggregate all relevant fields
- String commentsAggregate = "";
- TemplateModel comments=entity.get("to_comments");
- if (comments != null){
- while (((TemplateListModel)comments).hasNext()){
- TemplateModel aComment = ((TemplateListModel)comments).next();
- commentsAggregate = commentsAggregate + " " + ((TemplateHashModel)aComment).get("title").toString()
- + " " + ((TemplateHashModel)aComment).get("creator").toString()
- + " " + ((TemplateHashModel)aComment).get("text").toString();
- }
- }
- theDoc.add(Field.UnStored("comments",commentsAggregate));
- indexWriter.addDocument(theDoc);
-
+ (new UnIndexedSearchTerm("", "", "", "where", "where")).indexValue(theDoc,
+ "/"+ year +"/" + month +
+ entity.getFieldValue("id") + ".shtml");
+
+ (new TextSearchTerm("creator", "search_creator", "creator", "creator",
+ "creator")).index(theDoc, entity);
+ (new TextSearchTerm("title", "search_title", "title", "title", "title")).index(theDoc,
+ entity);
+ (new UnIndexedSearchTerm("description", "search_content", "description",
+ "description", "description")).index(theDoc, entity);
+ (new UnIndexedSearchTerm("webdb_create", "search_irrelevant",
+ "creationDate", "creationDate", "creationDate")).index(theDoc, entity);
+
+ (new ContentSearchTerm("content_data", "search_content", "content", "", "")).indexValue(theDoc,
+ entity.getFieldValue("content_data") + " " + entity.getFieldValue("description") +
+ " " + entity.getFieldValue("title"));
+
+ (new TopicSearchTerm()).index(theDoc, entity);
+
+ (new ImagesSearchTerm()).index(theDoc, entity);
+
+ (new AudioSearchTerm()).index(theDoc, entity);
+ (new VideoSearchTerm()).index(theDoc, entity);
+
+ //comments-just aggregate all relevant fields
+ // removed until I get a chance to do this right
+ //String commentsAggregate = "";
+ //TemplateModel comments=entity.get("to_comments");
+ //if (comments != null){
+ // while (((TemplateListModel)comments).hasNext()){
+ // TemplateModel aComment = ((TemplateListModel)comments).next();
+ // commentsAggregate = commentsAggregate + " " + ((TemplateHashModel)aComment).get("title").toString()
+ // + " " + ((TemplateHashModel)aComment).get("creator").toString()
+ // + " " + ((TemplateHashModel)aComment).get("text").toString();
+ // }
+ //}
+ //theDoc.add(Field.UnStored("comments",commentsAggregate));
+ indexWriter.addDocument(theDoc);
}
catch (Throwable t) {
- aLogger.println("Error while indexing content: " + t.getMessage());
- t.printStackTrace(aLogger);
- //should remove index lock here.....jd
- throw new ProducerFailure(t.getMessage(), t);
+ aLogger.error("Error while indexing content: " + t.getMessage(), t);
}
finally {
- if (indexReader != null){
- try{
- indexReader.close();
- }
- catch (Throwable t) {
- aLogger.println("Error while closing indexReader: " + t.getMessage());
- }
-
+ if (indexWriter != null) {
+ try {
+ indexWriter.close();
+ }
+ catch (Throwable t) {
+ aLogger.warn("Error while closing indexWriter", t);
+ }
}
- if (indexWriter != null){
- try{
- indexWriter.close();
- }
- catch (Throwable t) {
- aLogger.println("Error while closing indexWriter: " + t.getMessage());
- }
-
- }
-
-
- try{
- FSDirectory theIndexDir=FSDirectory.getDirectory(indexPath,false);
- if (indexReader.isLocked(theIndexDir)){
- indexReader.unlock(theIndexDir);
- }
- }
- catch (Throwable t) {
- aLogger.println("Error while unlocking index: " + t.getMessage());
+ if (indexFile!=null) {
+ try {
+ FSDirectory theIndexDir = FSDirectory.getDirectory(indexFile, false);
+
+ if (IndexReader.isLocked(theIndexDir)) {
+ IndexReader.unlock(theIndexDir);
+ }
+ }
+ catch (Throwable t) {
+ aLogger.warn("Error while unlocking index", t);
+ }
}
}
-
-
-
endTime = System.currentTimeMillis();
-
- aLogger.println(" IndexTime: " + (endTime-startTime) + " ms
");
- aLogger.flush();
+
+ aLogger.debug(" IndexTime: " + (endTime - startTime) + " ms
");
}
}
-
-
-
-
-
-
-
-
-
-