source/mircoders/producer/IndexingProducerNode.java

   1 /* Copyright (C) 2001, 2002  The Mir-coders group
   2  *
   3  * This file is part of Mir.
   4  *
   5  * Mir is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * Mir is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with Mir; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18  *
  19  * In addition, as a special exception, The Mir-coders gives permission to link
  20  * the code of this program with the com.oreilly.servlet library, any library
  21  * licensed under the Apache Software License, The Sun (tm) Java Advanced
  22  * Imaging library (JAI), The Sun JIMI library (or with modified versions of
  23  * the above that use the same license as the above), and distribute linked
  24  * combinations including the two.  You must obey the GNU General Public
  25  * License in all respects for all of the code used other than the above
  26  * mentioned libraries.  If you modify this file, you may extend this exception
  27  * to your version of the file, but you are not obligated to do so.  If you do
  28  * not wish to do so, delete this exception statement from your version.
  29  */
  30
  31 package mircoders.producer;
  32
  33 import java.util.*;
  34 import java.io.*;
  35
  36 import org.apache.lucene.analysis.standard.StandardAnalyzer;
  37 import org.apache.lucene.index.*;
  38 import org.apache.lucene.document.Document;
  39 import org.apache.lucene.document.Field;
  40 import org.apache.lucene.store.FSDirectory;
  41
  42 import freemarker.template.*;
  43
  44
  45 import mir.util.*;
  46 import mir.log.*;
  47 import mir.producer.*;
  48 //import mir.generator.*;
  49 import mircoders.global.*;
  50 import mircoders.localizer.*;
  51 import mir.entity.*;
  52 import mir.entity.adapter.*;
  53 import mircoders.entity.*;
  54 import mircoders.storage.*;
  55 import mircoders.search.*;
  56
  57
  58 public class IndexingProducerNode implements ProducerNode {
  59   private String contentKey;
  60   private String indexPath;
  61
  62
  63   public IndexingProducerNode(String aContentKey, String pathToIndex) {
  64     contentKey = aContentKey;
  65     indexPath=pathToIndex;
  66   }
  67
  68   public void produce(Map aValueMap, String aVerb, LoggerWrapper aLogger) throws ProducerFailure {
  69     IndexReader indexReader = null;
  70     IndexWriter indexWriter = null;
  71     Object data;
  72     Entity entity;
  73     String index = null;
  74     long startTime;
  75     long endTime;
  76
  77     startTime = System.currentTimeMillis();
  78
  79
  80
  81     try {
  82       index = ParameterExpander.expandExpression(aValueMap, indexPath);
  83       data =  ParameterExpander.findValueForKey( aValueMap, contentKey );
  84       if (! (data instanceof EntityAdapter)) {
  85         throw new ProducerFailure("IndexingProducerNode: value of '"+contentKey+"' is not an EntityAdapter, but an " + data.getClass().getName(), null);
  86       }
  87
  88       entity = ((EntityAdapter) data).getEntity();
  89       if (! (entity instanceof EntityContent)) {
  90         throw new ProducerFailure("IndexingProducerNode: value of '"+contentKey+"' is not a content EntityAdapter, but a " + entity.getClass().getName() + " adapter", null);
  91       }
  92       aLogger.info("Indexing " + (String) entity.getValue("id") + " into " + index);
  93
  94       // create an index here if one did not already exist
  95       if (! (IndexReader.indexExists(index))){
  96         aLogger.error("Didn't find existing index, so I'm making one in "+index);
  97         IndexWriter indexCreator = new IndexWriter(index,new StandardAnalyzer(),true);
  98         indexCreator.close();
  99       }
 100
 101       indexReader = IndexReader.open(index);
 102       indexReader.delete(new Term("id",entity.getValue("id")));
 103       indexReader.close();
 104
 105       indexWriter = new IndexWriter(index, new StandardAnalyzer(), false);
 106       Document theDoc =  new Document();
 107
 108       // Keyword is stored and indexed, but not tokenized
 109       // Text is tokenized,stored, indexed
 110       // Unindexed is not tokenized or indexed, only stored
 111       // Unstored is tokenized and indexed, but not stored
 112
 113       //this initialization should go somewhere global like an xml file....
 114
 115       (new KeywordSearchTerm("id","","id","","id")).index(theDoc,entity);
 116
 117       (new KeywordSearchTerm("webdb_create_formatted","search_date","webdb_create_formatted","webdb_create_formatted","webdb_create_formatted")).index(theDoc,entity);
 118
 119       (new UnIndexedSearchTerm("","","","where","where")).indexValue(theDoc,entity.getValue("publish_path")+entity.getValue("id")+".shtml");
 120
 121       (new TextSearchTerm("creator","search_creator","creator","creator","creator")).index(theDoc,entity);
 122       (new TextSearchTerm("title","search_title","title","title","title")).index(theDoc,entity);
 123       (new UnIndexedSearchTerm("description","search_content","description","description","description")).index(theDoc,entity);
 124       (new UnIndexedSearchTerm("webdb_create","search_irrelevant","creationDate","creationDate","creationDate")).index(theDoc,entity);
 125
 126       (new ContentSearchTerm("content_data","search_content","content","","")).indexValue(theDoc,
 127                                                                                      entity.getValue("content_data")+ " "
 128                                                                                      + entity.getValue("description")+ " "
 129                                                                                      + entity.getValue("title")
 130                                                                                      );
 131
 132       (new TopicSearchTerm()).index(theDoc,entity);
 133
 134       (new ImagesSearchTerm()).index(theDoc,entity);
 135
 136       (new AudioSearchTerm()).index(theDoc,entity);
 137
 138       (new VideoSearchTerm()).index(theDoc,entity);
 139
 140
 141       //comments-just aggregate all relevant fields
 142       //removed until i get a chance to do this right
 143
 144       //String commentsAggregate = "";
 145       //TemplateModel comments=entity.get("to_comments");
 146       //if (comments != null){
 147       // while (((TemplateListModel)comments).hasNext()){
 148       //    TemplateModel aComment = ((TemplateListModel)comments).next();
 149       //    commentsAggregate = commentsAggregate + " " + ((TemplateHashModel)aComment).get("title").toString()
 150       //     + " " + ((TemplateHashModel)aComment).get("creator").toString()
 151       //      + " " + ((TemplateHashModel)aComment).get("text").toString();
 152       //  }
 153       //}
 154       //theDoc.add(Field.UnStored("comments",commentsAggregate));
 155
 156       indexWriter.addDocument(theDoc);
 157
 158
 159     }
 160     catch (Throwable t) {
 161       aLogger.error("Error while indexing content: " + t.getMessage());
 162       t.printStackTrace(new PrintWriter(new LoggerToWriterAdapter(aLogger, LoggerWrapper.DEBUG_MESSAGE)));
 163     }
 164     finally {
 165       if (indexReader != null){
 166         try{
 167           indexReader.close();
 168         }
 169         catch (Throwable t) {
 170           aLogger.warn("Error while closing indexReader: " + t.getMessage());
 171         }
 172
 173       }
 174
 175       if (indexWriter != null){
 176         try{
 177           indexWriter.close();
 178         }
 179         catch (Throwable t) {
 180           aLogger.warn("Error while closing indexWriter: " + t.getMessage());
 181         }
 182
 183       }
 184
 185
 186       try{
 187         FSDirectory theIndexDir=FSDirectory.getDirectory(index,false);
 188         if (indexReader.isLocked(theIndexDir)){
 189           indexReader.unlock(theIndexDir);
 190         }
 191       }
 192       catch (Throwable t) {
 193         aLogger.warn("Error while unlocking index: " + t.getMessage());
 194       }
 195     }
 196
 197
 198
 199
 200     endTime = System.currentTimeMillis();
 201
 202     aLogger.info("  IndexTime: " + (endTime-startTime) + " ms<br>");
 203   }
 204 }
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214