source/mircoders/producer/IndexingProducerNode.java

   1 /* Copyright (C) 2001, 2002  The Mir-coders group
   2  *
   3  * This file is part of Mir.
   4  *
   5  * Mir is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * Mir is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License
  16  * along with Mir; if not, write to the Free Software
  17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  18  *
  19  * In addition, as a special exception, The Mir-coders gives permission to link
  20  * the code of this program with the com.oreilly.servlet library, any library
  21  * licensed under the Apache Software License, The Sun (tm) Java Advanced
  22  * Imaging library (JAI), The Sun JIMI library (or with modified versions of
  23  * the above that use the same license as the above), and distribute linked
  24  * combinations including the two.  You must obey the GNU General Public
  25  * License in all respects for all of the code used other than the above
  26  * mentioned libraries.  If you modify this file, you may extend this exception
  27  * to your version of the file, but you are not obligated to do so.  If you do
  28  * not wish to do so, delete this exception statement from your version.
  29  */
  30 package mircoders.producer;
  31
  32 import java.io.PrintWriter;
  33 import java.util.Map;
  34
  35 import mir.entity.Entity;
  36 import mir.entity.adapter.EntityAdapter;
  37 import mir.log.LoggerToWriterAdapter;
  38 import mir.log.LoggerWrapper;
  39 import mir.misc.StringUtil;
  40 import mir.producer.ProducerFailure;
  41 import mir.producer.ProducerNode;
  42 import mir.util.ParameterExpander;
  43 import mircoders.entity.EntityContent;
  44 import mircoders.search.AudioSearchTerm;
  45 import mircoders.search.ContentSearchTerm;
  46 import mircoders.search.ImagesSearchTerm;
  47 import mircoders.search.IndexUtil;
  48 import mircoders.search.KeywordSearchTerm;
  49 import mircoders.search.TextSearchTerm;
  50 import mircoders.search.TopicSearchTerm;
  51 import mircoders.search.UnIndexedSearchTerm;
  52 import mircoders.search.VideoSearchTerm;
  53
  54 import org.apache.lucene.analysis.standard.StandardAnalyzer;
  55 import org.apache.lucene.document.Document;
  56 import org.apache.lucene.index.IndexReader;
  57 import org.apache.lucene.index.IndexWriter;
  58 import org.apache.lucene.store.FSDirectory;
  59
  60
  61 public class IndexingProducerNode implements ProducerNode {
  62   private String contentKey;
  63   private String indexPath;
  64
  65   public IndexingProducerNode(String aContentKey, String pathToIndex) {
  66     contentKey = aContentKey;
  67     indexPath = pathToIndex;
  68   }
  69
  70   public void produce(Map aValueMap, String aVerb, LoggerWrapper aLogger)
  71     throws ProducerFailure {
  72     IndexWriter indexWriter = null;
  73     Object data;
  74     Entity entity;
  75     String index = null;
  76     long startTime;
  77     long endTime;
  78
  79     startTime = System.currentTimeMillis();
  80
  81     try {
  82       index = ParameterExpander.expandExpression(aValueMap, indexPath);
  83       data = ParameterExpander.findValueForKey(aValueMap, contentKey);
  84
  85       if (!(data instanceof EntityAdapter)) {
  86         throw new ProducerFailure("IndexingProducerNode: value of '" +
  87           contentKey + "' is not an EntityAdapter, but an " +
  88           data.getClass().getName(), null);
  89       }
  90
  91       entity = ((EntityAdapter) data).getEntity();
  92
  93       if (!(entity instanceof EntityContent)) {
  94         throw new ProducerFailure("IndexingProducerNode: value of '" +
  95           contentKey + "' is not a content EntityAdapter, but a " +
  96           entity.getClass().getName() + " adapter", null);
  97       }
  98
  99       aLogger.info("Indexing " + (String) entity.getValue("id") + " into " +
 100         index);
 101
 102       // create an index here if one did not already exist
 103       if (!(IndexReader.indexExists(index))) {
 104         aLogger.error("Didn't find existing index, so I'm making one in " +
 105           index);
 106
 107         IndexWriter indexCreator =
 108           new IndexWriter(index, new StandardAnalyzer(), true);
 109         indexCreator.close();
 110       }
 111
 112       IndexUtil.unindexEntity((EntityContent) entity, index);
 113
 114       indexWriter = new IndexWriter(index, new StandardAnalyzer(), false);
 115
 116       Document theDoc = new Document();
 117
 118       // Keyword is stored and indexed, but not tokenized
 119       // Text is tokenized,stored, indexed
 120       // Unindexed is not tokenized or indexed, only stored
 121       // Unstored is tokenized and indexed, but not stored
 122       //this initialization should go somewhere global like an xml file....
 123       (new KeywordSearchTerm("id", "", "id", "", "id")).index(theDoc, entity);
 124
 125       (new KeywordSearchTerm("webdb_create_formatted", "search_date",
 126         "webdb_create_formatted", "webdb_create_formatted",
 127         "webdb_create_formatted")).index(theDoc, entity);
 128
 129       (new UnIndexedSearchTerm("", "", "", "where", "where")).indexValue(theDoc,
 130         StringUtil.webdbDate2path(entity.getValue("date")) +
 131         entity.getValue("id") + ".shtml");
 132
 133       (new TextSearchTerm("creator", "search_creator", "creator", "creator",
 134         "creator")).index(theDoc, entity);
 135       (new TextSearchTerm("title", "search_title", "title", "title", "title")).index(theDoc,
 136         entity);
 137       (new UnIndexedSearchTerm("description", "search_content", "description",
 138         "description", "description")).index(theDoc, entity);
 139       (new UnIndexedSearchTerm("webdb_create", "search_irrelevant",
 140         "creationDate", "creationDate", "creationDate")).index(theDoc, entity);
 141
 142       (new ContentSearchTerm("content_data", "search_content", "content", "", "")).indexValue(theDoc,
 143         entity.getValue("content_data") + " " + entity.getValue("description") +
 144         " " + entity.getValue("title"));
 145
 146       (new TopicSearchTerm()).index(theDoc, entity);
 147
 148       (new ImagesSearchTerm()).index(theDoc, entity);
 149
 150       (new AudioSearchTerm()).index(theDoc, entity);
 151
 152       (new VideoSearchTerm()).index(theDoc, entity);
 153
 154       //comments-just aggregate all relevant fields
 155       //removed until i get a chance to do this right
 156       //String commentsAggregate = "";
 157       //TemplateModel comments=entity.get("to_comments");
 158       //if (comments != null){
 159       // while (((TemplateListModel)comments).hasNext()){
 160       //    TemplateModel aComment = ((TemplateListModel)comments).next();
 161       //    commentsAggregate = commentsAggregate + " " + ((TemplateHashModel)aComment).get("title").toString()
 162       //     + " " + ((TemplateHashModel)aComment).get("creator").toString()
 163       //      + " " + ((TemplateHashModel)aComment).get("text").toString();
 164       //  }
 165       //}
 166       //theDoc.add(Field.UnStored("comments",commentsAggregate));
 167       indexWriter.addDocument(theDoc);
 168     } catch (Throwable t) {
 169       aLogger.error("Error while indexing content: " + t.getMessage());
 170       t.printStackTrace(new PrintWriter(
 171           new LoggerToWriterAdapter(aLogger, LoggerWrapper.DEBUG_MESSAGE)));
 172     } finally {
 173       if (indexWriter != null) {
 174         try {
 175           indexWriter.close();
 176         } catch (Throwable t) {
 177           aLogger.warn("Error while closing indexWriter: " + t.getMessage());
 178         }
 179       }
 180
 181       try {
 182         FSDirectory theIndexDir = FSDirectory.getDirectory(index, false);
 183
 184         if (IndexReader.isLocked(theIndexDir)) {
 185           IndexReader.unlock(theIndexDir);
 186         }
 187       } catch (Throwable t) {
 188         aLogger.warn("Error while unlocking index: " + t.getMessage());
 189       }
 190     }
 191
 192     endTime = System.currentTimeMillis();
 193
 194     aLogger.info("  IndexTime: " + (endTime - startTime) + " ms<br>");
 195   }
 196 }