source/mircoders/producer/IndexingProducerNode.java

   1 /*
   2  * Copyright (C) 2001, 2002 The Mir-coders group
   3  *
   4  * This file is part of Mir.
   5  *
   6  * Mir is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * Mir is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with Mir; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19  *
  20  * In addition, as a special exception, The Mir-coders gives permission to link
  21  * the code of this program with  any library licensed under the Apache Software License,
  22  * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library
  23  * (or with modified versions of the above that use the same license as the above),
  24  * and distribute linked combinations including the two.  You must obey the
  25  * GNU General Public License in all respects for all of the code used other than
  26  * the above mentioned libraries.  If you modify this file, you may extend this
  27  * exception to your version of the file, but you are not obligated to do so.
  28  * If you do not wish to do so, delete this exception statement from your version.
  29  */
  30 package mircoders.producer;
  31
  32 import java.text.SimpleDateFormat;
  33 import java.util.Calendar;
  34 import java.util.Date;
  35 import java.util.GregorianCalendar;
  36 import java.util.Map;
  37
  38 import mir.entity.Entity;
  39 import mir.entity.adapter.EntityAdapter;
  40 import mir.log.LoggerWrapper;
  41 import mir.misc.StringUtil;
  42 import mir.producer.ProducerFailure;
  43 import mir.producer.ProducerNode;
  44 import mir.util.ParameterExpander;
  45 import mircoders.entity.EntityContent;
  46 import mircoders.search.AudioSearchTerm;
  47 import mircoders.search.ContentSearchTerm;
  48 import mircoders.search.ImagesSearchTerm;
  49 import mircoders.search.IndexUtil;
  50 import mircoders.search.KeywordSearchTerm;
  51 import mircoders.search.TextSearchTerm;
  52 import mircoders.search.TopicSearchTerm;
  53 import mircoders.search.UnIndexedSearchTerm;
  54 import mircoders.search.VideoSearchTerm;
  55
  56 import org.apache.lucene.analysis.standard.StandardAnalyzer;
  57 import org.apache.lucene.document.Document;
  58 import org.apache.lucene.index.IndexReader;
  59 import org.apache.lucene.index.IndexWriter;
  60 import org.apache.lucene.store.FSDirectory;
  61
  62
  63 public class IndexingProducerNode implements ProducerNode {
  64   private String contentKey;
  65   private String indexPath;
  66
  67   public IndexingProducerNode(String aContentKey, String pathToIndex) {
  68     contentKey = aContentKey;
  69     indexPath = pathToIndex;
  70   }
  71
  72   public void produce(Map aValueMap, String aVerb, LoggerWrapper aLogger)
  73     throws ProducerFailure {
  74     IndexWriter indexWriter = null;
  75     Object data;
  76     Entity entity;
  77     String index = null;
  78     long startTime;
  79     long endTime;
  80
  81     startTime = System.currentTimeMillis();
  82
  83     try {
  84       index = ParameterExpander.expandExpression(aValueMap, indexPath);
  85       data = ParameterExpander.findValueForKey(aValueMap, contentKey);
  86
  87       if (!(data instanceof EntityAdapter)) {
  88         throw new ProducerFailure("IndexingProducerNode: value of '" +
  89           contentKey + "' is not an EntityAdapter, but an " +
  90           data.getClass().getName(), null);
  91       }
  92
  93       entity = ((EntityAdapter) data).getEntity();
  94
  95       if (!(entity instanceof EntityContent)) {
  96         throw new ProducerFailure("IndexingProducerNode: value of '" +
  97           contentKey + "' is not a content EntityAdapter, but a " +
  98           entity.getClass().getName() + " adapter", null);
  99       }
 100
 101       aLogger.info("Indexing " + (String) entity.getValue("id") + " into " +
 102         index);
 103
 104       // create an index here if one did not already exist
 105       if (!(IndexReader.indexExists(index))) {
 106         aLogger.error("Didn't find existing index, so I'm making one in " +
 107           index);
 108
 109         IndexWriter indexCreator =
 110           new IndexWriter(index, new StandardAnalyzer(), true);
 111         indexCreator.close();
 112       }
 113
 114       IndexUtil.unindexEntity((EntityContent) entity, index);
 115
 116       indexWriter = new IndexWriter(index, new StandardAnalyzer(), false);
 117
 118       Document theDoc = new Document();
 119
 120       // Keyword is stored and indexed, but not tokenized
 121       // Text is tokenized,stored, indexed
 122       // Unindexed is not tokenized or indexed, only stored
 123       // Unstored is tokenized and indexed, but not stored
 124       //this initialization should go somewhere global like an xml file....
 125       (new KeywordSearchTerm("id", "", "id", "", "id")).index(theDoc, entity);
 126
 127       String textValue = entity.getValue("webdb_create");
 128       Calendar calendar = GregorianCalendar.getInstance();
 129       int year;
 130       int month;
 131       int day;
 132       int hours;
 133       int minutes;
 134       Date date;
 135       String formattedDate="";
 136
 137       if (textValue!=null) {
 138         try {
 139           year = Integer.parseInt(textValue.substring(0,4));
 140           month = Integer.parseInt(textValue.substring(5,7));
 141           day = Integer.parseInt(textValue.substring(8,10));
 142           hours = Integer.parseInt(textValue.substring(11,13));
 143           minutes = Integer.parseInt(textValue.substring(14,16));
 144
 145           calendar.set(year, month-1, day, hours, minutes);
 146           date = calendar.getTime();
 147           SimpleDateFormat formatter = new SimpleDateFormat ("yyyy.MM.dd hh:mm");
 148           formattedDate=formatter.format(date);
 149         }
 150         catch (Throwable t){
 151           aLogger.error("Error while generating content date to index: " + t.getMessage());
 152           t.printStackTrace(aLogger.asPrintWriter(LoggerWrapper.DEBUG_MESSAGE));
 153         }
 154       }
 155       (new KeywordSearchTerm("webdb_create_formatted", "search_date",
 156         "webdb_create_formatted", "webdb_create_formatted",
 157         "webdb_create_formatted")).indexValue(theDoc,formattedDate);
 158
 159
 160       (new UnIndexedSearchTerm("", "", "", "where", "where")).indexValue(theDoc,
 161         StringUtil.webdbDate2path(entity.getValue("date")) +
 162         entity.getValue("id") + ".shtml");
 163
 164       (new TextSearchTerm("creator", "search_creator", "creator", "creator",
 165         "creator")).index(theDoc, entity);
 166       (new TextSearchTerm("title", "search_title", "title", "title", "title")).index(theDoc,
 167         entity);
 168       (new UnIndexedSearchTerm("description", "search_content", "description",
 169         "description", "description")).index(theDoc, entity);
 170       (new UnIndexedSearchTerm("webdb_create", "search_irrelevant",
 171         "creationDate", "creationDate", "creationDate")).index(theDoc, entity);
 172
 173       (new ContentSearchTerm("content_data", "search_content", "content", "", "")).indexValue(theDoc,
 174         entity.getValue("content_data") + " " + entity.getValue("description") +
 175         " " + entity.getValue("title"));
 176
 177       (new TopicSearchTerm()).index(theDoc, entity);
 178
 179       (new ImagesSearchTerm()).index(theDoc, entity);
 180
 181       (new AudioSearchTerm()).index(theDoc, entity);
 182
 183       (new VideoSearchTerm()).index(theDoc, entity);
 184
 185       //comments-just aggregate all relevant fields
 186       //removed until i get a chance to do this right
 187       //String commentsAggregate = "";
 188       //TemplateModel comments=entity.get("to_comments");
 189       //if (comments != null){
 190       // while (((TemplateListModel)comments).hasNext()){
 191       //    TemplateModel aComment = ((TemplateListModel)comments).next();
 192       //    commentsAggregate = commentsAggregate + " " + ((TemplateHashModel)aComment).get("title").toString()
 193       //     + " " + ((TemplateHashModel)aComment).get("creator").toString()
 194       //      + " " + ((TemplateHashModel)aComment).get("text").toString();
 195       //  }
 196       //}
 197       //theDoc.add(Field.UnStored("comments",commentsAggregate));
 198       indexWriter.addDocument(theDoc);
 199     }
 200     catch (Throwable t) {
 201       aLogger.error("Error while indexing content: " + t.getMessage());
 202       t.printStackTrace(aLogger.asPrintWriter(LoggerWrapper.DEBUG_MESSAGE));
 203     }
 204     finally {
 205       if (indexWriter != null) {
 206         try {
 207           indexWriter.close();
 208         } catch (Throwable t) {
 209           aLogger.warn("Error while closing indexWriter: " + t.getMessage());
 210         }
 211       }
 212
 213       try {
 214         FSDirectory theIndexDir = FSDirectory.getDirectory(index, false);
 215
 216         if (IndexReader.isLocked(theIndexDir)) {
 217           IndexReader.unlock(theIndexDir);
 218         }
 219       } catch (Throwable t) {
 220         aLogger.warn("Error while unlocking index: " + t.getMessage());
 221       }
 222     }
 223
 224     endTime = System.currentTimeMillis();
 225
 226     aLogger.info("  IndexTime: " + (endTime - startTime) + " ms<br>");
 227   }
 228 }