source/mircoders/producer/IndexingProducerNode.java

   1 /*
   2  * Copyright (C) 2001, 2002 The Mir-coders group
   3  *
   4  * This file is part of Mir.
   5  *
   6  * Mir is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * Mir is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with Mir; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19  *
  20  * In addition, as a special exception, The Mir-coders gives permission to link
  21  * the code of this program with  any library licensed under the Apache Software License,
  22  * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library
  23  * (or with modified versions of the above that use the same license as the above),
  24  * and distribute linked combinations including the two.  You must obey the
  25  * GNU General Public License in all respects for all of the code used other than
  26  * the above mentioned libraries.  If you modify this file, you may extend this
  27  * exception to your version of the file, but you are not obligated to do so.
  28  * If you do not wish to do so, delete this exception statement from your version.
  29  */
  30 package mircoders.producer;
  31
  32 import mir.entity.Entity;
  33 import mir.entity.adapter.EntityAdapter;
  34 import mir.log.LoggerWrapper;
  35 import mir.misc.StringUtil;
  36 import mir.producer.AbstractProducerNode;
  37 import mir.producer.ProducerFailure;
  38 import mir.util.FileRoutines;
  39 import mir.util.ParameterExpander;
  40 import mircoders.entity.EntityContent;
  41 import mircoders.search.*;
  42 import org.apache.lucene.analysis.standard.StandardAnalyzer;
  43 import org.apache.lucene.document.Document;
  44 import org.apache.lucene.index.IndexReader;
  45 import org.apache.lucene.index.IndexWriter;
  46 import org.apache.lucene.store.FSDirectory;
  47
  48 import java.io.File;
  49 import java.text.SimpleDateFormat;
  50 import java.util.Calendar;
  51 import java.util.Date;
  52 import java.util.GregorianCalendar;
  53 import java.util.Map;
  54
  55
  56 public class IndexingProducerNode extends AbstractProducerNode {
  57   private String contentKey;
  58   private String indexPath;
  59   private File indexBasePath;
  60
  61   public IndexingProducerNode(File anIndexBasePath, String aContentKey, String pathToIndex) {
  62     contentKey = aContentKey;
  63     indexPath = pathToIndex;
  64     indexBasePath = anIndexBasePath;
  65   }
  66
  67   public void produce(Map aValueMap, String aVerb, LoggerWrapper aLogger)
  68     throws ProducerFailure {
  69     IndexWriter indexWriter = null;
  70     Object data;
  71     Entity entity;
  72     long startTime;
  73     long endTime;
  74     File indexFile = null;
  75
  76     startTime = System.currentTimeMillis();
  77
  78     try {
  79       indexFile = FileRoutines.getAbsoluteOrRelativeFile(indexBasePath, ParameterExpander.expandExpression(aValueMap, indexPath));
  80       data = ParameterExpander.findValueForKey(aValueMap, contentKey);
  81
  82       if (!(data instanceof EntityAdapter)) {
  83         throw new ProducerFailure("IndexingProducerNode: value of '" +
  84           contentKey + "' is not an EntityAdapter, but an " +
  85           data.getClass().getName(), null);
  86       }
  87
  88       entity = ((EntityAdapter) data).getEntity();
  89
  90       if (!(entity instanceof EntityContent)) {
  91         throw new ProducerFailure("IndexingProducerNode: value of '" +
  92           contentKey + "' is not a content EntityAdapter, but a " +
  93           entity.getClass().getName() + " adapter", null);
  94       }
  95
  96       aLogger.debug("Indexing " + entity.getFieldValue("id") + " into " +  indexFile.getAbsolutePath());
  97
  98       // create an index here if one did not already exist
  99       if (!(IndexReader.indexExists(indexFile))) {
 100         aLogger.warn("Didn't find existing index, so I'm making one in " + indexFile.getAbsolutePath());
 101
 102         IndexWriter indexCreator = new IndexWriter(indexFile, new StandardAnalyzer(), true);
 103         indexCreator.close();
 104       }
 105
 106       IndexUtil.unindexEntity((EntityContent) entity, indexFile);
 107
 108       indexWriter = new IndexWriter(indexFile, new StandardAnalyzer(), false);
 109
 110       Document theDoc = new Document();
 111
 112       // Keyword is stored and indexed, but not tokenized
 113       // Text is tokenized,stored, indexed
 114       // Unindexed is not tokenized or indexed, only stored
 115       // Unstored is tokenized and indexed, but not stored
 116       //this initialization should go somewhere global like an xml file....
 117       (new KeywordSearchTerm("id", "", "id", "", "id")).index(theDoc, entity);
 118
 119       String textValue = entity.getFieldValue("webdb_create");
 120       Calendar calendar = GregorianCalendar.getInstance();
 121       int year=0;
 122       int month=0;
 123       int day;
 124       int hours;
 125       int minutes;
 126       Date date;
 127       String formattedDate="";
 128       String pathDate="";
 129
 130       if (textValue!=null) {
 131         try {
 132           year = Integer.parseInt(textValue.substring(0, 4));
 133           month = Integer.parseInt(textValue.substring(5, 7));
 134           day = Integer.parseInt(textValue.substring(8, 10));
 135           hours = Integer.parseInt(textValue.substring(11, 13));
 136           minutes = Integer.parseInt(textValue.substring(14, 16));
 137
 138           calendar.set(year, month - 1, day, hours, minutes);
 139           date = calendar.getTime();
 140           SimpleDateFormat formatter = new SimpleDateFormat("yyyy.MM.dd hh:mm");
 141           formattedDate = formatter.format(date);
 142
 143           SimpleDateFormat pathFormatter = new SimpleDateFormat("/yyyy/MM/");
 144           pathDate = pathFormatter.format(date);
 145         }
 146         catch (Throwable t) {
 147           aLogger.warn("Error while generating content date to index", t);
 148         }
 149       }
 150       (new KeywordSearchTerm("webdb_create_formatted", "search_date",
 151         "webdb_create_formatted", "webdb_create_formatted",
 152         "webdb_create_formatted")).indexValue(theDoc,formattedDate);
 153
 154
 155       (new UnIndexedSearchTerm("", "", "", "where", "where")).indexValue(theDoc,
 156         pathDate +  entity.getFieldValue("id") + ".shtml");
 157
 158       (new TextSearchTerm("creator", "search_creator", "creator", "creator",
 159         "creator")).index(theDoc, entity);
 160       (new TextSearchTerm("title", "search_title", "title", "title", "title")).index(theDoc,
 161         entity);
 162       (new UnIndexedSearchTerm("description", "search_content", "description",
 163         "description", "description")).index(theDoc, entity);
 164       (new UnIndexedSearchTerm("webdb_create", "search_irrelevant",
 165         "creationDate", "creationDate", "creationDate")).index(theDoc, entity);
 166
 167       (new ContentSearchTerm("content_data", "search_content", "content", "", "")).indexValue(theDoc,
 168         entity.getFieldValue("content_data") + " " + entity.getFieldValue("description") +
 169         " " + entity.getFieldValue("title"));
 170
 171       (new TopicSearchTerm()).index(theDoc, entity);
 172
 173       (new ImagesSearchTerm()).index(theDoc, entity);
 174
 175       (new AudioSearchTerm()).index(theDoc, entity);
 176
 177       (new VideoSearchTerm()).index(theDoc, entity);
 178
 179       //comments-just aggregate all relevant fields
 180       //removed until i get a chance to do this right
 181       //String commentsAggregate = "";
 182       //TemplateModel comments=entity.get("to_comments");
 183       //if (comments != null){
 184       // while (((TemplateListModel)comments).hasNext()){
 185       //    TemplateModel aComment = ((TemplateListModel)comments).next();
 186       //    commentsAggregate = commentsAggregate + " " + ((TemplateHashModel)aComment).get("title").toString()
 187       //     + " " + ((TemplateHashModel)aComment).get("creator").toString()
 188       //      + " " + ((TemplateHashModel)aComment).get("text").toString();
 189       //  }
 190       //}
 191       //theDoc.add(Field.UnStored("comments",commentsAggregate));
 192       indexWriter.addDocument(theDoc);
 193     }
 194     catch (Throwable t) {
 195       aLogger.error("Error while indexing content: " + t.getMessage(), t);
 196     }
 197     finally {
 198       if (indexWriter != null) {
 199         try {
 200           indexWriter.close();
 201         }
 202         catch (Throwable t) {
 203           aLogger.warn("Error while closing indexWriter", t);
 204         }
 205       }
 206
 207       if (indexFile!=null) {
 208         try {
 209           FSDirectory theIndexDir = FSDirectory.getDirectory(indexFile, false);
 210
 211           if (IndexReader.isLocked(theIndexDir)) {
 212             IndexReader.unlock(theIndexDir);
 213           }
 214         }
 215         catch (Throwable t) {
 216           aLogger.warn("Error while unlocking index", t);
 217         }
 218       }
 219     }
 220
 221     endTime = System.currentTimeMillis();
 222
 223     aLogger.debug("  IndexTime: " + (endTime - startTime) + " ms<br>");
 224   }
 225 }