source/mircoders/producer/IndexingProducerNode.java

   1 /*
   2  * Copyright (C) 2001, 2002  The Mir-coders group
   3  *
   4  * This file is part of Mir.
   5  *
   6  * Mir is free software; you can redistribute it and/or modify
   7  * it under the terms of the GNU General Public License as published by
   8  * the Free Software Foundation; either version 2 of the License, or
   9  * (at your option) any later version.
  10  *
  11  * Mir is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14  * GNU General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU General Public License
  17  * along with Mir; if not, write to the Free Software
  18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19  *
  20  * In addition, as a special exception, The Mir-coders gives permission to link
  21  * the code of this program with the com.oreilly.servlet library, any library
  22  * licensed under the Apache Software License, The Sun (tm) Java Advanced
  23  * Imaging library (JAI), The Sun JIMI library (or with modified versions of
  24  * the above that use the same license as the above), and distribute linked
  25  * combinations including the two.  You must obey the GNU General Public
  26  * License in all respects for all of the code used other than the above
  27  * mentioned libraries.  If you modify this file, you may extend this exception
  28  * to your version of the file, but you are not obligated to do so.  If you do
  29  * not wish to do so, delete this exception statement from your version.
  30  */
  31
  32 package mircoders.producer;
  33
  34 import java.util.*;
  35 import java.io.*;
  36
  37 import org.apache.lucene.analysis.standard.StandardAnalyzer;
  38 import org.apache.lucene.index.*;
  39 import org.apache.lucene.document.Document;
  40 import org.apache.lucene.document.Field;
  41 import org.apache.lucene.store.FSDirectory;
  42
  43 import freemarker.template.*;
  44
  45
  46 import mir.util.*;
  47 import mir.log.*;
  48 import mir.producer.*;
  49 //import mir.generator.*;
  50 import mircoders.global.*;
  51 import mircoders.localizer.*;
  52 import mir.entity.*;
  53 import mir.entity.adapter.*;
  54 import mircoders.entity.*;
  55 import mircoders.storage.*;
  56
  57
  58 public class IndexingProducerNode implements ProducerNode {
  59   private String contentKey;
  60   private String indexPath;
  61
  62
  63   public IndexingProducerNode(String aContentKey, String pathToIndex) {
  64     contentKey = aContentKey;
  65     indexPath=pathToIndex;
  66   }
  67
  68   public void produce(Map aValueMap, String aVerb, LoggerWrapper aLogger) throws ProducerFailure {
  69     IndexReader indexReader = null;
  70     IndexWriter indexWriter = null;
  71     Object data;
  72     Entity entity;
  73
  74     long startTime;
  75     long endTime;
  76
  77     startTime = System.currentTimeMillis();
  78
  79     try {
  80       data = ParameterExpander.findValueForKey( aValueMap, contentKey );
  81
  82       if (! (data instanceof EntityAdapter)) {
  83         throw new ProducerFailure("IndexingProducerNode: value of '"+contentKey+"' is not an EntityAdapter, but an " + data.getClass().getName(), null);
  84       }
  85
  86       entity = ((EntityAdapter) data).getEntity();
  87       if (! (entity instanceof EntityContent)) {
  88         throw new ProducerFailure("IndexingProducerNode: value of '"+contentKey+"' is not a content EntityAdapter, but a " + entity.getClass().getName() + " adapter", null);
  89       }
  90       aLogger.info("Indexing " + (String) entity.getValue("id") + " into " + indexPath);
  91
  92       indexReader = IndexReader.open(indexPath);
  93       indexReader.delete(new Term("id",entity.getValue("id")));
  94       indexReader.close();
  95
  96       indexWriter = new IndexWriter(indexPath, new StandardAnalyzer(), false);
  97       Document theDoc =  new Document();
  98
  99       // Keyword is stored and indexed, but not tokenized
 100       // Text is tokenized,stored, indexed
 101       // Unindexed is not tokenized or indexed, only stored
 102       // Unstored is tokenized and indexed, but not stored
 103
 104       theDoc.add(Field.Keyword("id",entity.getValue("id")));
 105       theDoc.add(Field.Keyword("where",entity.getValue("publish_path")+entity.getValue("id")+".shtml"));
 106       theDoc.add(Field.Text("creator",entity.getValue("creator")));
 107       theDoc.add(Field.Text("title",entity.getValue("title")));
 108       theDoc.add(Field.Keyword("webdb_create",entity.getValue("webdb_create_formatted")));
 109       theDoc.add(Field.UnStored("content_and_description",entity.getValue("description")+entity.getValue("content_data")));
 110
 111       //topics
 112       TemplateModel topics=entity.get("to_topics");
 113       aLogger.debug("THE CLASS NAME WAS: "+entity.get("to_topics").getClass().getName());
 114       while (((TemplateListModel)topics).hasNext()){
 115         theDoc.add(Field.UnStored("topic",((TemplateHashModel)((TemplateListModel)topics).next()).get("title").toString()));
 116       }
 117
 118
 119       //media
 120
 121       //images
 122       TemplateModel images=entity.get("to_media_images");
 123       if (images != null){
 124           //here we should really store a list of urls instead,
 125           //so we can thumbnail from another server
 126           theDoc.add(Field.UnStored("media","images"));
 127
 128       }
 129       //audio
 130       TemplateModel audio=entity.get("to_media_audio");
 131       if (audio != null){
 132         theDoc.add(Field.UnStored("media","audio"));
 133       }
 134       //video
 135       TemplateModel video=entity.get("to_media_video");
 136       if (video != null){
 137         theDoc.add(Field.UnStored("media","video"));
 138       }
 139
 140       //comments-just aggregate all relevant fields
 141       String commentsAggregate = "";
 142       TemplateModel comments=entity.get("to_comments");
 143       if (comments != null){
 144         while (((TemplateListModel)comments).hasNext()){
 145           TemplateModel aComment = ((TemplateListModel)comments).next();
 146           commentsAggregate = commentsAggregate + " " + ((TemplateHashModel)aComment).get("title").toString()
 147             + " " + ((TemplateHashModel)aComment).get("creator").toString()
 148             + " " + ((TemplateHashModel)aComment).get("text").toString();
 149         }
 150       }
 151       theDoc.add(Field.UnStored("comments",commentsAggregate));
 152
 153       indexWriter.addDocument(theDoc);
 154
 155
 156     }
 157     catch (Throwable t) {
 158       aLogger.error("Error while indexing content: " + t.getMessage());
 159       t.printStackTrace(new PrintWriter(new LoggerToWriterAdapter(aLogger, LoggerWrapper.DEBUG_MESSAGE)));
 160       //should remove index lock here.....jd
 161     }
 162     finally {
 163       if (indexReader != null){
 164         try{
 165           indexReader.close();
 166         }
 167         catch (Throwable t) {
 168           aLogger.warn("Error while closing indexReader: " + t.getMessage());
 169         }
 170
 171       }
 172
 173       if (indexWriter != null){
 174         try{
 175           indexWriter.close();
 176         }
 177         catch (Throwable t) {
 178           aLogger.warn("Error while closing indexWriter: " + t.getMessage());
 179         }
 180
 181       }
 182
 183
 184       try{
 185         FSDirectory theIndexDir=FSDirectory.getDirectory(indexPath,false);
 186         if (indexReader.isLocked(theIndexDir)){
 187           indexReader.unlock(theIndexDir);
 188         }
 189       }
 190       catch (Throwable t) {
 191         aLogger.warn("Error while unlocking index: " + t.getMessage());
 192       }
 193     }
 194
 195
 196
 197
 198     endTime = System.currentTimeMillis();
 199
 200     aLogger.info("  IndexTime: " + (endTime-startTime) + " ms<br>");
 201   }
 202 }
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212