code cleaning, new config
[mir.git] / source / mircoders / producer / IndexingProducerNode.java
1 /* Copyright (C) 2001, 2002  The Mir-coders group
2  *
3  * This file is part of Mir.
4  *
5  * Mir is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * Mir is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with Mir; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  *
19  * In addition, as a special exception, The Mir-coders gives permission to link
20  * the code of this program with the com.oreilly.servlet library, any library
21  * licensed under the Apache Software License, The Sun (tm) Java Advanced
22  * Imaging library (JAI), The Sun JIMI library (or with modified versions of
23  * the above that use the same license as the above), and distribute linked
24  * combinations including the two.  You must obey the GNU General Public
25  * License in all respects for all of the code used other than the above
26  * mentioned libraries.  If you modify this file, you may extend this exception
27  * to your version of the file, but you are not obligated to do so.  If you do
28  * not wish to do so, delete this exception statement from your version.
29  */
30
31 package mircoders.producer;
32
33 import java.io.PrintWriter;
34 import java.util.Map;
35
36 import mir.entity.Entity;
37 import mir.entity.adapter.EntityAdapter;
38 import mir.log.LoggerToWriterAdapter;
39 import mir.log.LoggerWrapper;
40 import mir.misc.StringUtil;
41 import mir.producer.ProducerFailure;
42 import mir.producer.ProducerNode;
43 import mir.util.ParameterExpander;
44 import mircoders.entity.EntityContent;
45 import mircoders.search.AudioSearchTerm;
46 import mircoders.search.ContentSearchTerm;
47 import mircoders.search.ImagesSearchTerm;
48 import mircoders.search.IndexUtil;
49 import mircoders.search.KeywordSearchTerm;
50 import mircoders.search.TextSearchTerm;
51 import mircoders.search.TopicSearchTerm;
52 import mircoders.search.UnIndexedSearchTerm;
53 import mircoders.search.VideoSearchTerm;
54
55 import org.apache.lucene.analysis.standard.StandardAnalyzer;
56 import org.apache.lucene.document.Document;
57 import org.apache.lucene.index.IndexReader;
58 import org.apache.lucene.index.IndexWriter;
59 import org.apache.lucene.store.FSDirectory;
60
61 public class IndexingProducerNode implements ProducerNode {
62   private String contentKey;
63   private String indexPath;
64
65
66   public IndexingProducerNode(String aContentKey, String pathToIndex) {
67     contentKey = aContentKey;
68     indexPath=pathToIndex;
69   }
70
71   public void produce(Map aValueMap, String aVerb, LoggerWrapper aLogger) throws ProducerFailure {
72     IndexWriter indexWriter = null;
73     Object data;
74     Entity entity;
75     String index = null;
76     long startTime;
77     long endTime;
78
79     startTime = System.currentTimeMillis();
80
81
82
83     try {
84       index = ParameterExpander.expandExpression(aValueMap, indexPath);
85       data =  ParameterExpander.findValueForKey( aValueMap, contentKey );
86       if (! (data instanceof EntityAdapter)) {
87         throw new ProducerFailure("IndexingProducerNode: value of '"+contentKey+"' is not an EntityAdapter, but an " + data.getClass().getName(), null);
88       }
89
90       entity = ((EntityAdapter) data).getEntity();
91       if (! (entity instanceof EntityContent)) {
92         throw new ProducerFailure("IndexingProducerNode: value of '"+contentKey+"' is not a content EntityAdapter, but a " + entity.getClass().getName() + " adapter", null);
93       }
94       aLogger.info("Indexing " + (String) entity.getValue("id") + " into " + index);
95
96       // create an index here if one did not already exist
97       if (! (IndexReader.indexExists(index))){
98         aLogger.error("Didn't find existing index, so I'm making one in "+index);
99         IndexWriter indexCreator = new IndexWriter(index,new StandardAnalyzer(),true);
100         indexCreator.close();
101       }
102
103       IndexUtil.unindexEntity((EntityContent) entity,index);
104
105       indexWriter = new IndexWriter(index, new StandardAnalyzer(), false);
106       Document theDoc =  new Document();
107
108       // Keyword is stored and indexed, but not tokenized
109       // Text is tokenized,stored, indexed
110       // Unindexed is not tokenized or indexed, only stored
111       // Unstored is tokenized and indexed, but not stored
112
113       //this initialization should go somewhere global like an xml file....
114
115       (new KeywordSearchTerm("id","","id","","id")).index(theDoc,entity);
116
117       (new KeywordSearchTerm("webdb_create_formatted","search_date","webdb_create_formatted","webdb_create_formatted","webdb_create_formatted")).index(theDoc,entity);
118
119       (new UnIndexedSearchTerm("","","","where","where")).indexValue(theDoc, StringUtil.webdbDate2path(entity.getValue("date"))+entity.getValue("id")+".shtml");
120
121       (new TextSearchTerm("creator","search_creator","creator","creator","creator")).index(theDoc,entity);
122       (new TextSearchTerm("title","search_title","title","title","title")).index(theDoc,entity);
123       (new UnIndexedSearchTerm("description","search_content","description","description","description")).index(theDoc,entity);
124       (new UnIndexedSearchTerm("webdb_create","search_irrelevant","creationDate","creationDate","creationDate")).index(theDoc,entity);
125
126       (new ContentSearchTerm("content_data","search_content","content","","")).indexValue(theDoc,
127                                                                                      entity.getValue("content_data")+ " "
128                                                                                      + entity.getValue("description")+ " "
129                                                                                      + entity.getValue("title")
130                                                                                      );
131
132       (new TopicSearchTerm()).index(theDoc,entity);
133
134       (new ImagesSearchTerm()).index(theDoc,entity);
135
136       (new AudioSearchTerm()).index(theDoc,entity);
137
138       (new VideoSearchTerm()).index(theDoc,entity);
139
140
141       //comments-just aggregate all relevant fields
142       //removed until i get a chance to do this right
143
144       //String commentsAggregate = "";
145       //TemplateModel comments=entity.get("to_comments");
146       //if (comments != null){
147       // while (((TemplateListModel)comments).hasNext()){
148       //    TemplateModel aComment = ((TemplateListModel)comments).next();
149       //    commentsAggregate = commentsAggregate + " " + ((TemplateHashModel)aComment).get("title").toString()
150       //     + " " + ((TemplateHashModel)aComment).get("creator").toString()
151       //      + " " + ((TemplateHashModel)aComment).get("text").toString();
152       //  }
153       //}
154       //theDoc.add(Field.UnStored("comments",commentsAggregate));
155
156       indexWriter.addDocument(theDoc);
157
158
159     }
160     catch (Throwable t) {
161       aLogger.error("Error while indexing content: " + t.getMessage());
162       t.printStackTrace(new PrintWriter(new LoggerToWriterAdapter(aLogger, LoggerWrapper.DEBUG_MESSAGE)));
163     }
164     finally {
165       if (indexWriter != null){
166         try{
167           indexWriter.close();
168         }
169         catch (Throwable t) {
170           aLogger.warn("Error while closing indexWriter: " + t.getMessage());
171         }
172
173       }
174       try{
175         FSDirectory theIndexDir=FSDirectory.getDirectory(index,false);
176         if (IndexReader.isLocked(theIndexDir)){
177           IndexReader.unlock(theIndexDir);
178         }
179       }
180       catch (Throwable t) {
181         aLogger.warn("Error while unlocking index: " + t.getMessage());
182       }
183     }
184
185
186
187
188     endTime = System.currentTimeMillis();
189
190     aLogger.info("  IndexTime: " + (endTime-startTime) + " ms<br>");
191   }
192 }
193
194
195
196
197
198
199
200
201
202