2 * Copyright (C) 2001, 2002 The Mir-coders group
4 * This file is part of Mir.
6 * Mir is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * Mir is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with Mir; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * In addition, as a special exception, The Mir-coders gives permission to link
21 * the code of this program with any library licensed under the Apache Software License,
22 * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library
23 * (or with modified versions of the above that use the same license as the above),
24 * and distribute linked combinations including the two. You must obey the
25 * GNU General Public License in all respects for all of the code used other than
26 * the above mentioned libraries. If you modify this file, you may extend this
27 * exception to your version of the file, but you are not obligated to do so.
28 * If you do not wish to do so, delete this exception statement from your version.
30 package mircoders.producer;
32 import java.text.SimpleDateFormat;
33 import java.util.Calendar;
34 import java.util.Date;
35 import java.util.GregorianCalendar;
38 import mir.entity.Entity;
39 import mir.entity.adapter.EntityAdapter;
40 import mir.log.LoggerWrapper;
41 import mir.misc.StringUtil;
42 import mir.producer.ProducerFailure;
43 import mir.producer.ProducerNode;
44 import mir.util.ParameterExpander;
45 import mircoders.entity.EntityContent;
46 import mircoders.search.AudioSearchTerm;
47 import mircoders.search.ContentSearchTerm;
48 import mircoders.search.ImagesSearchTerm;
49 import mircoders.search.IndexUtil;
50 import mircoders.search.KeywordSearchTerm;
51 import mircoders.search.TextSearchTerm;
52 import mircoders.search.TopicSearchTerm;
53 import mircoders.search.UnIndexedSearchTerm;
54 import mircoders.search.VideoSearchTerm;
56 import org.apache.lucene.analysis.standard.StandardAnalyzer;
57 import org.apache.lucene.document.Document;
58 import org.apache.lucene.index.IndexReader;
59 import org.apache.lucene.index.IndexWriter;
60 import org.apache.lucene.store.FSDirectory;
/**
 * Producer node that indexes a single content entity into a Lucene index.
 *
 * The object to index is looked up in the producer value map under
 * {@code contentKey}; the index location is obtained by expanding
 * {@code indexPath} against the same map.
 */
63 public class IndexingProducerNode implements ProducerNode {
// Key under which the entity adapter to index is looked up in the value map.
64 private String contentKey;
// Expression that is expanded against the value map to obtain the index location.
65 private String indexPath;
/**
 * Creates a node that indexes the entity found under the given key.
 *
 * @param aContentKey key used at produce time to look the entity adapter up
 *                    in the value map
 * @param pathToIndex expression that is expanded at produce time to the
 *                    location of the index
 */
67 public IndexingProducerNode(String aContentKey, String pathToIndex) {
68 contentKey = aContentKey;
69 indexPath = pathToIndex;
/**
 * Builds a Lucene document for one content entity and adds it to the index.
 *
 * Steps: resolve the index location and the entity from the value map,
 * create the index if none exists yet, call IndexUtil.unindexEntity for the
 * entity, build a document from the entity's fields and add it through an
 * IndexWriter. Afterwards the writer is closed, a locked index directory is
 * unlocked, and the elapsed time is logged.
 *
 * @param aValueMap producer value map; source of both the index location
 *                  and the entity adapter
 * @param aVerb     does not appear to be used here
 * @param aLogger   receives progress, error and timing messages
 * @throws ProducerFailure declared by the signature and raised by the type
 *         checks on the looked-up value.
 *         NOTE(review): the catch (Throwable) further down looks like it
 *         encloses those checks, in which case the failure is only logged
 *         and never reaches the caller -- confirm.
 */
72 public void produce(Map aValueMap, String aVerb, LoggerWrapper aLogger)
73 throws ProducerFailure {
74 IndexWriter indexWriter = null;
// Remember when we started so the elapsed time can be logged at the end.
81 startTime = System.currentTimeMillis();
// Resolve the index location and the object to index from the value map.
84 index = ParameterExpander.expandExpression(aValueMap, indexPath);
85 data = ParameterExpander.findValueForKey(aValueMap, contentKey);
// The looked-up value has to be an EntityAdapter.
// NOTE(review): instanceof is false for null, so if findValueForKey can
// return null the getClass() call in the message below throws a
// NullPointerException instead of the intended ProducerFailure -- confirm.
87 if (!(data instanceof EntityAdapter)) {
88 throw new ProducerFailure("IndexingProducerNode: value of '" +
89 contentKey + "' is not an EntityAdapter, but an " +
90 data.getClass().getName(), null);
// Unwrap the adapter; only content entities are indexed.
93 entity = ((EntityAdapter) data).getEntity();
95 if (!(entity instanceof EntityContent)) {
96 throw new ProducerFailure("IndexingProducerNode: value of '" +
97 contentKey + "' is not a content EntityAdapter, but a " +
98 entity.getClass().getName() + " adapter", null);
101 aLogger.info("Indexing " + (String) entity.getValue("id") + " into " +
104 // create an index here if one did not already exist
// NOTE(review): creating a missing index is reported at error level.
105 if (!(IndexReader.indexExists(index))) {
106 aLogger.error("Didn't find existing index, so I'm making one in " +
109 IndexWriter indexCreator =
110 new IndexWriter(index, new StandardAnalyzer(), true);
111 indexCreator.close();
// Presumably removes any document already indexed for this entity so that
// re-indexing does not create duplicates -- verify against IndexUtil.
114 IndexUtil.unindexEntity((EntityContent) entity, index);
// Open the existing index (third argument false, unlike the creation above).
116 indexWriter = new IndexWriter(index, new StandardAnalyzer(), false);
118 Document theDoc = new Document();
120 // Keyword is stored and indexed, but not tokenized
121 // Text is tokenized,stored, indexed
122 // Unindexed is not tokenized or indexed, only stored
123 // Unstored is tokenized and indexed, but not stored
124 //this initialization should go somewhere global like an xml file....
125 (new KeywordSearchTerm("id", "", "id", "", "id")).index(theDoc, entity);
// Derive a formatted creation date from the "webdb_create" value. The value
// is parsed by fixed character positions: year 0-4, month 5-7, day 8-10,
// hours 11-13, minutes 14-16.
127 String textValue = entity.getValue("webdb_create");
128 Calendar calendar = GregorianCalendar.getInstance();
// Stays empty when there is no creation date.
135 String formattedDate="";
137 if (textValue!=null) {
139 year = Integer.parseInt(textValue.substring(0,4));
140 month = Integer.parseInt(textValue.substring(5,7));
141 day = Integer.parseInt(textValue.substring(8,10));
142 hours = Integer.parseInt(textValue.substring(11,13));
143 minutes = Integer.parseInt(textValue.substring(14,16));
// Calendar months are zero-based, hence month-1.
145 calendar.set(year, month-1, day, hours, minutes);
146 date = calendar.getTime();
// NOTE(review): "hh" is the 12-hour pattern letter of SimpleDateFormat and
// the pattern has no am/pm marker, while the hours above are passed to
// Calendar.set as hour-of-day. Morning and afternoon times would then format
// identically -- confirm whether "HH" was intended and whether the search
// side relies on the current format.
147 SimpleDateFormat formatter = new SimpleDateFormat ("yyyy.MM.dd hh:mm");
148 formattedDate=formatter.format(date);
// A problem while building the date is logged (stack trace at debug level).
151 aLogger.error("Error while generating content date to index: " + t.getMessage());
152 t.printStackTrace(aLogger.asPrintWriter(LoggerWrapper.DEBUG_MESSAGE));
// Index the formatted creation date as a keyword.
155 (new KeywordSearchTerm("webdb_create_formatted", "search_date",
156 "webdb_create_formatted", "webdb_create_formatted",
157 "webdb_create_formatted")).indexValue(theDoc,formattedDate);
// Store the "where" value: the path derived from the entity's "date" value,
// followed by the id and ".shtml".
160 (new UnIndexedSearchTerm("", "", "", "where", "where")).indexValue(theDoc,
161 StringUtil.webdbDate2path(entity.getValue("date")) +
162 entity.getValue("id") + ".shtml");
// Creator and title go in as text terms; description and the raw creation
// date go in as unindexed terms.
164 (new TextSearchTerm("creator", "search_creator", "creator", "creator",
165 "creator")).index(theDoc, entity);
166 (new TextSearchTerm("title", "search_title", "title", "title", "title")).index(theDoc,
168 (new UnIndexedSearchTerm("description", "search_content", "description",
169 "description", "description")).index(theDoc, entity);
170 (new UnIndexedSearchTerm("webdb_create", "search_irrelevant",
171 "creationDate", "creationDate", "creationDate")).index(theDoc, entity);
// The content term receives content_data, description and title joined by
// single spaces.
173 (new ContentSearchTerm("content_data", "search_content", "content", "", "")).indexValue(theDoc,
174 entity.getValue("content_data") + " " + entity.getValue("description") +
175 " " + entity.getValue("title"));
// Topic and media terms take their values from the entity themselves.
177 (new TopicSearchTerm()).index(theDoc, entity);
179 (new ImagesSearchTerm()).index(theDoc, entity);
181 (new AudioSearchTerm()).index(theDoc, entity);
183 (new VideoSearchTerm()).index(theDoc, entity);
185 //comments-just aggregate all relevant fields
186 //removed until i get a chance to do this right
187 //String commentsAggregate = "";
188 //TemplateModel comments=entity.get("to_comments");
189 //if (comments != null){
190 // while (((TemplateListModel)comments).hasNext()){
191 // TemplateModel aComment = ((TemplateListModel)comments).next();
192 // commentsAggregate = commentsAggregate + " " + ((TemplateHashModel)aComment).get("title").toString()
193 // + " " + ((TemplateHashModel)aComment).get("creator").toString()
194 // + " " + ((TemplateHashModel)aComment).get("text").toString();
197 //theDoc.add(Field.UnStored("comments",commentsAggregate));
// Hand the finished document to the index writer.
198 indexWriter.addDocument(theDoc);
// Any failure while indexing is logged (stack trace at debug level).
200 catch (Throwable t) {
201 aLogger.error("Error while indexing content: " + t.getMessage());
202 t.printStackTrace(aLogger.asPrintWriter(LoggerWrapper.DEBUG_MESSAGE));
// Close the writer if one was opened; a failure to close is only a warning.
205 if (indexWriter != null) {
208 } catch (Throwable t) {
209 aLogger.warn("Error while closing indexWriter: " + t.getMessage());
// Clear a lock left on the index directory; a failure here is only a warning.
// NOTE(review): if resolving the index location failed earlier, index may
// not hold a usable value at this point -- confirm.
214 FSDirectory theIndexDir = FSDirectory.getDirectory(index, false);
216 if (IndexReader.isLocked(theIndexDir)) {
217 IndexReader.unlock(theIndexDir);
219 } catch (Throwable t) {
220 aLogger.warn("Error while unlocking index: " + t.getMessage());
// Log how long the whole indexing step took.
224 endTime = System.currentTimeMillis();
226 aLogger.info(" IndexTime: " + (endTime - startTime) + " ms<br>");