X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=source%2Fmircoders%2Fproducer%2FIndexingProducerNode.java;h=3641ec5edc9b75d7da226e628edfc9d112f796a6;hb=a178c6c1739363851d03935b2b6fdebea1d664b5;hp=051f59766f811580de2dedcec022da557efcb23c;hpb=b4c3cd9c13e761f8dcb803b5e6778e223b35c387;p=mir.git diff --git a/source/mircoders/producer/IndexingProducerNode.java b/source/mircoders/producer/IndexingProducerNode.java index 051f5976..3641ec5e 100755 --- a/source/mircoders/producer/IndexingProducerNode.java +++ b/source/mircoders/producer/IndexingProducerNode.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2001, 2002 The Mir-coders group + * Copyright (C) 2001, 2002 The Mir-coders group * * This file is part of Mir. * @@ -18,197 +18,205 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * In addition, as a special exception, The Mir-coders gives permission to link - * the code of this program with the com.oreilly.servlet library, any library - * licensed under the Apache Software License, The Sun (tm) Java Advanced - * Imaging library (JAI), The Sun JIMI library (or with modified versions of - * the above that use the same license as the above), and distribute linked - * combinations including the two. You must obey the GNU General Public - * License in all respects for all of the code used other than the above - * mentioned libraries. If you modify this file, you may extend this exception - * to your version of the file, but you are not obligated to do so. If you do - * not wish to do so, delete this exception statement from your version. + * the code of this program with any library licensed under the Apache Software License, + * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library + * (or with modified versions of the above that use the same license as the above), + * and distribute linked combinations including the two. You must obey the + * GNU General Public License in all respects for all of the code used other than + * the above mentioned libraries. If you modify this file, you may extend this + * exception to your version of the file, but you are not obligated to do so. + * If you do not wish to do so, delete this exception statement from your version. */ - package mircoders.producer; -import java.util.*; -import java.io.*; - +import mir.entity.Entity; +import mir.entity.adapter.EntityAdapter; +import mir.log.LoggerWrapper; +import mir.misc.StringUtil; +import mir.producer.AbstractProducerNode; +import mir.producer.ProducerFailure; +import mir.util.FileRoutines; +import mir.util.ParameterExpander; +import mircoders.entity.EntityContent; +import mircoders.search.*; import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.index.*; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.FSDirectory; -import freemarker.template.*; - - -import mir.util.*; -import mir.producer.*; -//import mir.generator.*; -import mircoders.global.*; -import mircoders.localizer.*; -import mir.entity.*; -import mir.entity.adapter.*; -import mircoders.entity.*; -import mircoders.storage.*; +import java.io.File; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.Map; -public class IndexingProducerNode implements ProducerNode { +public class IndexingProducerNode extends AbstractProducerNode { private String contentKey; private String indexPath; - - - public IndexingProducerNode(String aContentKey, String pathToIndex) { + private File indexBasePath; + + public IndexingProducerNode(File anIndexBasePath, String aContentKey, String pathToIndex) { contentKey = aContentKey; - indexPath=pathToIndex; + indexPath = pathToIndex; + indexBasePath = anIndexBasePath; } - - public void produce(Map aValueMap, String aVerb, PrintWriter aLogger) throws ProducerFailure { - IndexReader indexReader = null; + + public void produce(Map aValueMap, String aVerb, LoggerWrapper aLogger) + throws ProducerFailure { IndexWriter indexWriter = null; Object data; Entity entity; - long startTime; long endTime; - + File indexFile = null; + startTime = System.currentTimeMillis(); - + try { - data = ParameterExpander.findValueForKey( aValueMap, contentKey ); - - if (! (data instanceof EntityAdapter)) { - throw new ProducerFailure("IndexingProducerNode: value of '"+contentKey+"' is not an EntityAdapter, but an " + data.getClass().getName(), null); + indexFile = FileRoutines.getAbsoluteOrRelativeFile(indexBasePath, ParameterExpander.expandExpression(aValueMap, indexPath)); + data = ParameterExpander.findValueForKey(aValueMap, contentKey); + + if (!(data instanceof EntityAdapter)) { + throw new ProducerFailure("IndexingProducerNode: value of '" + + contentKey + "' is not an EntityAdapter, but an " + + data.getClass().getName(), null); } - + entity = ((EntityAdapter) data).getEntity(); - if (! (entity instanceof EntityContent)) { - throw new ProducerFailure("IndexingProducerNode: value of '"+contentKey+"' is not a content EntityAdapter, but a " + entity.getClass().getName() + " adapter", null); + + if (!(entity instanceof EntityContent)) { + throw new ProducerFailure("IndexingProducerNode: value of '" + + contentKey + "' is not a content EntityAdapter, but a " + + entity.getClass().getName() + " adapter", null); } - aLogger.println("Indexing " + (String) entity.getValue("id") + " into " + indexPath); - aLogger.flush(); - - indexReader = IndexReader.open(indexPath); - indexReader.delete(new Term("id",entity.getValue("id"))); - indexReader.close(); - - indexWriter = new IndexWriter(indexPath, new StandardAnalyzer(), false); - Document theDoc = new Document(); - + + aLogger.debug("Indexing " + entity.getFieldValue("id") + " into " + indexFile.getAbsolutePath()); + + // create an index here if one did not already exist + if (!(IndexReader.indexExists(indexFile))) { + aLogger.warn("Didn't find existing index, so I'm making one in " + indexFile.getAbsolutePath()); + + IndexWriter indexCreator = new IndexWriter(indexFile, new StandardAnalyzer(), true); + indexCreator.close(); + } + + IndexUtil.unindexEntity((EntityContent) entity, indexFile); + + indexWriter = new IndexWriter(indexFile, new StandardAnalyzer(), false); + + Document theDoc = new Document(); + // Keyword is stored and indexed, but not tokenized // Text is tokenized,stored, indexed // Unindexed is not tokenized or indexed, only stored // Unstored is tokenized and indexed, but not stored - - theDoc.add(Field.Keyword("id",entity.getValue("id"))); - theDoc.add(Field.Keyword("where",entity.getValue("publish_path")+entity.getValue("id")+".shtml")); - theDoc.add(Field.Text("creator",entity.getValue("creator"))); - theDoc.add(Field.Text("title",entity.getValue("title"))); - theDoc.add(Field.Keyword("webdb_create",entity.getValue("webdb_create_formatted"))); - theDoc.add(Field.UnStored("content_and_description",entity.getValue("description")+entity.getValue("content_data"))); - - //topics - TemplateModel topics=entity.get("to_topics"); - aLogger.println("THE CLASS NAME WAS: "+entity.get("to_topics").getClass().getName()); - while (((TemplateListModel)topics).hasNext()){ - theDoc.add(Field.UnStored("topic",((TemplateHashModel)((TemplateListModel)topics).next()).get("title").toString())); - } - - - //media - - //images - TemplateModel images=entity.get("to_media_images"); - if (images != null){ - //here we should really store a list of urls instead, - //so we can thumbnail from another server - theDoc.add(Field.UnStored("media","images")); - - } - //audio - TemplateModel audio=entity.get("to_media_audio"); - if (audio != null){ - theDoc.add(Field.UnStored("media","audio")); - } - //video - TemplateModel video=entity.get("to_media_video"); - if (video != null){ - theDoc.add(Field.UnStored("media","video")); + //this initialization should go somewhere global like an xml file.... + (new KeywordSearchTerm("id", "", "id", "", "id")).index(theDoc, entity); + + String textValue = entity.getFieldValue("webdb_create"); + Calendar calendar = GregorianCalendar.getInstance(); + int year; + int month; + int day; + int hours; + int minutes; + Date date; + String formattedDate=""; + + if (textValue!=null) { + try { + year = Integer.parseInt(textValue.substring(0, 4)); + month = Integer.parseInt(textValue.substring(5, 7)); + day = Integer.parseInt(textValue.substring(8, 10)); + hours = Integer.parseInt(textValue.substring(11, 13)); + minutes = Integer.parseInt(textValue.substring(14, 16)); + + calendar.set(year, month - 1, day, hours, minutes); + date = calendar.getTime(); + SimpleDateFormat formatter = new SimpleDateFormat("yyyy.MM.dd hh:mm"); + formattedDate = formatter.format(date); + } + catch (Throwable t) { + aLogger.warn("Error while generating content date to index", t); + } } + (new KeywordSearchTerm("webdb_create_formatted", "search_date", + "webdb_create_formatted", "webdb_create_formatted", + "webdb_create_formatted")).indexValue(theDoc,formattedDate); - //comments-just aggregate all relevant fields - String commentsAggregate = ""; - TemplateModel comments=entity.get("to_comments"); - if (comments != null){ - while (((TemplateListModel)comments).hasNext()){ - TemplateModel aComment = ((TemplateListModel)comments).next(); - commentsAggregate = commentsAggregate + " " + ((TemplateHashModel)aComment).get("title").toString() - + " " + ((TemplateHashModel)aComment).get("creator").toString() - + " " + ((TemplateHashModel)aComment).get("text").toString(); - } - } - theDoc.add(Field.UnStored("comments",commentsAggregate)); - indexWriter.addDocument(theDoc); - + (new UnIndexedSearchTerm("", "", "", "where", "where")).indexValue(theDoc, + "/"+ year +"/" + month + + entity.getFieldValue("id") + ".shtml"); + + (new TextSearchTerm("creator", "search_creator", "creator", "creator", + "creator")).index(theDoc, entity); + (new TextSearchTerm("title", "search_title", "title", "title", "title")).index(theDoc, + entity); + (new UnIndexedSearchTerm("description", "search_content", "description", + "description", "description")).index(theDoc, entity); + (new UnIndexedSearchTerm("webdb_create", "search_irrelevant", + "creationDate", "creationDate", "creationDate")).index(theDoc, entity); + + (new ContentSearchTerm("content_data", "search_content", "content", "", "")).indexValue(theDoc, + entity.getFieldValue("content_data") + " " + entity.getFieldValue("description") + + " " + entity.getFieldValue("title")); + + (new TopicSearchTerm()).index(theDoc, entity); + + (new ImagesSearchTerm()).index(theDoc, entity); + + (new AudioSearchTerm()).index(theDoc, entity); + (new VideoSearchTerm()).index(theDoc, entity); + + //comments-just aggregate all relevant fields + //removed until i get a chance to do this right + //String commentsAggregate = ""; + //TemplateModel comments=entity.get("to_comments"); + //if (comments != null){ + // while (((TemplateListModel)comments).hasNext()){ + // TemplateModel aComment = ((TemplateListModel)comments).next(); + // commentsAggregate = commentsAggregate + " " + ((TemplateHashModel)aComment).get("title").toString() + // + " " + ((TemplateHashModel)aComment).get("creator").toString() + // + " " + ((TemplateHashModel)aComment).get("text").toString(); + // } + //} + //theDoc.add(Field.UnStored("comments",commentsAggregate)); + indexWriter.addDocument(theDoc); } catch (Throwable t) { - aLogger.println("Error while indexing content: " + t.getMessage()); - t.printStackTrace(aLogger); - //should remove index lock here.....jd - throw new ProducerFailure(t.getMessage(), t); + aLogger.error("Error while indexing content: " + t.getMessage(), t); } finally { - if (indexReader != null){ - try{ - indexReader.close(); - } - catch (Throwable t) { - aLogger.println("Error while closing indexReader: " + t.getMessage()); - } - + if (indexWriter != null) { + try { + indexWriter.close(); + } + catch (Throwable t) { + aLogger.warn("Error while closing indexWriter", t); + } } - if (indexWriter != null){ - try{ - indexWriter.close(); - } - catch (Throwable t) { - aLogger.println("Error while closing indexWriter: " + t.getMessage()); - } - - } - - - try{ - FSDirectory theIndexDir=FSDirectory.getDirectory(indexPath,false); - if (indexReader.isLocked(theIndexDir)){ - indexReader.unlock(theIndexDir); - } - } - catch (Throwable t) { - aLogger.println("Error while unlocking index: " + t.getMessage()); + if (indexFile!=null) { + try { + FSDirectory theIndexDir = FSDirectory.getDirectory(indexFile, false); + + if (IndexReader.isLocked(theIndexDir)) { + IndexReader.unlock(theIndexDir); + } + } + catch (Throwable t) { + aLogger.warn("Error while unlocking index", t); + } } } - - - endTime = System.currentTimeMillis(); - - aLogger.println(" IndexTime: " + (endTime-startTime) + " ms
"); - aLogger.flush(); + + aLogger.debug(" IndexTime: " + (endTime - startTime) + " ms
"); } } - - - - - - - - - -