1 /*TK make sure the items go somewhere! */
4 * Copyright (C) 2001-2006 The Mir-coders group
6 * This file is part of Mir.
8 * Mir is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
13 * Mir is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with Mir; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 * In addition, as a special exception, The Mir-coders gives permission to link
23 * the code of this program with any library licensed under the Apache Software License,
24 * and distribute linked combinations including the two. You must obey the
25 * GNU General Public License in all respects for all of the code used other than
26 * the above mentioned libraries. If you modify this file, you may extend this
27 * exception to your version of the file, but you are not obligated to do so.
28 * If you do not wish to do so, delete this exception statement from your version.
32 import mir.log.LoggerWrapper;
33 import mir.util.HTTPClientHelper;
35 import org.apache.commons.feedparser.*;
36 import org.apache.commons.feedparser.impl.*;
37 import org.apache.commons.feedparser.network.*;
40 import java.io.InputStream;
41 import java.text.SimpleDateFormat;
42 import java.util.ArrayList;
43 import java.util.HashMap;
44 import java.util.Iterator;
45 import java.util.List;
46 import java.util.LinkedList;
52 * <p>Description: </p>
53 * <p>Copyright: Copyright (c) 2007</p>
55 * @author not attributable
59 public class RSS2Reader {
60 private FeedParser parser;
61 private FeedParserListener listener;
62 private RSSData result;
64 private RDFResource channel;
65 static final LoggerWrapper logger =
66 new LoggerWrapper("rss.rss2reader");
69 private RDFResource currentItem(){
71 return (RDFResource) items.get(items.size()-1);
73 catch (IndexOutOfBoundsException i){
74 logger.debug( "Something screwy happened!");
75 return new RDFResource("nonexistent","entity");
80 public RSS2Reader() throws org.apache.commons.feedparser.FeedParserException {
81 parser = FeedParserFactory.newFeedParser();
83 result = new RSSData();
84 items = new ArrayList();
85 channel = new RDFResource("rss:channel");
88 listener = new DefaultFeedParserListener() {
91 public void onItem( FeedParserState state,
95 String permalink ) throws FeedParserException {
96 logger.debug( "Found a new published article: " + permalink );
97 RDFResource item = new RDFResource("rss:item", link);
98 item.set("rss:link",permalink);
99 item.set("rss:title",title);
100 item.set("rss:description",description);
104 public void onAuthor(FeedParserState state, java.lang.String name, java.lang.String email, java.lang.String resource){
105 currentItem().set("dc:creator",name);
108 public void onContent(FeedParserState state, java.lang.String type, java.lang.String format, java.lang.String encoding, java.lang.String mode, java.lang.String value, boolean isSummary) {
109 currentItem().set("content.encoded",value);
112 public void onCreated(FeedParserState state,java.util.Date date){
113 SimpleDateFormat formatter = new SimpleDateFormat("yyyy-mm-ddThh:mm:ssTZ");
114 currentItem().set("dc:date",formatter.format(date));
117 public void onItemEnd(){
118 result.addResource(currentItem());
119 logger.debug( "Finished processing article" );
129 public RSSData parseInputStream(InputStream aStream) throws RSSExc, RSSFailure {
131 parser.parse( listener, aStream, "" );
134 catch (Throwable t) {
135 throw new RSSFailure(t);
139 public RSSData parseInputStream(InputStream aStream, String anEncoding) throws RSSExc, RSSFailure {
141 return parseInputStream(aStream);
143 catch (Throwable t) {
144 throw new RSSFailure(t);
148 public RSSData parseUrl(String anUrl) throws RSSExc, RSSFailure {
150 ResourceRequest request = ResourceRequestFactory.getResourceRequest( anUrl );
151 InputStream is = request.getInputStream();
152 return parseInputStream(is);
155 catch (Throwable t) {
156 throw new RSSFailure(t);
160 public RSSData parseUrl(String anUrl, String anEncoding) throws RSSExc, RSSFailure {
162 return parseUrl(anUrl);
165 catch (Throwable t) {
166 throw new RSSFailure(t);