2 * Copyright (C) 2001-2006 The Mir-coders group
4 * This file is part of Mir.
6 * Mir is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * Mir is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with Mir; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * In addition, as a special exception, The Mir-coders gives permission to link
21 * the code of this program with any library licensed under the Apache Software License,
22 * and distribute linked combinations including the two. You must obey the
23 * GNU General Public License in all respects for all of the code used other than
24 * the above mentioned libraries. If you modify this file, you may extend this
25 * exception to your version of the file, but you are not obligated to do so.
26 * If you do not wish to do so, delete this exception statement from your version.
30 import mir.util.DateTimeRoutines;
31 import mir.util.HTTPClientHelper;
32 import mir.util.xml.XMLParserEngine;
33 import mir.util.xml.XMLParserExc;
34 import mir.util.xml.XMLParserFailure;
36 import java.io.InputStream;
37 import java.util.ArrayList;
38 import java.util.HashMap;
39 import java.util.List;
42 public class RSSReader {
43 public static final String RDF_NAMESPACE_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
44 public static final String RSS_1_0_NAMESPACE_URI = "http://purl.org/rss/1.0/";
45 public static final String RSS_0_9_NAMESPACE_URI = "http://my.netscape.com/rdf/simple/0.9/";
46 public static final String DUBLINCORE_NAMESPACE_URI = "http://purl.org/dc/elements/1.1/";
47 public static final String EVENT_NAMESPACE_URI = "http://purl.org/rss/1.0/modules/event/";
48 public static final String TAXONOMY_NAMESPACE_URI = "http://web.resource.org/rss/1.0/modules/taxonomy/";
49 public static final String DUBLINCORE_TERMS_NAMESPACE_URI = "http://purl.org/dc/terms/";
50 public static final String CONTENT_NAMESPACE_URI = "http://purl.org/rss/1.0/modules/content/";
52 // ML: to be localized:
53 public static final String V2V_NAMESPACE_URI = "http://v2v.cc/rss/";
55 private static final mir.util.xml.XMLName RDF_ABOUT_PARAMETER = new mir.util.xml.XMLName(RDF_NAMESPACE_URI, "about");
56 private static final mir.util.xml.XMLName RDF_SEQUENCE_TAG = new mir.util.xml.XMLName(RDF_NAMESPACE_URI, "Seq");
57 private static final mir.util.xml.XMLName RDF_BAG_PARAMETER = new mir.util.xml.XMLName(RDF_NAMESPACE_URI, "Bag");
59 private static final mir.util.xml.XMLName RSS_CHANNEL_TAG = new mir.util.xml.XMLName(RSS_1_0_NAMESPACE_URI, "channel");
60 private static final mir.util.xml.XMLName RSS_ITEM_TAG = new mir.util.xml.XMLName(RSS_1_0_NAMESPACE_URI, "item");
61 private static final mir.util.xml.XMLName RSS_ITEMS_TAG = new mir.util.xml.XMLName(RSS_1_0_NAMESPACE_URI, "items");
64 private Map namespaceURItoModule;
65 private Map moduleToPrefix;
68 modules = new ArrayList();
69 namespaceURItoModule = new HashMap();
70 moduleToPrefix = new HashMap();
72 registerModule(new RSSBasicModule(RDF_NAMESPACE_URI, "RDF module"), "rdf");
73 registerModule(new RSSBasicModule(RSS_1_0_NAMESPACE_URI, "RSS 1.0 module"), "rss");
74 registerModule(new RSSBasicModule(RSS_0_9_NAMESPACE_URI, "RSS 0.9 module"), "rss");
76 RSSBasicModule dcModule = new RSSBasicModule(DUBLINCORE_NAMESPACE_URI, "RSS Dublin Core 1.1");
77 dcModule.addProperty("date", RSSModule.W3CDTF_PROPERTY_TYPE);
78 registerModule(dcModule, "dc");
80 RSSBasicModule dcTermsModule = new RSSBasicModule(DUBLINCORE_TERMS_NAMESPACE_URI, "RSS Qualified Dublin core");
81 dcTermsModule.addProperty("created", RSSModule.W3CDTF_PROPERTY_TYPE);
82 dcTermsModule.addProperty("issued", RSSModule.W3CDTF_PROPERTY_TYPE);
83 dcTermsModule.addProperty("modified", RSSModule.W3CDTF_PROPERTY_TYPE);
84 dcTermsModule.addProperty("dateAccepted", RSSModule.W3CDTF_PROPERTY_TYPE);
85 dcTermsModule.addProperty("dateCopyrighted", RSSModule.W3CDTF_PROPERTY_TYPE);
86 dcTermsModule.addProperty("dateSubmitted", RSSModule.W3CDTF_PROPERTY_TYPE);
87 registerModule(dcTermsModule, "dcterms");
89 RSSBasicModule v2vTermsModule = new RSSBasicModule(V2V_NAMESPACE_URI, "indymedia v2v RSS module");
90 v2vTermsModule.addMultiValuedProperty("topic", RSSModule.PCDATA_PROPERTY_TYPE);
91 v2vTermsModule.addMultiValuedProperty("genre", RSSModule.PCDATA_PROPERTY_TYPE);
92 v2vTermsModule.addMultiValuedProperty("link", RSSModule.PCDATA_PROPERTY_TYPE);
93 registerModule(v2vTermsModule, "v2v");
95 registerModule(new RSSBasicModule(EVENT_NAMESPACE_URI, "Event RSS module"), "ev");
96 registerModule(new RSSBasicModule(TAXONOMY_NAMESPACE_URI, "Taxonomy RSS module"), "taxo");
97 registerModule(new RSSBasicModule(CONTENT_NAMESPACE_URI , "Content RSS module"), "content");
100 public void registerModule(RSSModule aModule, String aPrefix) {
101 modules.add(aModule);
102 namespaceURItoModule.put(aModule.getNamespaceURI(), aModule);
103 moduleToPrefix.put(aModule, aPrefix);
106 public RSSData parseInputStream(InputStream aStream) throws RSSExc, RSSFailure {
108 RSSData result = new RSSData();
109 XMLParserEngine.getInstance().parse("xml", aStream, new RootSectionHandler(result));
113 catch (Throwable t) {
114 throw new RSSFailure(t);
118 public RSSData parseInputStream(InputStream aStream, String anEncoding) throws RSSExc, RSSFailure {
120 RSSData result = new RSSData();
121 XMLParserEngine.getInstance().parse("xml", aStream, anEncoding, new RootSectionHandler(result));
125 catch (Throwable t) {
126 throw new RSSFailure(t);
130 public RSSData parseUrl(String anUrl) throws RSSExc, RSSFailure {
132 HTTPClientHelper httpClientHelper = new HTTPClientHelper();
133 InputStream inputStream = httpClientHelper.getUrl(anUrl);
134 if (inputStream==null)
135 throw new RSSExc("RSSChannel.parseUrl: Can't get url content");
137 RSSData theRSSData = parseInputStream(inputStream);
138 httpClientHelper.releaseHTTPConnection();
141 catch (Throwable t) {
142 throw new RSSFailure(t);
146 public RSSData parseUrl(String anUrl, String anEncoding) throws RSSExc, RSSFailure {
148 HTTPClientHelper httpClientHelper = new HTTPClientHelper();
149 InputStream inputStream = httpClientHelper.getUrl(anUrl);
150 if (inputStream==null)
151 throw new RSSExc("RSSChannel.parseUrl: Can't get url content");
153 RSSData theRSSData = parseInputStream(inputStream, anEncoding);
154 httpClientHelper.releaseHTTPConnection();
157 catch (Throwable t) {
158 throw new RSSFailure(t);
162 private class RootSectionHandler extends mir.util.xml.AbstractSectionHandler {
163 private RSSData data;
165 public RootSectionHandler(RSSData aData) {
169 public mir.util.xml.SectionHandler startElement(mir.util.xml.XMLName aTag, Map anAttributes) throws XMLParserExc {
170 if (aTag.getLocalName().equals("RDF")) {
171 return new RDFSectionHandler(data);
173 throw new XMLParserFailure(new RSSExc("'RDF' tag expected"));
176 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
179 public void characters(String aCharacters) throws XMLParserExc {
180 if (aCharacters.trim().length()>0)
181 throw new XMLParserExc("No character data allowed here");
184 public void finishSection() throws XMLParserExc {
188 private class RDFSectionHandler extends mir.util.xml.AbstractSectionHandler {
189 private RSSData data;
192 public RDFSectionHandler(RSSData aData) {
196 public mir.util.xml.SectionHandler startElement(mir.util.xml.XMLName aTag, Map anAttributes) throws XMLParserExc {
197 String identifier = (String) anAttributes.get(RDF_ABOUT_PARAMETER);
198 String rdfClass = makeQualifiedName(aTag);
200 return new RDFResourceSectionHandler(rdfClass, identifier);
203 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
204 if (aHandler instanceof RDFResourceSectionHandler) {
205 data.addResource(((RDFResourceSectionHandler) aHandler).getResource());
209 public void characters(String aCharacters) throws XMLParserExc {
210 if (aCharacters.trim().length()>0)
211 throw new XMLParserExc("No character data allowed here");
214 public void finishSection() throws XMLParserExc {
218 private mir.util.xml.SectionHandler makePropertyValueSectionHandler(mir.util.xml.XMLName aTag, Map anAttributes) {
219 RSSModule module = (RSSModule) namespaceURItoModule.get(aTag.getNamespaceURI());
222 RSSModule.RSSModuleProperty property = module.getPropertyForName(aTag.getLocalName());
224 if (property!=null) {
225 switch (property.getType()) {
227 RSSModule.PCDATA_PROPERTY_TYPE:
228 return new PCDATASectionHandler();
230 RSSModule.RDFCOLLECTION_PROPERTY_TYPE:
231 return new RDFCollectionSectionHandler();
233 // RSSModule.RDF_PROPERTY_TYPE:
234 // return new RDFValueSectionHandler();
236 RSSModule.W3CDTF_PROPERTY_TYPE:
237 return new DateSectionHandler();
242 return new FlexiblePropertyValueSectionHandler();
245 private void usePropertyValueSectionHandler(RDFResource aResource, PropertyValueSectionHandler aHandler, mir.util.xml.XMLName aTag) {
246 RSSModule module = (RSSModule) namespaceURItoModule.get(aTag.getNamespaceURI());
249 RSSModule.RSSModuleProperty property = module.getPropertyForName(aTag.getLocalName());
251 if (property!=null && property.getIsMultiValued()) {
252 List value = (List) aResource.get(makeQualifiedName(aTag));
255 value = new ArrayList();
256 aResource.set(makeQualifiedName(aTag), value);
259 value.add(aHandler.getValue());
265 aResource.set(makeQualifiedName(aTag), aHandler.getValue());
268 private String makeQualifiedName(mir.util.xml.XMLName aName) {
269 String result=aName.getLocalName();
270 RSSModule module = (RSSModule) namespaceURItoModule.get(aName.getNamespaceURI());
272 String prefix = (String) moduleToPrefix.get(module);
274 if (prefix!=null && prefix.length()>0)
275 result = prefix+":"+result;
281 private class RDFResourceSectionHandler extends mir.util.xml.AbstractSectionHandler {
282 private String image;
283 private mir.util.xml.XMLName currentTag;
284 private RDFResource resource;
286 public RDFResourceSectionHandler(String anRDFClass, String anIdentifier) {
287 resource = new RDFResource(anRDFClass, anIdentifier);
290 public mir.util.xml.SectionHandler startElement(mir.util.xml.XMLName aTag, Map anAttributes) throws XMLParserExc {
293 return makePropertyValueSectionHandler(aTag, anAttributes);
296 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
297 if (aHandler instanceof PropertyValueSectionHandler) {
298 usePropertyValueSectionHandler(resource, (PropertyValueSectionHandler) aHandler, currentTag);
299 // resource.set(makeQualifiedName(currentTag), ( (PropertyValueSectionHandler) aHandler).getFieldValue());
303 public void characters(String aCharacters) throws XMLParserExc {
304 if (aCharacters.trim().length()>0)
305 throw new XMLParserExc("No character data allowed here");
308 public void finishSection() throws XMLParserExc {
311 public RDFResource getResource() {
312 if ((resource.getIdentifier()==null || resource.getIdentifier().length()==0) && resource.get("rss:link")!=null) {
313 resource.setIdentifier(resource.get("rss:link").toString());
320 private abstract class PropertyValueSectionHandler extends mir.util.xml.AbstractSectionHandler {
321 public abstract Object getValue();
324 private class FlexiblePropertyValueSectionHandler extends PropertyValueSectionHandler {
325 private StringBuffer stringData;
326 private Object structuredData;
328 public FlexiblePropertyValueSectionHandler() {
329 stringData = new StringBuffer();
333 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
334 if (aTag.equals(RDF_SEQUENCE_TAG))
335 return new RDFSequenceSectionHandler();
337 return new DiscardingSectionHandler();
340 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
341 if (aHandler instanceof RDFSequenceSectionHandler) {
342 structuredData= ((RDFSequenceSectionHandler) aHandler).getItems();
346 public void characters(String aCharacters) throws XMLParserExc {
347 stringData.append(aCharacters);
350 public void finishSection() throws XMLParserExc {
353 public String getData() {
354 return stringData.toString();
357 public Object getValue() {
358 if (structuredData==null)
359 return stringData.toString();
360 return structuredData;
364 private class RDFCollectionSectionHandler extends PropertyValueSectionHandler {
367 public RDFCollectionSectionHandler() {
368 items = new ArrayList();
371 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
372 if (aTag.equals(RDF_SEQUENCE_TAG))
373 return new RDFSequenceSectionHandler();
375 return new DiscardingSectionHandler();
378 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
379 if (aHandler instanceof RDFSequenceSectionHandler) {
380 items.addAll(((RDFSequenceSectionHandler) aHandler).getItems());
384 public void characters(String aCharacters) throws XMLParserExc {
385 if (aCharacters.trim().length()>0)
386 throw new XMLParserExc("No character data allowed here");
389 public void finishSection() throws XMLParserExc {
392 public List getItems() {
396 public Object getValue() {
401 private class PCDATASectionHandler extends PropertyValueSectionHandler {
402 private StringBuffer data;
404 public PCDATASectionHandler() {
405 data = new StringBuffer();
408 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
409 throw new XMLParserFailure(new RSSExc("No subtags allowed here"));
412 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
415 public void characters(String aCharacters) throws XMLParserExc {
416 data.append(aCharacters);
419 public void finishSection() throws XMLParserExc {
422 public String getData() {
423 return data.toString();
426 public Object getValue() {
427 return data.toString();
// Property value handler for W3CDTF-typed properties: accumulates the
// element's character data and, in getValue(), trims it and passes it to
// DateTimeRoutines.parseW3CDTFString.
// NOTE(review): several lines of this class are absent from the reviewed
// text (closing braces, the "try {" opener and the whole body of the catch
// clause), so what getValue() yields when parsing fails cannot be told here.
431 private class DateSectionHandler extends PropertyValueSectionHandler {
// Buffer for the raw character data of the element.
432 private StringBuffer data;
// Starts with an empty buffer.
434 public DateSectionHandler() {
435 data = new StringBuffer();
// Always throws an XMLParserFailure wrapping an RSSExc: no nested elements.
// NOTE(review): this overload takes a String tag where the resource handlers
// take an XMLName -- confirm which overload the parser invokes.
438 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
439 throw new XMLParserFailure(new RSSExc("No subtags allowed here"));
// No statements are visible for the end of a child section.
442 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
// Appends character data, whitespace included, to the buffer.
445 public void characters(String aCharacters) throws XMLParserExc {
446 data.append(aCharacters);
// No statements are visible for the end of the section.
449 public void finishSection() throws XMLParserExc {
// Converts the collected text to the property value.
452 public Object getValue() {
// Surrounding whitespace is removed before parsing.
454 String expression = data.toString().trim();
456 return DateTimeRoutines.parseW3CDTFString(expression);
// Catches everything thrown by the conversion; the handling itself is not
// visible in the reviewed text.
458 catch (Throwable t) {
// Handler for an RDF sequence element; exposes a list of items via getItems().
// NOTE(review): several lines of this class are absent from the reviewed
// text (the declaration of "items", the statements that use "item", the
// body of getItems() and the closing braces).
466 private class RDFSequenceSectionHandler extends mir.util.xml.AbstractSectionHandler {
// Starts with an empty item list.
469 public RDFSequenceSectionHandler() {
470 items = new ArrayList();
// Compares the String tag with "rdf:li" and, on a match, reads the
// "rdf:resource" attribute; every path visible here ends in a discarding
// handler for the child's content.
// NOTE(review): tag and attribute key are plain Strings here, whereas the
// resource handlers work with XMLName tags and look attributes up by XMLName
// (see RDF_ABOUT_PARAMETER) -- confirm which overload the parser invokes and
// what key type the attribute map uses.
473 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
474 if (aTag.equals("rdf:li")) {
475 String item = (String) anAttributes.get("rdf:resource");
481 return new DiscardingSectionHandler();
// No statements are visible for the end of a child section.
484 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
// No statements are visible for character data.
487 public void characters(String aCharacters) throws XMLParserExc {
// No statements are visible for the end of the section.
490 public void finishSection() throws XMLParserExc {
// Accessor used by the flexible and collection property value handlers.
493 public List getItems() {
498 private class RDFLiteralSectionHandler extends PropertyValueSectionHandler {
499 private StringBuffer data;
502 public RDFLiteralSectionHandler() {
503 data = new StringBuffer();
506 protected StringBuffer getData() {
510 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
512 data.append("<"+tag+">");
514 return new RDFLiteralSectionHandler();
517 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
518 data.append(((RDFLiteralSectionHandler) aHandler).getData());
519 data.append("</"+tag+">");
522 public void characters(String aCharacters) throws XMLParserExc {
523 data.append(aCharacters);
526 public void finishSection() throws XMLParserExc {
529 public Object getValue() {
530 return data.toString();
534 private class DiscardingSectionHandler extends mir.util.xml.AbstractSectionHandler {
535 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
539 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
542 public void characters(String aCharacters) throws XMLParserExc {
545 public void finishSection() throws XMLParserExc {