2 * Copyright (C) 2001, 2002 The Mir-coders group
4 * This file is part of Mir.
6 * Mir is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * Mir is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with Mir; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * In addition, as a special exception, The Mir-coders gives permission to link
21 * the code of this program with any library licensed under the Apache Software License,
22 * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library
23 * (or with modified versions of the above that use the same license as the above),
24 * and distribute linked combinations including the two. You must obey the
25 * GNU General Public License in all respects for all of the code used other than
26 * the above mentioned libraries. If you modify this file, you may extend this
27 * exception to your version of the file, but you are not obligated to do so.
28 * If you do not wish to do so, delete this exception statement from your version.
32 import java.io.InputStream;
33 import java.util.ArrayList;
34 import java.util.HashMap;
35 import java.util.List;
38 import mir.util.DateTimeRoutines;
39 import mir.util.HTTPClientHelper;
40 import mir.util.xml.XMLParserEngine;
41 import mir.util.xml.XMLParserExc;
42 import mir.util.xml.XMLParserFailure;
/**
 * Reads RDF-based RSS feeds from a stream or a URL into an RSSData object.
 * RSS modules are registered per namespace URI, together with the prefix that
 * is used when building qualified names such as "rss:link".
 */
44 public class RSSReader {
// Namespace URIs of the vocabularies for which a module is registered by default.
45 public static final String RDF_NAMESPACE_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
46 public static final String RSS_1_0_NAMESPACE_URI = "http://purl.org/rss/1.0/";
47 public static final String RSS_0_9_NAMESPACE_URI = "http://my.netscape.com/rdf/simple/0.9/";
48 public static final String DUBLINCORE_NAMESPACE_URI = "http://purl.org/dc/elements/1.1/";
49 public static final String EVENT_NAMESPACE_URI = "http://purl.org/rss/1.0/modules/event/";
50 public static final String TAXONOMY_NAMESPACE_URI = "http://web.resource.org/rss/1.0/modules/taxonomy/";
51 public static final String DUBLINCORE_TERMS_NAMESPACE_URI = "http://purl.org/dc/terms/";
52 public static final String CONTENT_NAMESPACE_URI = "http://purl.org/rss/1.0/modules/content/";
54 // ML: to be localized:
55 public static final String V2V_NAMESPACE_URI = "http://v2v.cc/rss/";
// Qualified names in the RDF namespace: the "about" attribute and the Seq / Bag containers.
// NOTE(review): RDF_BAG_PARAMETER is named like an attribute but holds the local name "Bag" - confirm intent.
57 private static final mir.util.xml.XMLName RDF_ABOUT_PARAMETER = new mir.util.xml.XMLName(RDF_NAMESPACE_URI, "about");
58 private static final mir.util.xml.XMLName RDF_SEQUENCE_TAG = new mir.util.xml.XMLName(RDF_NAMESPACE_URI, "Seq");
59 private static final mir.util.xml.XMLName RDF_BAG_PARAMETER = new mir.util.xml.XMLName(RDF_NAMESPACE_URI, "Bag");
// Qualified names of the channel, item and items elements in the RSS 1.0 namespace.
61 private static final mir.util.xml.XMLName RSS_CHANNEL_TAG = new mir.util.xml.XMLName(RSS_1_0_NAMESPACE_URI, "channel");
62 private static final mir.util.xml.XMLName RSS_ITEM_TAG = new mir.util.xml.XMLName(RSS_1_0_NAMESPACE_URI, "item");
63 private static final mir.util.xml.XMLName RSS_ITEMS_TAG = new mir.util.xml.XMLName(RSS_1_0_NAMESPACE_URI, "items");
// Lookup tables maintained by registerModule(): namespace URI -> module, and module -> prefix.
66 private Map namespaceURItoModule;
67 private Map moduleToPrefix;
// Start with an empty module list and empty lookup tables.
70 modules = new ArrayList();
71 namespaceURItoModule = new HashMap();
72 moduleToPrefix = new HashMap();
// Register the built-in modules, each with the prefix used for qualified names.
// NOTE(review): the RSS 1.0 and RSS 0.9 modules are both registered under the prefix "rss".
74 registerModule(new RSSBasicModule(RDF_NAMESPACE_URI, "RDF module"), "rdf");
75 registerModule(new RSSBasicModule(RSS_1_0_NAMESPACE_URI, "RSS 1.0 module"), "rss");
76 registerModule(new RSSBasicModule(RSS_0_9_NAMESPACE_URI, "RSS 0.9 module"), "rss");
// Dublin Core: "date" is declared with the W3CDTF property type.
78 RSSBasicModule dcModule = new RSSBasicModule(DUBLINCORE_NAMESPACE_URI, "RSS Dublin Core 1.1");
79 dcModule.addProperty("date", RSSModule.W3CDTF_PROPERTY_TYPE);
80 registerModule(dcModule, "dc");
// Qualified Dublin Core: the listed terms are declared with the W3CDTF property type.
82 RSSBasicModule dcTermsModule = new RSSBasicModule(DUBLINCORE_TERMS_NAMESPACE_URI, "RSS Qualified Dublin core");
83 dcTermsModule.addProperty("created", RSSModule.W3CDTF_PROPERTY_TYPE);
84 dcTermsModule.addProperty("issued", RSSModule.W3CDTF_PROPERTY_TYPE);
85 dcTermsModule.addProperty("modified", RSSModule.W3CDTF_PROPERTY_TYPE);
86 dcTermsModule.addProperty("dateAccepted", RSSModule.W3CDTF_PROPERTY_TYPE);
87 dcTermsModule.addProperty("dateCopyrighted", RSSModule.W3CDTF_PROPERTY_TYPE);
88 dcTermsModule.addProperty("dateSubmitted", RSSModule.W3CDTF_PROPERTY_TYPE);
89 registerModule(dcTermsModule, "dcterms");
// v2v module: topic, genre and link are declared as multi-valued PCDATA properties.
91 RSSBasicModule v2vTermsModule = new RSSBasicModule(V2V_NAMESPACE_URI, "indymedia v2v RSS module");
92 v2vTermsModule.addMultiValuedProperty("topic", RSSModule.PCDATA_PROPERTY_TYPE);
93 v2vTermsModule.addMultiValuedProperty("genre", RSSModule.PCDATA_PROPERTY_TYPE);
94 v2vTermsModule.addMultiValuedProperty("link", RSSModule.PCDATA_PROPERTY_TYPE);
95 registerModule(v2vTermsModule, "v2v");
// Modules registered without any explicitly declared properties.
97 registerModule(new RSSBasicModule(EVENT_NAMESPACE_URI, "Event RSS module"), "ev");
98 registerModule(new RSSBasicModule(TAXONOMY_NAMESPACE_URI, "Taxonomy RSS module"), "taxo");
99 registerModule(new RSSBasicModule(CONTENT_NAMESPACE_URI , "Content RSS module"), "content");
102 public void registerModule(RSSModule aModule, String aPrefix) {
103 modules.add(aModule);
104 namespaceURItoModule.put(aModule.getNamespaceURI(), aModule);
105 moduleToPrefix.put(aModule, aPrefix);
108 public RSSData parseInputStream(InputStream aStream) throws RSSExc, RSSFailure {
110 RSSData result = new RSSData();
111 XMLParserEngine.getInstance().parse("xml", aStream, new RootSectionHandler(result));
115 catch (Throwable t) {
116 throw new RSSFailure(t);
120 public RSSData parseInputStream(InputStream aStream, String anEncoding) throws RSSExc, RSSFailure {
122 RSSData result = new RSSData();
123 XMLParserEngine.getInstance().parse("xml", aStream, anEncoding, new RootSectionHandler(result));
127 catch (Throwable t) {
128 throw new RSSFailure(t);
132 public RSSData parseUrl(String anUrl) throws RSSExc, RSSFailure {
134 HTTPClientHelper httpClientHelper = new HTTPClientHelper();
135 InputStream inputStream = httpClientHelper.getUrl(anUrl);
136 if (inputStream==null)
137 throw new RSSExc("RSSChannel.parseUrl: Can't get url content");
139 RSSData theRSSData = parseInputStream(inputStream);
140 httpClientHelper.releaseHTTPConnection();
143 catch (Throwable t) {
144 throw new RSSFailure(t);
148 public RSSData parseUrl(String anUrl, String anEncoding) throws RSSExc, RSSFailure {
150 HTTPClientHelper httpClientHelper = new HTTPClientHelper();
151 InputStream inputStream = httpClientHelper.getUrl(anUrl);
152 if (inputStream==null)
153 throw new RSSExc("RSSChannel.parseUrl: Can't get url content");
155 RSSData theRSSData = parseInputStream(inputStream, anEncoding);
156 httpClientHelper.releaseHTTPConnection();
159 catch (Throwable t) {
160 throw new RSSFailure(t);
// Section handler for the document root. Only an element whose local name is
// "RDF" is accepted; its content is delegated to an RDFSectionHandler.
164 private class RootSectionHandler extends mir.util.xml.AbstractSectionHandler {
// The RSSData object that is handed on to the RDFSectionHandler.
165 private RSSData data;
167 public RootSectionHandler(RSSData aData) {
// Only the local name is compared here; the namespace of the tag is not checked.
171 public mir.util.xml.SectionHandler startElement(mir.util.xml.XMLName aTag, Map anAttributes) throws XMLParserExc {
172 if (aTag.getLocalName().equals("RDF")) {
173 return new RDFSectionHandler(data);
// Reached when the tag is not "RDF": reported as a parser failure wrapping an RSSExc.
175 throw new XMLParserFailure(new RSSExc("'RDF' tag expected"));
178 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
// Character data other than whitespace is rejected at this level.
181 public void characters(String aCharacters) throws XMLParserExc {
182 if (aCharacters.trim().length()>0)
183 throw new XMLParserExc("No character data allowed here");
186 public void finishSection() throws XMLParserExc {
// Section handler for the content of the RDF element. Each child element is
// read as an RDF resource and added to the RSSData object when it ends.
190 private class RDFSectionHandler extends mir.util.xml.AbstractSectionHandler {
191 private RSSData data;
194 public RDFSectionHandler(RSSData aData) {
// The rdf:about attribute becomes the resource identifier, and the qualified
// tag name becomes its RDF class.
198 public mir.util.xml.SectionHandler startElement(mir.util.xml.XMLName aTag, Map anAttributes) throws XMLParserExc {
199 String identifier = (String) anAttributes.get(RDF_ABOUT_PARAMETER);
200 String rdfClass = makeQualifiedName(aTag);
202 return new RDFResourceSectionHandler(rdfClass, identifier);
// Collects the finished resource from the child handler.
205 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
206 if (aHandler instanceof RDFResourceSectionHandler) {
207 data.addResource(((RDFResourceSectionHandler) aHandler).getResource());
// Character data other than whitespace is rejected at this level.
211 public void characters(String aCharacters) throws XMLParserExc {
212 if (aCharacters.trim().length()>0)
213 throw new XMLParserExc("No character data allowed here");
216 public void finishSection() throws XMLParserExc {
// Picks the section handler for a property element. The module registered for
// the tag's namespace is asked for the property named by the tag's local name,
// and the declared property type selects the handler.
220 private mir.util.xml.SectionHandler makePropertyValueSectionHandler(mir.util.xml.XMLName aTag, Map anAttributes) {
221 RSSModule module = (RSSModule) namespaceURItoModule.get(aTag.getNamespaceURI());
// NOTE(review): module is null for namespaces without a registered module - confirm it is guarded before this call.
224 RSSModule.RSSModuleProperty property = module.getPropertyForName(aTag.getLocalName());
226 if (property!=null) {
227 switch (property.getType()) {
// PCDATA properties: plain character data.
229 RSSModule.PCDATA_PROPERTY_TYPE:
230 return new PCDATASectionHandler();
// RDF collection properties.
232 RSSModule.RDFCOLLECTION_PROPERTY_TYPE:
233 return new RDFCollectionSectionHandler();
// NOTE(review): handling of RDF_PROPERTY_TYPE is commented out.
235 // RSSModule.RDF_PROPERTY_TYPE:
236 // return new RDFValueSectionHandler();
// W3CDTF properties: handled by DateSectionHandler.
238 RSSModule.W3CDTF_PROPERTY_TYPE:
239 return new DateSectionHandler();
// Fallback when no typed handler was returned above.
244 return new FlexiblePropertyValueSectionHandler();
// Stores the value produced by a property handler on the resource, under the
// qualified name of the property's tag. Multi-valued properties are kept as a
// List of values; other properties are set directly.
247 private void usePropertyValueSectionHandler(RDFResource aResource, PropertyValueSectionHandler aHandler, mir.util.xml.XMLName aTag) {
248 RSSModule module = (RSSModule) namespaceURItoModule.get(aTag.getNamespaceURI());
// NOTE(review): module is null for namespaces without a registered module - confirm it is guarded before this call.
251 RSSModule.RSSModuleProperty property = module.getPropertyForName(aTag.getLocalName());
253 if (property!=null && property.getIsMultiValued()) {
// Multi-valued: fetch the list already stored under this name.
254 List value = (List) aResource.get(makeQualifiedName(aTag));
// A fresh list is created and stored on the resource - presumably only when none exists yet; TODO confirm.
257 value = new ArrayList();
258 aResource.set(makeQualifiedName(aTag), value);
261 value.add(aHandler.getValue());
// Single-valued case: the value is stored directly under the qualified name.
267 aResource.set(makeQualifiedName(aTag), aHandler.getValue());
// Builds the qualified name for an XML name: "prefix:localName", using the
// prefix registered for the module of the name's namespace. When no non-empty
// prefix is found, the result stays the bare local name.
270 private String makeQualifiedName(mir.util.xml.XMLName aName) {
271 String result=aName.getLocalName();
272 RSSModule module = (RSSModule) namespaceURItoModule.get(aName.getNamespaceURI());
274 String prefix = (String) moduleToPrefix.get(module);
276 if (prefix!=null && prefix.length()>0)
277 result = prefix+":"+result;
// Section handler for a single RDF resource element. Each child element is
// treated as a property of the resource and its value is stored on an
// RDFResource object.
283 private class RDFResourceSectionHandler extends mir.util.xml.AbstractSectionHandler {
284 private String image;
// Tag passed to usePropertyValueSectionHandler when a child handler ends.
285 private mir.util.xml.XMLName currentTag;
286 private RDFResource resource;
288 public RDFResourceSectionHandler(String anRDFClass, String anIdentifier) {
289 resource = new RDFResource(anRDFClass, anIdentifier);
// The handler for a child element is chosen by makePropertyValueSectionHandler.
292 public mir.util.xml.SectionHandler startElement(mir.util.xml.XMLName aTag, Map anAttributes) throws XMLParserExc {
295 return makePropertyValueSectionHandler(aTag, anAttributes);
// Stores the child handler's value on the resource under currentTag.
298 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
299 if (aHandler instanceof PropertyValueSectionHandler) {
300 usePropertyValueSectionHandler(resource, (PropertyValueSectionHandler) aHandler, currentTag);
301 // resource.set(makeQualifiedName(currentTag), ( (PropertyValueSectionHandler) aHandler).getFieldValue());
// Character data other than whitespace is rejected at this level.
305 public void characters(String aCharacters) throws XMLParserExc {
306 if (aCharacters.trim().length()>0)
307 throw new XMLParserExc("No character data allowed here");
310 public void finishSection() throws XMLParserExc {
// When the resource has no identifier (null or empty) but has an "rss:link"
// value, that value's string form is used as the identifier.
313 public RDFResource getResource() {
314 if ((resource.getIdentifier()==null || resource.getIdentifier().length()==0) && resource.get("rss:link")!=null) {
315 resource.setIdentifier(resource.get("rss:link").toString());
// Base class for section handlers that read the value of a property element.
// usePropertyValueSectionHandler reads the result through getValue().
322 private abstract class PropertyValueSectionHandler extends mir.util.xml.AbstractSectionHandler {
// Returns the value gathered by this handler.
323 public abstract Object getValue();
// Fallback property handler: collects character data as a string, and keeps
// the items of a nested RDF sequence as structured data when one is read.
326 private class FlexiblePropertyValueSectionHandler extends PropertyValueSectionHandler {
327 private StringBuffer stringData;
328 private Object structuredData;
330 public FlexiblePropertyValueSectionHandler() {
331 stringData = new StringBuffer();
// NOTE(review): this overload takes a String tag, while RootSectionHandler,
// RDFSectionHandler and RDFResourceSectionHandler take an XMLName. RDF_SEQUENCE_TAG
// is an XMLName, so aTag.equals(RDF_SEQUENCE_TAG) can never be true for a String.
// Confirm whether the parser still calls this signature.
335 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
336 if (aTag.equals(RDF_SEQUENCE_TAG))
337 return new RDFSequenceSectionHandler();
339 return new DiscardingSectionHandler();
// Keeps the items gathered by a nested sequence handler.
342 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
343 if (aHandler instanceof RDFSequenceSectionHandler) {
344 structuredData= ((RDFSequenceSectionHandler) aHandler).getItems();
348 public void characters(String aCharacters) throws XMLParserExc {
349 stringData.append(aCharacters);
352 public void finishSection() throws XMLParserExc {
// The collected character data as a string.
355 public String getData() {
356 return stringData.toString();
// The structured data when present, otherwise the collected character data.
359 public Object getValue() {
360 if (structuredData==null)
361 return stringData.toString();
362 return structuredData;
// Property handler for RDF collection properties: gathers the items of nested
// RDF sequences into one list.
366 private class RDFCollectionSectionHandler extends PropertyValueSectionHandler {
369 public RDFCollectionSectionHandler() {
370 items = new ArrayList();
// NOTE(review): this overload takes a String tag. RDF_SEQUENCE_TAG is an
// XMLName, so aTag.equals(RDF_SEQUENCE_TAG) can never be true for a String.
// Confirm whether the parser still calls this signature.
373 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
374 if (aTag.equals(RDF_SEQUENCE_TAG))
375 return new RDFSequenceSectionHandler();
377 return new DiscardingSectionHandler();
// Appends the items gathered by a nested sequence handler.
380 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
381 if (aHandler instanceof RDFSequenceSectionHandler) {
382 items.addAll(((RDFSequenceSectionHandler) aHandler).getItems());
// Character data other than whitespace is rejected at this level.
386 public void characters(String aCharacters) throws XMLParserExc {
387 if (aCharacters.trim().length()>0)
388 throw new XMLParserExc("No character data allowed here");
391 public void finishSection() throws XMLParserExc {
394 public List getItems() {
398 public Object getValue() {
// Property handler for PCDATA properties: accumulates the character data of
// the element and returns it as a string.
403 private class PCDATASectionHandler extends PropertyValueSectionHandler {
404 private StringBuffer data;
406 public PCDATASectionHandler() {
407 data = new StringBuffer();
// Nested elements are reported as a parser failure wrapping an RSSExc.
// NOTE(review): takes a String tag while other handlers in this file take an XMLName - confirm this signature is still called.
410 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
411 throw new XMLParserFailure(new RSSExc("No subtags allowed here"));
414 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
417 public void characters(String aCharacters) throws XMLParserExc {
418 data.append(aCharacters);
421 public void finishSection() throws XMLParserExc {
// The accumulated character data as a string.
424 public String getData() {
425 return data.toString();
// Same string as getData(), returned as the property value.
428 public Object getValue() {
429 return data.toString();
// Property handler for W3CDTF properties: accumulates the character data of
// the element and parses it with DateTimeRoutines.parseW3CDTFString.
433 private class DateSectionHandler extends PropertyValueSectionHandler {
434 private StringBuffer data;
436 public DateSectionHandler() {
437 data = new StringBuffer();
// Nested elements are reported as a parser failure wrapping an RSSExc.
// NOTE(review): takes a String tag while other handlers in this file take an XMLName - confirm this signature is still called.
440 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
441 throw new XMLParserFailure(new RSSExc("No subtags allowed here"));
444 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
447 public void characters(String aCharacters) throws XMLParserExc {
448 data.append(aCharacters);
451 public void finishSection() throws XMLParserExc {
// The accumulated text is trimmed before it is parsed.
454 public Object getValue() {
456 String expression = data.toString().trim();
458 return DateTimeRoutines.parseW3CDTFString(expression);
// NOTE(review): every Throwable raised while parsing is caught here - confirm what is returned for unparsable dates.
460 catch (Throwable t) {
// Section handler for an RDF sequence: gathers list items, which
// FlexiblePropertyValueSectionHandler and RDFCollectionSectionHandler read via getItems().
468 private class RDFSequenceSectionHandler extends mir.util.xml.AbstractSectionHandler {
471 public RDFSequenceSectionHandler() {
472 items = new ArrayList();
// Looks for "rdf:li" elements and reads their "rdf:resource" attribute.
// NOTE(review): tag and attribute are matched as prefixed Strings here, whereas
// RDFSectionHandler looks attributes up by XMLName (RDF_ABOUT_PARAMETER) - confirm
// this String-based signature and key are still what the parser supplies.
475 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
476 if (aTag.equals("rdf:li")) {
477 String item = (String) anAttributes.get("rdf:resource");
483 return new DiscardingSectionHandler();
486 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
489 public void characters(String aCharacters) throws XMLParserExc {
492 public void finishSection() throws XMLParserExc {
495 public List getItems() {
// Property handler that rebuilds the markup of its content as text: character
// data is appended as-is and nested elements are wrapped in "<tag>" ... "</tag>".
500 private class RDFLiteralSectionHandler extends PropertyValueSectionHandler {
501 private StringBuffer data;
504 public RDFLiteralSectionHandler() {
505 data = new StringBuffer();
508 protected StringBuffer getData() {
// Appends the opening tag and delegates the nested element to a new handler of the same class.
// NOTE(review): takes a String tag while other handlers in this file take an XMLName - confirm this signature is still called.
512 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
514 data.append("<"+tag+">");
516 return new RDFLiteralSectionHandler();
// Appends the nested handler's text followed by the closing tag.
// NOTE(review): aHandler is cast to RDFLiteralSectionHandler without an instanceof check.
519 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
520 data.append(((RDFLiteralSectionHandler) aHandler).getData());
521 data.append("</"+tag+">");
524 public void characters(String aCharacters) throws XMLParserExc {
525 data.append(aCharacters);
528 public void finishSection() throws XMLParserExc {
// The rebuilt text as a string.
531 public Object getValue() {
532 return data.toString();
// Section handler that other handlers in this file return for child elements
// they do not recognise.
// NOTE(review): takes a String tag in startElement, while other handlers in this file take an XMLName - confirm this signature is still called.
536 private class DiscardingSectionHandler extends mir.util.xml.AbstractSectionHandler {
537 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
541 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
544 public void characters(String aCharacters) throws XMLParserExc {
547 public void finishSection() throws XMLParserExc {