2 * Copyright (C) 2001, 2002 The Mir-coders group
4 * This file is part of Mir.
6 * Mir is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * Mir is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with Mir; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * In addition, as a special exception, The Mir-coders gives permission to link
21 * the code of this program with any library licensed under the Apache Software License,
22 * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library
23 * (or with modified versions of the above that use the same license as the above),
24 * and distribute linked combinations including the two. You must obey the
25 * GNU General Public License in all respects for all of the code used other than
26 * the above mentioned libraries. If you modify this file, you may extend this
27 * exception to your version of the file, but you are not obligated to do so.
28 * If you do not wish to do so, delete this exception statement from your version.
32 import java.io.InputStream;
34 import java.util.List;
36 import java.util.Vector;
39 import mir.util.XMLReader;
 * <p>Description: Reads RDF-based RSS feeds from an input stream or a URL into an RSSData object.</p>
46 * <p>Copyright: Copyright (c) 2003</p>
48 * @author not attributable
52 public class RSSReader {
53 public static final String RDF_NAMESPACE_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
54 public static final String RSS_1_0_NAMESPACE_URI = "http://purl.org/rss/1.0/";
55 public static final String RSS_0_9_NAMESPACE_URI = "http://my.netscape.com/rdf/simple/0.9/";
56 public static final String DUBLINCORE_NAMESPACE_URI = "http://purl.org/dc/elements/1.1/";
57 public static final String EVENT_NAMESPACE_URI = "http://purl.org/rss/1.0/modules/event/";
58 public static final String TAXONOMY_NAMESPACE_URI = "http://web.resource.org/rss/1.0/modules/taxonomy/";
59 public static final String DUBLINCORE_TERMS_NAMESPACE_URI = "http://purl.org/dc/terms/";
62 // ML: to be localized:
63 public static final String V2V_NAMESPACE_URI = "http://v2v.indymedia.de/rss/";
65 private static final XMLReader.XMLName RDF_ABOUT_PARAMETER = new XMLReader.XMLName(RDF_NAMESPACE_URI, "about");
66 private static final XMLReader.XMLName RDF_SEQUENCE_TAG = new XMLReader.XMLName(RDF_NAMESPACE_URI, "Seq");
67 private static final XMLReader.XMLName RDF_BAG_PARAMETER = new XMLReader.XMLName(RDF_NAMESPACE_URI, "Bag");
69 private static final XMLReader.XMLName RSS_CHANNEL_TAG = new XMLReader.XMLName(RSS_1_0_NAMESPACE_URI, "channel");
70 private static final XMLReader.XMLName RSS_ITEM_TAG = new XMLReader.XMLName(RSS_1_0_NAMESPACE_URI, "item");
71 private static final XMLReader.XMLName RSS_ITEMS_TAG = new XMLReader.XMLName(RSS_1_0_NAMESPACE_URI, "items");
74 private Map namespaceURItoModule;
75 private Map moduleToPrefix;
78 modules = new Vector();
79 namespaceURItoModule = new HashMap();
80 moduleToPrefix = new HashMap();
82 registerModule(new RSSBasicModule(RDF_NAMESPACE_URI, "RDF module"), "rdf");
83 registerModule(new RSSBasicModule(RSS_1_0_NAMESPACE_URI, "RSS 1.0 module"), "rss");
84 registerModule(new RSSBasicModule(RSS_0_9_NAMESPACE_URI, "RSS 0.9 module"), "rss");
86 RSSBasicModule dcModule = new RSSBasicModule(DUBLINCORE_NAMESPACE_URI, "RSS Dublin Core 1.1");
87 dcModule.addProperty("date", RSSModule.W3CDTF_PROPERTY_TYPE);
88 registerModule(dcModule, "dc");
90 RSSBasicModule dcTermsModule = new RSSBasicModule(DUBLINCORE_TERMS_NAMESPACE_URI, "RSS Qualified Dublin core");
91 dcTermsModule.addProperty("created", RSSModule.W3CDTF_PROPERTY_TYPE);
92 dcTermsModule.addProperty("issued", RSSModule.W3CDTF_PROPERTY_TYPE);
93 dcTermsModule.addProperty("modified", RSSModule.W3CDTF_PROPERTY_TYPE);
94 dcTermsModule.addProperty("dateAccepted", RSSModule.W3CDTF_PROPERTY_TYPE);
95 dcTermsModule.addProperty("dateCopyrighted", RSSModule.W3CDTF_PROPERTY_TYPE);
96 dcTermsModule.addProperty("dateSubmitted", RSSModule.W3CDTF_PROPERTY_TYPE);
97 registerModule(dcTermsModule, "dcterms");
99 RSSBasicModule v2vTermsModule = new RSSBasicModule(V2V_NAMESPACE_URI, "indymedia v2v RSS module");
100 v2vTermsModule.addMultiValuedProperty("topic", RSSModule.PCDATA_PROPERTY_TYPE);
101 v2vTermsModule.addMultiValuedProperty("genre", RSSModule.PCDATA_PROPERTY_TYPE);
102 v2vTermsModule.addMultiValuedProperty("link", RSSModule.PCDATA_PROPERTY_TYPE);
103 registerModule(v2vTermsModule, "v2v");
105 registerModule(new RSSBasicModule(EVENT_NAMESPACE_URI, "Event RSS module"), "ev");
106 registerModule(new RSSBasicModule(TAXONOMY_NAMESPACE_URI, "Taxonomy RSS module"), "taxo");
109 public void registerModule(RSSModule aModule, String aPrefix) {
110 modules.add(aModule);
111 namespaceURItoModule.put(aModule.getNamespaceURI(), aModule);
112 moduleToPrefix.put(aModule, aPrefix);
115 public RSSData parseInputStream(InputStream aStream) throws RSSExc, RSSFailure {
117 XMLReader xmlReader = new XMLReader(true);
118 RSSData result = new RSSData();
119 xmlReader.parseInputStream(aStream, new RootSectionHandler(result));
123 catch (Throwable t) {
124 throw new RSSFailure(t);
128 public RSSData parseUrl(String anUrl) throws RSSExc, RSSFailure {
130 InputStream inputStream = (InputStream) new URL(anUrl).getContent(new Class[] {InputStream.class});
132 if (inputStream==null)
133 throw new RSSExc("RSSChannel.parseUrl: Can't get url content");
135 return parseInputStream(inputStream);
137 catch (Throwable t) {
138 throw new RSSFailure(t);
142 private class RootSectionHandler extends XMLReader.AbstractSectionHandler {
143 private RSSData data;
145 public RootSectionHandler(RSSData aData) {
149 public XMLReader.SectionHandler startElement(XMLReader.XMLName aTag, Map anAttributes) throws XMLReader.XMLReaderExc {
150 if (aTag.getLocalName().equals("RDF")) {
151 return new RDFSectionHandler(data);
154 throw new XMLReader.XMLReaderFailure(new RSSExc("'RDF' tag expected"));
157 public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc {
160 public void characters(String aCharacters) throws XMLReader.XMLReaderExc {
161 if (aCharacters.trim().length()>0)
162 throw new XMLReader.XMLReaderExc("No character data allowed here");
165 public void finishSection() throws XMLReader.XMLReaderExc {
169 private class RDFSectionHandler extends XMLReader.AbstractSectionHandler {
170 private RSSData data;
173 public RDFSectionHandler(RSSData aData) {
177 public XMLReader.SectionHandler startElement(XMLReader.XMLName aTag, Map anAttributes) throws XMLReader.XMLReaderExc {
178 String identifier = (String) anAttributes.get(RDF_ABOUT_PARAMETER);
179 String rdfClass = makeQualifiedName(aTag);
181 return new RDFResourceSectionHandler(rdfClass, identifier);
184 public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc {
185 if (aHandler instanceof RDFResourceSectionHandler) {
186 data.addResource(((RDFResourceSectionHandler) aHandler).getResource());
190 public void characters(String aCharacters) throws XMLReader.XMLReaderExc {
191 if (aCharacters.trim().length()>0)
192 throw new XMLReader.XMLReaderExc("No character data allowed here");
195 public void finishSection() throws XMLReader.XMLReaderExc {
199 private XMLReader.SectionHandler makePropertyValueSectionHandler(XMLReader.XMLName aTag, Map anAttributes) {
200 RSSModule module = (RSSModule) namespaceURItoModule.get(aTag.getNamespaceURI());
203 RSSModule.RSSModuleProperty property = module.getPropertyForName(aTag.getLocalName());
205 if (property!=null) {
206 switch (property.getType()) {
208 RSSModule.PCDATA_PROPERTY_TYPE:
209 return new PCDATASectionHandler();
211 RSSModule.RDFCOLLECTION_PROPERTY_TYPE:
212 return new RDFCollectionSectionHandler();
214 // RSSModule.RDF_PROPERTY_TYPE:
215 // return new RDFValueSectionHandler();
217 RSSModule.W3CDTF_PROPERTY_TYPE:
218 return new DateSectionHandler();
224 return new FlexiblePropertyValueSectionHandler();
227 private void usePropertyValueSectionHandler(RDFResource aResource, PropertyValueSectionHandler aHandler, XMLReader.XMLName aTag) {
228 RSSModule module = (RSSModule) namespaceURItoModule.get(aTag.getNamespaceURI());
231 RSSModule.RSSModuleProperty property = module.getPropertyForName(aTag.getLocalName());
233 if (property!=null && property.getIsMultiValued()) {
234 List value = (List) aResource.get(makeQualifiedName(aTag));
237 value = new Vector();
238 aResource.set(makeQualifiedName(aTag), value);
241 value.add(aHandler.getValue());
247 aResource.set(makeQualifiedName(aTag), aHandler.getValue());
250 private String makeQualifiedName(XMLReader.XMLName aName) {
251 String result=aName.getLocalName();
252 RSSModule module = (RSSModule) namespaceURItoModule.get(aName.getNamespaceURI());
254 String prefix = (String) moduleToPrefix.get(module);
256 if (prefix!=null && prefix.length()>0)
257 result = prefix+":"+result;
263 private class RDFResourceSectionHandler extends XMLReader.AbstractSectionHandler {
264 private String image;
265 private XMLReader.XMLName currentTag;
266 private RDFResource resource;
268 public RDFResourceSectionHandler(String anRDFClass, String anIdentifier) {
269 resource = new RDFResource(anRDFClass, anIdentifier);
272 public XMLReader.SectionHandler startElement(XMLReader.XMLName aTag, Map anAttributes) throws XMLReader.XMLReaderExc {
275 return makePropertyValueSectionHandler(aTag, anAttributes);
278 public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc {
279 if (aHandler instanceof PropertyValueSectionHandler) {
280 usePropertyValueSectionHandler(resource, (PropertyValueSectionHandler) aHandler, currentTag);
281 // resource.set(makeQualifiedName(currentTag), ( (PropertyValueSectionHandler) aHandler).getValue());
285 public void characters(String aCharacters) throws XMLReader.XMLReaderExc {
286 if (aCharacters.trim().length()>0)
287 throw new XMLReader.XMLReaderExc("No character data allowed here");
290 public void finishSection() throws XMLReader.XMLReaderExc {
293 public RDFResource getResource() {
294 if (resource.getIdentifier()==null || resource.getIdentifier().length()==0) {
295 resource.setIdentifier(resource.get("rss:link").toString());
302 private abstract class PropertyValueSectionHandler extends XMLReader.AbstractSectionHandler {
303 public abstract Object getValue();
306 private class FlexiblePropertyValueSectionHandler extends PropertyValueSectionHandler {
307 private StringBuffer stringData;
308 private Object structuredData;
310 public FlexiblePropertyValueSectionHandler() {
311 stringData = new StringBuffer();
315 public XMLReader.SectionHandler startElement(String aTag, Map anAttributes) throws XMLReader.XMLReaderExc {
316 if (aTag.equals(RDF_SEQUENCE_TAG))
317 return new RDFSequenceSectionHandler();
319 return new DiscardingSectionHandler();
322 public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc {
323 if (aHandler instanceof RDFSequenceSectionHandler) {
324 structuredData= ((RDFSequenceSectionHandler) aHandler).getItems();
328 public void characters(String aCharacters) throws XMLReader.XMLReaderExc {
329 stringData.append(aCharacters);
332 public void finishSection() throws XMLReader.XMLReaderExc {
335 public String getData() {
336 return stringData.toString();
339 public Object getValue() {
340 if (structuredData==null)
341 return stringData.toString();
343 return structuredData;
347 private class RDFCollectionSectionHandler extends PropertyValueSectionHandler {
350 public RDFCollectionSectionHandler() {
351 items = new Vector();
354 public XMLReader.SectionHandler startElement(String aTag, Map anAttributes) throws XMLReader.XMLReaderExc {
355 if (aTag.equals(RDF_SEQUENCE_TAG))
356 return new RDFSequenceSectionHandler();
358 return new DiscardingSectionHandler();
361 public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc {
362 if (aHandler instanceof RDFSequenceSectionHandler) {
363 items.addAll(((RDFSequenceSectionHandler) aHandler).getItems());
367 public void characters(String aCharacters) throws XMLReader.XMLReaderExc {
368 if (aCharacters.trim().length()>0)
369 throw new XMLReader.XMLReaderExc("No character data allowed here");
372 public void finishSection() throws XMLReader.XMLReaderExc {
375 public List getItems() {
379 public Object getValue() {
384 private class PCDATASectionHandler extends PropertyValueSectionHandler {
385 private StringBuffer data;
387 public PCDATASectionHandler() {
388 data = new StringBuffer();
391 public XMLReader.SectionHandler startElement(String aTag, Map anAttributes) throws XMLReader.XMLReaderExc {
392 throw new XMLReader.XMLReaderFailure(new RSSExc("No subtags allowed here"));
395 public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc {
398 public void characters(String aCharacters) throws XMLReader.XMLReaderExc {
399 data.append(aCharacters);
402 public void finishSection() throws XMLReader.XMLReaderExc {
405 public String getData() {
406 return data.toString();
409 public Object getValue() {
410 return data.toString();
414 private class DateSectionHandler extends PropertyValueSectionHandler {
415 private StringBuffer data;
417 public DateSectionHandler() {
418 data = new StringBuffer();
421 public XMLReader.SectionHandler startElement(String aTag, Map anAttributes) throws XMLReader.XMLReaderExc {
422 throw new XMLReader.XMLReaderFailure(new RSSExc("No subtags allowed here"));
425 public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc {
428 public void characters(String aCharacters) throws XMLReader.XMLReaderExc {
429 data.append(aCharacters);
432 public void finishSection() throws XMLReader.XMLReaderExc {
435 private final static String SPACE = "[\t\n\r ]*";
436 private final static String NUMBER = "[0-9]*";
437 private final static String SIGN = "[-+]";
439 public Object getValue() {
441 String expression = data.toString().trim();
443 return DateTimeFunctions.parseW3CDTFString(expression);
445 catch (Throwable t) {
453 private class RDFSequenceSectionHandler extends XMLReader.AbstractSectionHandler {
456 public RDFSequenceSectionHandler() {
457 items = new Vector();
460 public XMLReader.SectionHandler startElement(String aTag, Map anAttributes) throws XMLReader.XMLReaderExc {
461 if (aTag.equals("rdf:li")) {
462 String item = (String) anAttributes.get("rdf:resource");
468 return new DiscardingSectionHandler();
471 public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc {
474 public void characters(String aCharacters) throws XMLReader.XMLReaderExc {
477 public void finishSection() throws XMLReader.XMLReaderExc {
480 public List getItems() {
485 private class RDFLiteralSectionHandler extends PropertyValueSectionHandler {
486 private StringBuffer data;
489 public RDFLiteralSectionHandler() {
490 data = new StringBuffer();
493 protected StringBuffer getData() {
497 public XMLReader.SectionHandler startElement(String aTag, Map anAttributes) throws XMLReader.XMLReaderExc {
499 data.append("<"+tag+">");
501 return new RDFLiteralSectionHandler();
504 public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc {
505 data.append(((RDFLiteralSectionHandler) aHandler).getData());
506 data.append("</"+tag+">");
509 public void characters(String aCharacters) throws XMLReader.XMLReaderExc {
510 data.append(aCharacters);
513 public void finishSection() throws XMLReader.XMLReaderExc {
516 public Object getValue() {
517 return data.toString();
521 private class DiscardingSectionHandler extends XMLReader.AbstractSectionHandler {
522 public XMLReader.SectionHandler startElement(String aTag, Map anAttributes) throws XMLReader.XMLReaderExc {
526 public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc {
529 public void characters(String aCharacters) throws XMLReader.XMLReaderExc {
532 public void finishSection() throws XMLReader.XMLReaderExc {