2 * Copyright (C) 2001, 2002 The Mir-coders group
4 * This file is part of Mir.
6 * Mir is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * Mir is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with Mir; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 * In addition, as a special exception, The Mir-coders gives permission to link
21 * the code of this program with any library licensed under the Apache Software License,
22 * The Sun (tm) Java Advanced Imaging library (JAI), The Sun JIMI library
23 * (or with modified versions of the above that use the same license as the above),
24 * and distribute linked combinations including the two. You must obey the
25 * GNU General Public License in all respects for all of the code used other than
26 * the above mentioned libraries. If you modify this file, you may extend this
27 * exception to your version of the file, but you are not obligated to do so.
28 * If you do not wish to do so, delete this exception statement from your version.
32 import java.io.InputStream;
33 import java.util.ArrayList;
34 import java.util.HashMap;
35 import java.util.List;
38 import mir.util.DateTimeFunctions;
39 import mir.util.HTTPClientHelper;
40 import mir.util.xml.XMLParserEngine;
41 import mir.util.xml.XMLParserExc;
42 import mir.util.xml.XMLParserFailure;
47 * <p>Description: </p>
48 * <p>Copyright: Copyright (c) 2003</p>
50 * @author not attributable
54 public class RSSReader {
55 public static final String RDF_NAMESPACE_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
56 public static final String RSS_1_0_NAMESPACE_URI = "http://purl.org/rss/1.0/";
57 public static final String RSS_0_9_NAMESPACE_URI = "http://my.netscape.com/rdf/simple/0.9/";
58 public static final String DUBLINCORE_NAMESPACE_URI = "http://purl.org/dc/elements/1.1/";
59 public static final String EVENT_NAMESPACE_URI = "http://purl.org/rss/1.0/modules/event/";
60 public static final String TAXONOMY_NAMESPACE_URI = "http://web.resource.org/rss/1.0/modules/taxonomy/";
61 public static final String DUBLINCORE_TERMS_NAMESPACE_URI = "http://purl.org/dc/terms/";
62 public static final String CONTENT_NAMESPACE_URI = "http://purl.org/rss/1.0/modules/content/";
64 // ML: to be localized:
65 public static final String V2V_NAMESPACE_URI = "http://v2v.cc/rss/";
67 private static final mir.util.xml.XMLName RDF_ABOUT_PARAMETER = new mir.util.xml.XMLName(RDF_NAMESPACE_URI, "about");
68 private static final mir.util.xml.XMLName RDF_SEQUENCE_TAG = new mir.util.xml.XMLName(RDF_NAMESPACE_URI, "Seq");
69 private static final mir.util.xml.XMLName RDF_BAG_PARAMETER = new mir.util.xml.XMLName(RDF_NAMESPACE_URI, "Bag");
71 private static final mir.util.xml.XMLName RSS_CHANNEL_TAG = new mir.util.xml.XMLName(RSS_1_0_NAMESPACE_URI, "channel");
72 private static final mir.util.xml.XMLName RSS_ITEM_TAG = new mir.util.xml.XMLName(RSS_1_0_NAMESPACE_URI, "item");
73 private static final mir.util.xml.XMLName RSS_ITEMS_TAG = new mir.util.xml.XMLName(RSS_1_0_NAMESPACE_URI, "items");
76 private Map namespaceURItoModule;
77 private Map moduleToPrefix;
80 modules = new ArrayList();
81 namespaceURItoModule = new HashMap();
82 moduleToPrefix = new HashMap();
84 registerModule(new RSSBasicModule(RDF_NAMESPACE_URI, "RDF module"), "rdf");
85 registerModule(new RSSBasicModule(RSS_1_0_NAMESPACE_URI, "RSS 1.0 module"), "rss");
86 registerModule(new RSSBasicModule(RSS_0_9_NAMESPACE_URI, "RSS 0.9 module"), "rss");
88 RSSBasicModule dcModule = new RSSBasicModule(DUBLINCORE_NAMESPACE_URI, "RSS Dublin Core 1.1");
89 dcModule.addProperty("date", RSSModule.W3CDTF_PROPERTY_TYPE);
90 registerModule(dcModule, "dc");
92 RSSBasicModule dcTermsModule = new RSSBasicModule(DUBLINCORE_TERMS_NAMESPACE_URI, "RSS Qualified Dublin core");
93 dcTermsModule.addProperty("created", RSSModule.W3CDTF_PROPERTY_TYPE);
94 dcTermsModule.addProperty("issued", RSSModule.W3CDTF_PROPERTY_TYPE);
95 dcTermsModule.addProperty("modified", RSSModule.W3CDTF_PROPERTY_TYPE);
96 dcTermsModule.addProperty("dateAccepted", RSSModule.W3CDTF_PROPERTY_TYPE);
97 dcTermsModule.addProperty("dateCopyrighted", RSSModule.W3CDTF_PROPERTY_TYPE);
98 dcTermsModule.addProperty("dateSubmitted", RSSModule.W3CDTF_PROPERTY_TYPE);
99 registerModule(dcTermsModule, "dcterms");
101 RSSBasicModule v2vTermsModule = new RSSBasicModule(V2V_NAMESPACE_URI, "indymedia v2v RSS module");
102 v2vTermsModule.addMultiValuedProperty("topic", RSSModule.PCDATA_PROPERTY_TYPE);
103 v2vTermsModule.addMultiValuedProperty("genre", RSSModule.PCDATA_PROPERTY_TYPE);
104 v2vTermsModule.addMultiValuedProperty("link", RSSModule.PCDATA_PROPERTY_TYPE);
105 registerModule(v2vTermsModule, "v2v");
107 registerModule(new RSSBasicModule(EVENT_NAMESPACE_URI, "Event RSS module"), "ev");
108 registerModule(new RSSBasicModule(TAXONOMY_NAMESPACE_URI, "Taxonomy RSS module"), "taxo");
109 registerModule(new RSSBasicModule(CONTENT_NAMESPACE_URI , "Content RSS module"), "content");
112 public void registerModule(RSSModule aModule, String aPrefix) {
113 modules.add(aModule);
114 namespaceURItoModule.put(aModule.getNamespaceURI(), aModule);
115 moduleToPrefix.put(aModule, aPrefix);
118 public RSSData parseInputStream(InputStream aStream) throws RSSExc, RSSFailure {
120 RSSData result = new RSSData();
121 XMLParserEngine.getInstance().parse("xml", aStream, new RootSectionHandler(result));
125 catch (Throwable t) {
126 throw new RSSFailure(t);
130 public RSSData parseInputStream(InputStream aStream, String anEncoding) throws RSSExc, RSSFailure {
132 RSSData result = new RSSData();
133 XMLParserEngine.getInstance().parse("xml", aStream, anEncoding, new RootSectionHandler(result));
137 catch (Throwable t) {
138 throw new RSSFailure(t);
142 public RSSData parseUrl(String anUrl) throws RSSExc, RSSFailure {
144 HTTPClientHelper httpClientHelper = new HTTPClientHelper();
145 InputStream inputStream = httpClientHelper.getUrl(anUrl);
146 if (inputStream==null)
147 throw new RSSExc("RSSChannel.parseUrl: Can't get url content");
149 RSSData theRSSData = parseInputStream(inputStream);
150 httpClientHelper.releaseHTTPConnection();
153 catch (Throwable t) {
154 throw new RSSFailure(t);
158 public RSSData parseUrl(String anUrl, String anEncoding) throws RSSExc, RSSFailure {
160 HTTPClientHelper httpClientHelper = new HTTPClientHelper();
161 InputStream inputStream = httpClientHelper.getUrl(anUrl);
162 if (inputStream==null)
163 throw new RSSExc("RSSChannel.parseUrl: Can't get url content");
165 RSSData theRSSData = parseInputStream(inputStream, anEncoding);
166 httpClientHelper.releaseHTTPConnection();
169 catch (Throwable t) {
170 throw new RSSFailure(t);
174 private class RootSectionHandler extends mir.util.xml.AbstractSectionHandler {
175 private RSSData data;
177 public RootSectionHandler(RSSData aData) {
181 public mir.util.xml.SectionHandler startElement(mir.util.xml.XMLName aTag, Map anAttributes) throws XMLParserExc {
182 if (aTag.getLocalName().equals("RDF")) {
183 return new RDFSectionHandler(data);
186 throw new XMLParserFailure(new RSSExc("'RDF' tag expected"));
189 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
192 public void characters(String aCharacters) throws XMLParserExc {
193 if (aCharacters.trim().length()>0)
194 throw new XMLParserExc("No character data allowed here");
197 public void finishSection() throws XMLParserExc {
201 private class RDFSectionHandler extends mir.util.xml.AbstractSectionHandler {
202 private RSSData data;
205 public RDFSectionHandler(RSSData aData) {
209 public mir.util.xml.SectionHandler startElement(mir.util.xml.XMLName aTag, Map anAttributes) throws XMLParserExc {
210 String identifier = (String) anAttributes.get(RDF_ABOUT_PARAMETER);
211 String rdfClass = makeQualifiedName(aTag);
213 return new RDFResourceSectionHandler(rdfClass, identifier);
216 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
217 if (aHandler instanceof RDFResourceSectionHandler) {
218 data.addResource(((RDFResourceSectionHandler) aHandler).getResource());
222 public void characters(String aCharacters) throws XMLParserExc {
223 if (aCharacters.trim().length()>0)
224 throw new XMLParserExc("No character data allowed here");
227 public void finishSection() throws XMLParserExc {
231 private mir.util.xml.SectionHandler makePropertyValueSectionHandler(mir.util.xml.XMLName aTag, Map anAttributes) {
232 RSSModule module = (RSSModule) namespaceURItoModule.get(aTag.getNamespaceURI());
235 RSSModule.RSSModuleProperty property = module.getPropertyForName(aTag.getLocalName());
237 if (property!=null) {
238 switch (property.getType()) {
240 RSSModule.PCDATA_PROPERTY_TYPE:
241 return new PCDATASectionHandler();
243 RSSModule.RDFCOLLECTION_PROPERTY_TYPE:
244 return new RDFCollectionSectionHandler();
246 // RSSModule.RDF_PROPERTY_TYPE:
247 // return new RDFValueSectionHandler();
249 RSSModule.W3CDTF_PROPERTY_TYPE:
250 return new DateSectionHandler();
255 return new FlexiblePropertyValueSectionHandler();
258 private void usePropertyValueSectionHandler(RDFResource aResource, PropertyValueSectionHandler aHandler, mir.util.xml.XMLName aTag) {
259 RSSModule module = (RSSModule) namespaceURItoModule.get(aTag.getNamespaceURI());
262 RSSModule.RSSModuleProperty property = module.getPropertyForName(aTag.getLocalName());
264 if (property!=null && property.getIsMultiValued()) {
265 List value = (List) aResource.get(makeQualifiedName(aTag));
268 value = new ArrayList();
269 aResource.set(makeQualifiedName(aTag), value);
272 value.add(aHandler.getValue());
278 aResource.set(makeQualifiedName(aTag), aHandler.getValue());
281 private String makeQualifiedName(mir.util.xml.XMLName aName) {
282 String result=aName.getLocalName();
283 RSSModule module = (RSSModule) namespaceURItoModule.get(aName.getNamespaceURI());
285 String prefix = (String) moduleToPrefix.get(module);
287 if (prefix!=null && prefix.length()>0)
288 result = prefix+":"+result;
294 private class RDFResourceSectionHandler extends mir.util.xml.AbstractSectionHandler {
295 private String image;
296 private mir.util.xml.XMLName currentTag;
297 private RDFResource resource;
299 public RDFResourceSectionHandler(String anRDFClass, String anIdentifier) {
300 resource = new RDFResource(anRDFClass, anIdentifier);
303 public mir.util.xml.SectionHandler startElement(mir.util.xml.XMLName aTag, Map anAttributes) throws XMLParserExc {
306 return makePropertyValueSectionHandler(aTag, anAttributes);
309 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
310 if (aHandler instanceof PropertyValueSectionHandler) {
311 usePropertyValueSectionHandler(resource, (PropertyValueSectionHandler) aHandler, currentTag);
312 // resource.set(makeQualifiedName(currentTag), ( (PropertyValueSectionHandler) aHandler).getFieldValue());
316 public void characters(String aCharacters) throws XMLParserExc {
317 if (aCharacters.trim().length()>0)
318 throw new XMLParserExc("No character data allowed here");
321 public void finishSection() throws XMLParserExc {
324 public RDFResource getResource() {
325 if ((resource.getIdentifier()==null || resource.getIdentifier().length()==0) && resource.get("rss:link")!=null) {
326 resource.setIdentifier(resource.get("rss:link").toString());
333 private abstract class PropertyValueSectionHandler extends mir.util.xml.AbstractSectionHandler {
334 public abstract Object getValue();
337 private class FlexiblePropertyValueSectionHandler extends PropertyValueSectionHandler {
338 private StringBuffer stringData;
339 private Object structuredData;
341 public FlexiblePropertyValueSectionHandler() {
342 stringData = new StringBuffer();
346 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
347 if (aTag.equals(RDF_SEQUENCE_TAG))
348 return new RDFSequenceSectionHandler();
350 return new DiscardingSectionHandler();
353 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
354 if (aHandler instanceof RDFSequenceSectionHandler) {
355 structuredData= ((RDFSequenceSectionHandler) aHandler).getItems();
359 public void characters(String aCharacters) throws XMLParserExc {
360 stringData.append(aCharacters);
363 public void finishSection() throws XMLParserExc {
366 public String getData() {
367 return stringData.toString();
370 public Object getValue() {
371 if (structuredData==null)
372 return stringData.toString();
374 return structuredData;
378 private class RDFCollectionSectionHandler extends PropertyValueSectionHandler {
381 public RDFCollectionSectionHandler() {
382 items = new ArrayList();
385 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
386 if (aTag.equals(RDF_SEQUENCE_TAG))
387 return new RDFSequenceSectionHandler();
389 return new DiscardingSectionHandler();
392 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
393 if (aHandler instanceof RDFSequenceSectionHandler) {
394 items.addAll(((RDFSequenceSectionHandler) aHandler).getItems());
398 public void characters(String aCharacters) throws XMLParserExc {
399 if (aCharacters.trim().length()>0)
400 throw new XMLParserExc("No character data allowed here");
403 public void finishSection() throws XMLParserExc {
406 public List getItems() {
410 public Object getValue() {
415 private class PCDATASectionHandler extends PropertyValueSectionHandler {
416 private StringBuffer data;
418 public PCDATASectionHandler() {
419 data = new StringBuffer();
422 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
423 throw new XMLParserFailure(new RSSExc("No subtags allowed here"));
426 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
429 public void characters(String aCharacters) throws XMLParserExc {
430 data.append(aCharacters);
433 public void finishSection() throws XMLParserExc {
436 public String getData() {
437 return data.toString();
440 public Object getValue() {
441 return data.toString();
445 private class DateSectionHandler extends PropertyValueSectionHandler {
446 private StringBuffer data;
448 public DateSectionHandler() {
449 data = new StringBuffer();
452 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
453 throw new XMLParserFailure(new RSSExc("No subtags allowed here"));
456 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
459 public void characters(String aCharacters) throws XMLParserExc {
460 data.append(aCharacters);
463 public void finishSection() throws XMLParserExc {
466 public Object getValue() {
468 String expression = data.toString().trim();
470 return DateTimeFunctions.parseW3CDTFString(expression);
472 catch (Throwable t) {
480 private class RDFSequenceSectionHandler extends mir.util.xml.AbstractSectionHandler {
483 public RDFSequenceSectionHandler() {
484 items = new ArrayList();
487 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
488 if (aTag.equals("rdf:li")) {
489 String item = (String) anAttributes.get("rdf:resource");
495 return new DiscardingSectionHandler();
498 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
501 public void characters(String aCharacters) throws XMLParserExc {
504 public void finishSection() throws XMLParserExc {
507 public List getItems() {
// Value handler that rebuilds nested markup as text: character data is
// appended to a buffer, and each child element is wrapped in "<tag>" ...
// "</tag>" around the text its own RDFLiteralSectionHandler collected.
// In the code visible here the class is only instantiated by itself (the
// RDFValueSectionHandler case that might have used it is commented out).
// NOTE(review): the declaration and assignment of `tag` are not visible in
// this extract - presumably a field set from aTag in startElement; confirm.
// NOTE(review): startElement takes a String tag here, whereas the root, RDF
// and resource handlers of this file take an XMLName - verify which
// signature the parser actually calls.
512 private class RDFLiteralSectionHandler extends PropertyValueSectionHandler {
// Text collected so far, including re-serialised child elements.
513 private StringBuffer data;
516 public RDFLiteralSectionHandler() {
517 data = new StringBuffer();
// Gives the parent literal handler access to the collected text.
520 protected StringBuffer getData() {
// Writes the opening tag and reads the child with a fresh literal handler.
524 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
526 data.append("<"+tag+">");
528 return new RDFLiteralSectionHandler();
// Appends the child's text followed by the closing tag. The cast is
// unconditional: it relies on startElement always returning this class.
531 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
532 data.append(((RDFLiteralSectionHandler) aHandler).getData());
533 data.append("</"+tag+">");
// Character data is appended as-is; no escaping is applied here.
536 public void characters(String aCharacters) throws XMLParserExc {
537 data.append(aCharacters);
540 public void finishSection() throws XMLParserExc {
// Returns the collected text as a String.
543 public Object getValue() {
544 return data.toString();
// Handler that the flexible, collection and sequence handlers return for
// child elements they do not process.
// NOTE(review): none of the method bodies are visible in this extract, so
// what happens to nested elements and character data cannot be confirmed
// from here - presumably everything is ignored, as the name suggests.
// NOTE(review): startElement takes a String tag here, whereas the root, RDF
// and resource handlers of this file take an XMLName - verify which
// signature the parser actually calls.
548 private class DiscardingSectionHandler extends mir.util.xml.AbstractSectionHandler {
// Called for each child element of a discarded element (body not visible).
549 public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc {
// Called when a child element ends (body not visible).
553 public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc {
// Called for character data inside a discarded element (body not visible).
556 public void characters(String aCharacters) throws XMLParserExc {
// Called when the discarded element itself ends (body not visible).
559 public void finishSection() throws XMLParserExc {