X-Git-Url: http://erislabs.net/gitweb/?a=blobdiff_plain;f=source%2Fmir%2Frss%2FRSSReader.java;h=6d89d49c22b768590033e996e34fb57cea6715cd;hb=63e0ee1fb8038eb6d8f0190cf38c3b3ab2727216;hp=61541fc3f3deb4acff016c345d8057210770c6d3;hpb=2721d94388d43e9fe053e6f616e21faa4fc640d9;p=mir.git diff --git a/source/mir/rss/RSSReader.java b/source/mir/rss/RSSReader.java index 61541fc3..6d89d49c 100755 --- a/source/mir/rss/RSSReader.java +++ b/source/mir/rss/RSSReader.java @@ -30,14 +30,16 @@ package mir.rss; import java.io.InputStream; -import java.net.URL; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; -import java.util.*; -import java.util.Vector; -import java.text.*; +import java.util.Map; -import mir.util.XMLReader; -import mir.util.*; +import mir.util.DateTimeFunctions; +import mir.util.HTTPClientHelper; +import mir.util.xml.XMLParserEngine; +import mir.util.xml.XMLParserExc; +import mir.util.xml.XMLParserFailure; /** * @@ -52,37 +54,59 @@ import mir.util.*; public class RSSReader { public static final String RDF_NAMESPACE_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#"; public static final String RSS_1_0_NAMESPACE_URI = "http://purl.org/rss/1.0/"; + public static final String RSS_0_9_NAMESPACE_URI = "http://my.netscape.com/rdf/simple/0.9/"; public static final String DUBLINCORE_NAMESPACE_URI = "http://purl.org/dc/elements/1.1/"; public static final String EVENT_NAMESPACE_URI = "http://purl.org/rss/1.0/modules/event/"; public static final String TAXONOMY_NAMESPACE_URI = "http://web.resource.org/rss/1.0/modules/taxonomy/"; + public static final String DUBLINCORE_TERMS_NAMESPACE_URI = "http://purl.org/dc/terms/"; + public static final String CONTENT_NAMESPACE_URI = "http://purl.org/rss/1.0/modules/content/"; - private static final XMLReader.XMLName RDF_ABOUT_PARAMETER = new XMLReader.XMLName(RDF_NAMESPACE_URI, "about"); - private static final XMLReader.XMLName RDF_SEQUENCE_TAG = new XMLReader.XMLName(RDF_NAMESPACE_URI, "Seq"); - private static final XMLReader.XMLName RDF_BAG_PARAMETER = new XMLReader.XMLName(RDF_NAMESPACE_URI, "Bag"); + // ML: to be localized: + public static final String V2V_NAMESPACE_URI = "http://v2v.cc/rss/"; - private static final XMLReader.XMLName RSS_CHANNEL_TAG = new XMLReader.XMLName(RSS_1_0_NAMESPACE_URI, "channel"); - private static final XMLReader.XMLName RSS_ITEM_TAG = new XMLReader.XMLName(RSS_1_0_NAMESPACE_URI, "item"); - private static final XMLReader.XMLName RSS_ITEMS_TAG = new XMLReader.XMLName(RSS_1_0_NAMESPACE_URI, "items"); + private static final mir.util.xml.XMLName RDF_ABOUT_PARAMETER = new mir.util.xml.XMLName(RDF_NAMESPACE_URI, "about"); + private static final mir.util.xml.XMLName RDF_SEQUENCE_TAG = new mir.util.xml.XMLName(RDF_NAMESPACE_URI, "Seq"); + private static final mir.util.xml.XMLName RDF_BAG_PARAMETER = new mir.util.xml.XMLName(RDF_NAMESPACE_URI, "Bag"); + + private static final mir.util.xml.XMLName RSS_CHANNEL_TAG = new mir.util.xml.XMLName(RSS_1_0_NAMESPACE_URI, "channel"); + private static final mir.util.xml.XMLName RSS_ITEM_TAG = new mir.util.xml.XMLName(RSS_1_0_NAMESPACE_URI, "item"); + private static final mir.util.xml.XMLName RSS_ITEMS_TAG = new mir.util.xml.XMLName(RSS_1_0_NAMESPACE_URI, "items"); private List modules; private Map namespaceURItoModule; private Map moduleToPrefix; - private RSSModule rdfModule; public RSSReader() { - modules = new Vector(); + modules = new ArrayList(); namespaceURItoModule = new HashMap(); moduleToPrefix = new HashMap(); registerModule(new RSSBasicModule(RDF_NAMESPACE_URI, "RDF module"), "rdf"); registerModule(new RSSBasicModule(RSS_1_0_NAMESPACE_URI, "RSS 1.0 module"), "rss"); + registerModule(new RSSBasicModule(RSS_0_9_NAMESPACE_URI, "RSS 0.9 module"), "rss"); - RSSBasicModule dcModule = new RSSBasicModule(DUBLINCORE_NAMESPACE_URI, "Dublin Core RSS module 1.1"); - dcModule.addProperty("date", dcModule.W3CDTF_PROPERTY_TYPE); + RSSBasicModule dcModule = new RSSBasicModule(DUBLINCORE_NAMESPACE_URI, "RSS Dublin Core 1.1"); + dcModule.addProperty("date", RSSModule.W3CDTF_PROPERTY_TYPE); registerModule(dcModule, "dc"); + RSSBasicModule dcTermsModule = new RSSBasicModule(DUBLINCORE_TERMS_NAMESPACE_URI, "RSS Qualified Dublin core"); + dcTermsModule.addProperty("created", RSSModule.W3CDTF_PROPERTY_TYPE); + dcTermsModule.addProperty("issued", RSSModule.W3CDTF_PROPERTY_TYPE); + dcTermsModule.addProperty("modified", RSSModule.W3CDTF_PROPERTY_TYPE); + dcTermsModule.addProperty("dateAccepted", RSSModule.W3CDTF_PROPERTY_TYPE); + dcTermsModule.addProperty("dateCopyrighted", RSSModule.W3CDTF_PROPERTY_TYPE); + dcTermsModule.addProperty("dateSubmitted", RSSModule.W3CDTF_PROPERTY_TYPE); + registerModule(dcTermsModule, "dcterms"); + + RSSBasicModule v2vTermsModule = new RSSBasicModule(V2V_NAMESPACE_URI, "indymedia v2v RSS module"); + v2vTermsModule.addMultiValuedProperty("topic", RSSModule.PCDATA_PROPERTY_TYPE); + v2vTermsModule.addMultiValuedProperty("genre", RSSModule.PCDATA_PROPERTY_TYPE); + v2vTermsModule.addMultiValuedProperty("link", RSSModule.PCDATA_PROPERTY_TYPE); + registerModule(v2vTermsModule, "v2v"); + registerModule(new RSSBasicModule(EVENT_NAMESPACE_URI, "Event RSS module"), "ev"); registerModule(new RSSBasicModule(TAXONOMY_NAMESPACE_URI, "Taxonomy RSS module"), "taxo"); + registerModule(new RSSBasicModule(CONTENT_NAMESPACE_URI , "Content RSS module"), "content"); } public void registerModule(RSSModule aModule, String aPrefix) { @@ -93,9 +117,20 @@ public class RSSReader { public RSSData parseInputStream(InputStream aStream) throws RSSExc, RSSFailure { try { - XMLReader xmlReader = new XMLReader(true); RSSData result = new RSSData(); - xmlReader.parseInputStream(aStream, new RootSectionHandler(result)); + XMLParserEngine.getInstance().parse("xml", aStream, new RootSectionHandler(result)); + + return result; + } + catch (Throwable t) { + throw new RSSFailure(t); + } + } + + public RSSData parseInputStream(InputStream aStream, String anEncoding) throws RSSExc, RSSFailure { + try { + RSSData result = new RSSData(); + XMLParserEngine.getInstance().parse("xml", aStream, anEncoding, new RootSectionHandler(result)); return result; } @@ -106,46 +141,64 @@ public class RSSReader { public RSSData parseUrl(String anUrl) throws RSSExc, RSSFailure { try { - InputStream inputStream = (InputStream) new URL(anUrl).getContent(new Class[] {InputStream.class}); + HTTPClientHelper httpClientHelper = new HTTPClientHelper(); + InputStream inputStream = httpClientHelper.getUrl(anUrl); + if (inputStream==null) + throw new RSSExc("RSSChannel.parseUrl: Can't get url content"); + + RSSData theRSSData = parseInputStream(inputStream); + httpClientHelper.releaseHTTPConnection(); + return theRSSData; + } + catch (Throwable t) { + throw new RSSFailure(t); + } + } + public RSSData parseUrl(String anUrl, String anEncoding) throws RSSExc, RSSFailure { + try { + HTTPClientHelper httpClientHelper = new HTTPClientHelper(); + InputStream inputStream = httpClientHelper.getUrl(anUrl); if (inputStream==null) throw new RSSExc("RSSChannel.parseUrl: Can't get url content"); - return parseInputStream(inputStream); + RSSData theRSSData = parseInputStream(inputStream, anEncoding); + httpClientHelper.releaseHTTPConnection(); + return theRSSData; } catch (Throwable t) { throw new RSSFailure(t); } } - private class RootSectionHandler extends XMLReader.AbstractSectionHandler { + private class RootSectionHandler extends mir.util.xml.AbstractSectionHandler { private RSSData data; public RootSectionHandler(RSSData aData) { data = aData; } - public XMLReader.SectionHandler startElement(XMLReader.XMLName aTag, Map anAttributes) throws XMLReader.XMLReaderExc { + public mir.util.xml.SectionHandler startElement(mir.util.xml.XMLName aTag, Map anAttributes) throws XMLParserExc { if (aTag.getLocalName().equals("RDF")) { return new RDFSectionHandler(data); } else - throw new XMLReader.XMLReaderFailure(new RSSExc("'RDF' tag expected")); + throw new XMLParserFailure(new RSSExc("'RDF' tag expected")); }; - public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc { + public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc { }; - public void characters(String aCharacters) throws XMLReader.XMLReaderExc { + public void characters(String aCharacters) throws XMLParserExc { if (aCharacters.trim().length()>0) - throw new XMLReader.XMLReaderExc("No character data allowed here"); + throw new XMLParserExc("No character data allowed here"); }; - public void finishSection() throws XMLReader.XMLReaderExc { + public void finishSection() throws XMLParserExc { }; } - private class RDFSectionHandler extends XMLReader.AbstractSectionHandler { + private class RDFSectionHandler extends mir.util.xml.AbstractSectionHandler { private RSSData data; @@ -153,35 +206,31 @@ public class RSSReader { data = aData; } - public XMLReader.SectionHandler startElement(XMLReader.XMLName aTag, Map anAttributes) throws XMLReader.XMLReaderExc { + public mir.util.xml.SectionHandler startElement(mir.util.xml.XMLName aTag, Map anAttributes) throws XMLParserExc { String identifier = (String) anAttributes.get(RDF_ABOUT_PARAMETER); String rdfClass = makeQualifiedName(aTag); return new RDFResourceSectionHandler(rdfClass, identifier); }; - public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc { + public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc { if (aHandler instanceof RDFResourceSectionHandler) { data.addResource(((RDFResourceSectionHandler) aHandler).getResource()); } }; - public void characters(String aCharacters) throws XMLReader.XMLReaderExc { + public void characters(String aCharacters) throws XMLParserExc { if (aCharacters.trim().length()>0) - throw new XMLReader.XMLReaderExc("No character data allowed here"); + throw new XMLParserExc("No character data allowed here"); }; - public void finishSection() throws XMLReader.XMLReaderExc { + public void finishSection() throws XMLParserExc { }; } - private XMLReader.SectionHandler makePropertyValueSectionHandler(XMLReader.XMLName aTag, Map anAttributes) { + private mir.util.xml.SectionHandler makePropertyValueSectionHandler(mir.util.xml.XMLName aTag, Map anAttributes) { RSSModule module = (RSSModule) namespaceURItoModule.get(aTag.getNamespaceURI()); - if (aTag.getLocalName().equals("date")) - aTag = aTag; - - if (module!=null) { RSSModule.RSSModuleProperty property = module.getPropertyForName(aTag.getLocalName()); @@ -201,13 +250,35 @@ public class RSSReader { return new DateSectionHandler(); } } - } return new FlexiblePropertyValueSectionHandler(); } - private String makeQualifiedName(XMLReader.XMLName aName) { + private void usePropertyValueSectionHandler(RDFResource aResource, PropertyValueSectionHandler aHandler, mir.util.xml.XMLName aTag) { + RSSModule module = (RSSModule) namespaceURItoModule.get(aTag.getNamespaceURI()); + + if (module!=null) { + RSSModule.RSSModuleProperty property = module.getPropertyForName(aTag.getLocalName()); + + if (property!=null && property.getIsMultiValued()) { + List value = (List) aResource.get(makeQualifiedName(aTag)); + + if (value==null) { + value = new ArrayList(); + aResource.set(makeQualifiedName(aTag), value); + } + + value.add(aHandler.getValue()); + + return; + } + } + + aResource.set(makeQualifiedName(aTag), aHandler.getValue()); + } + + private String makeQualifiedName(mir.util.xml.XMLName aName) { String result=aName.getLocalName(); RSSModule module = (RSSModule) namespaceURItoModule.get(aName.getNamespaceURI()); if (module!=null) { @@ -220,41 +291,46 @@ public class RSSReader { return result; } - private class RDFResourceSectionHandler extends XMLReader.AbstractSectionHandler { + private class RDFResourceSectionHandler extends mir.util.xml.AbstractSectionHandler { private String image; - private XMLReader.XMLName currentTag; + private mir.util.xml.XMLName currentTag; private RDFResource resource; public RDFResourceSectionHandler(String anRDFClass, String anIdentifier) { resource = new RDFResource(anRDFClass, anIdentifier); } - public XMLReader.SectionHandler startElement(XMLReader.XMLName aTag, Map anAttributes) throws XMLReader.XMLReaderExc { + public mir.util.xml.SectionHandler startElement(mir.util.xml.XMLName aTag, Map anAttributes) throws XMLParserExc { currentTag = aTag; return makePropertyValueSectionHandler(aTag, anAttributes); }; - public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc { + public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc { if (aHandler instanceof PropertyValueSectionHandler) { - resource.set(makeQualifiedName(currentTag), ( (PropertyValueSectionHandler) aHandler).getValue()); + usePropertyValueSectionHandler(resource, (PropertyValueSectionHandler) aHandler, currentTag); +// resource.set(makeQualifiedName(currentTag), ( (PropertyValueSectionHandler) aHandler).getFieldValue()); } }; - public void characters(String aCharacters) throws XMLReader.XMLReaderExc { + public void characters(String aCharacters) throws XMLParserExc { if (aCharacters.trim().length()>0) - throw new XMLReader.XMLReaderExc("No character data allowed here"); + throw new XMLParserExc("No character data allowed here"); }; - public void finishSection() throws XMLReader.XMLReaderExc { + public void finishSection() throws XMLParserExc { }; public RDFResource getResource() { + if ((resource.getIdentifier()==null || resource.getIdentifier().length()==0) && resource.get("rss:link")!=null) { + resource.setIdentifier(resource.get("rss:link").toString()); + } + return resource; } } - private abstract class PropertyValueSectionHandler extends XMLReader.AbstractSectionHandler { + private abstract class PropertyValueSectionHandler extends mir.util.xml.AbstractSectionHandler { public abstract Object getValue(); } @@ -267,24 +343,24 @@ public class RSSReader { structuredData=null; } - public XMLReader.SectionHandler startElement(String aTag, Map anAttributes) throws XMLReader.XMLReaderExc { + public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc { if (aTag.equals(RDF_SEQUENCE_TAG)) return new RDFSequenceSectionHandler(); else return new DiscardingSectionHandler(); }; - public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc { + public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc { if (aHandler instanceof RDFSequenceSectionHandler) { structuredData= ((RDFSequenceSectionHandler) aHandler).getItems(); } }; - public void characters(String aCharacters) throws XMLReader.XMLReaderExc { + public void characters(String aCharacters) throws XMLParserExc { stringData.append(aCharacters); }; - public void finishSection() throws XMLReader.XMLReaderExc { + public void finishSection() throws XMLParserExc { }; public String getData() { @@ -303,28 +379,28 @@ public class RSSReader { private List items; public RDFCollectionSectionHandler() { - items = new Vector(); + items = new ArrayList(); } - public XMLReader.SectionHandler startElement(String aTag, Map anAttributes) throws XMLReader.XMLReaderExc { + public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc { if (aTag.equals(RDF_SEQUENCE_TAG)) return new RDFSequenceSectionHandler(); else return new DiscardingSectionHandler(); }; - public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc { + public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc { if (aHandler instanceof RDFSequenceSectionHandler) { items.addAll(((RDFSequenceSectionHandler) aHandler).getItems()); } }; - public void characters(String aCharacters) throws XMLReader.XMLReaderExc { + public void characters(String aCharacters) throws XMLParserExc { if (aCharacters.trim().length()>0) - throw new XMLReader.XMLReaderExc("No character data allowed here"); + throw new XMLParserExc("No character data allowed here"); }; - public void finishSection() throws XMLReader.XMLReaderExc { + public void finishSection() throws XMLParserExc { }; public List getItems() { @@ -343,18 +419,18 @@ public class RSSReader { data = new StringBuffer(); } - public XMLReader.SectionHandler startElement(String aTag, Map anAttributes) throws XMLReader.XMLReaderExc { - throw new XMLReader.XMLReaderFailure(new RSSExc("No subtags allowed here")); + public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc { + throw new XMLParserFailure(new RSSExc("No subtags allowed here")); }; - public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc { + public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc { }; - public void characters(String aCharacters) throws XMLReader.XMLReaderExc { + public void characters(String aCharacters) throws XMLParserExc { data.append(aCharacters); }; - public void finishSection() throws XMLReader.XMLReaderExc { + public void finishSection() throws XMLParserExc { }; public String getData() { @@ -373,24 +449,20 @@ public class RSSReader { data = new StringBuffer(); } - public XMLReader.SectionHandler startElement(String aTag, Map anAttributes) throws XMLReader.XMLReaderExc { - throw new XMLReader.XMLReaderFailure(new RSSExc("No subtags allowed here")); + public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc { + throw new XMLParserFailure(new RSSExc("No subtags allowed here")); }; - public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc { + public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc { }; - public void characters(String aCharacters) throws XMLReader.XMLReaderExc { + public void characters(String aCharacters) throws XMLParserExc { data.append(aCharacters); }; - public void finishSection() throws XMLReader.XMLReaderExc { + public void finishSection() throws XMLParserExc { }; - private final static String SPACE = "[\t\n\r ]*"; - private final static String NUMBER = "[0-9]*"; - private final static String SIGN = "[-+]"; - public Object getValue() { try { String expression = data.toString().trim(); @@ -405,14 +477,14 @@ public class RSSReader { } - private class RDFSequenceSectionHandler extends XMLReader.AbstractSectionHandler { + private class RDFSequenceSectionHandler extends mir.util.xml.AbstractSectionHandler { private List items; public RDFSequenceSectionHandler() { - items = new Vector(); + items = new ArrayList(); } - public XMLReader.SectionHandler startElement(String aTag, Map anAttributes) throws XMLReader.XMLReaderExc { + public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc { if (aTag.equals("rdf:li")) { String item = (String) anAttributes.get("rdf:resource"); @@ -423,13 +495,13 @@ public class RSSReader { return new DiscardingSectionHandler(); }; - public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc { + public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc { }; - public void characters(String aCharacters) throws XMLReader.XMLReaderExc { + public void characters(String aCharacters) throws XMLParserExc { }; - public void finishSection() throws XMLReader.XMLReaderExc { + public void finishSection() throws XMLParserExc { }; public List getItems() { @@ -449,23 +521,23 @@ public class RSSReader { return data; } - public XMLReader.SectionHandler startElement(String aTag, Map anAttributes) throws XMLReader.XMLReaderExc { + public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc { tag=aTag; data.append("<"+tag+">"); return new RDFLiteralSectionHandler(); }; - public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc { + public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc { data.append(((RDFLiteralSectionHandler) aHandler).getData()); data.append(""); }; - public void characters(String aCharacters) throws XMLReader.XMLReaderExc { + public void characters(String aCharacters) throws XMLParserExc { data.append(aCharacters); }; - public void finishSection() throws XMLReader.XMLReaderExc { + public void finishSection() throws XMLParserExc { }; public Object getValue() { @@ -473,18 +545,18 @@ public class RSSReader { } } - private class DiscardingSectionHandler extends XMLReader.AbstractSectionHandler { - public XMLReader.SectionHandler startElement(String aTag, Map anAttributes) throws XMLReader.XMLReaderExc { + private class DiscardingSectionHandler extends mir.util.xml.AbstractSectionHandler { + public mir.util.xml.SectionHandler startElement(String aTag, Map anAttributes) throws XMLParserExc { return this; }; - public void endElement(XMLReader.SectionHandler aHandler) throws XMLReader.XMLReaderExc { + public void endElement(mir.util.xml.SectionHandler aHandler) throws XMLParserExc { }; - public void characters(String aCharacters) throws XMLReader.XMLReaderExc { + public void characters(String aCharacters) throws XMLParserExc { }; - public void finishSection() throws XMLReader.XMLReaderExc { + public void finishSection() throws XMLParserExc { }; } }