1// This program is free software: you can redistribute it and/or modify 2// it under the terms of the GNU General Public License as published by 3// the Free Software Foundation, either version 3 of the License, or 4// (at your option) any later version. 5// 6// This program is distributed in the hope that it will be useful, 7// but WITHOUT ANY WARRANTY; without even the implied warranty of 8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9// GNU General Public License for more details. 10// 11// You should have received a copy of the GNU General Public License 12// along with this program. If not, see <http://www.gnu.org/licenses/>. 13// 14// This is a slightly modified version of 'encoding/xml/read_test.go'. 15// Copyright 2009 The Go Authors. All rights reserved. Use of this 16// source code is governed by a BSD-style license that can be found in 17// the LICENSE file. 18 19package feedparser 20 21import ( 22 "encoding/xml" 23) 24 25// RssFeed represents an rss web feed. 26type RssFeed struct { 27 // XMLName. 28 XMLName xml.Name `xml:"rss"` 29 30 // Name of the channel (required). 31 Title string `xml:"channel>title"` 32 33 // URL to the website (required). 34 Link string `xml:"channel>link"` 35 36 // Description for the channel (required). 37 Description string `xml:"channel>description"` 38 39 // Items for the feed (required). 40 Items []RssItem `xml:"channel>item"` 41 42 // Language the channel is written in (optional). 43 Language string `xml:"channel>language"` 44 45 // Copyright notice for the content (optional). 46 Copyright string `xml:"channel>copyright"` 47 48 // Email address of the editor (optional). 49 Editor string `xml:"channel>managingEditor"` 50 51 // Email address of the web master (optional). 52 WebMaster string `xml:"channel>webMaster"` 53 54 // Publication date for the content (optional). 55 PubDate string `xml:"channel>pubDate"` 56 57 // Last time the content was updated (optional). 58 LastBuildDate string `xml:"channel>lastBuildDate"` 59 60 // Categories the feed belongs to (optional). 61 Categories []RssCategory `xml:"channel>category"` 62 63 // Program used to generate the channel (optional). 64 Generator string `xml:"channel>generator"` 65 66 // URL that points to documentation for the used format (optional). 67 Docs string `xml:"channel>docs"` 68 69 // Cloud for update notifications (optional). 70 Cloud RssCloud `xml:"channel>cloud"` 71 72 // How long the channel can be cached (optional). 73 TTL int `xml:"channel>ttl"` 74 75 // Image that can be displayed with the channel (optional). 76 Image RssImage `xml:"channel>image"` 77 78 // PICS rating for the channel (optional). 79 Rating string `xml:"channel>rating"` 80 81 // Text input box related to the channel (optional). 82 TextInput RssTextInput `xml:"channel>textInput"` 83 84 // Hint for aggregators telling them which hours can be skipped (optional). 85 SkipHours []RssHour `xml:"channel>skipHours"` 86 87 // Hint for aggregators telling them which days can be skipped (optional). 88 SkipDays []RssDay `xml:"channel>skipDays"` 89} 90 91// RssItem represents an rss item. 92type RssItem struct { 93 // Title of the item (required if description isn't present). 94 Title string `xml:"title"` 95 96 // The item synopsis (required if title isn't present). 97 Description string `xml:"description"` 98 99 // The URL of the item (optional).100 Link string `xml:"link"`101102 // Email address of the author of the item (optional).103 Author string `xml:"author"`104105 // Includes item in one or more categories (optional).106 Categories []RssCategory `xml:"category"`107108 // URL to a page for comments (optional).109 Comments string `xml:"comments"`110111 // Media object that is attached to the item (optional).112 Enclosure RssEnclosure `xml:"enclosure"`113114 // String that uniquely identifies the item (optional).115 GUID string `xml:"guid"`116117 // Time the item was published (optional).118 PubDate string `xml:"pubDate"`119120 // The RSS channel the item came from (optional).121 Source RssSource `xml:"source"`122}123124// RssEnclosure represents an rss enclosure.125type RssEnclosure struct {126 // Where the enclosure is located (required).127 URL string `xml:"url,attr"`128129 // Size of the enclosure in bytes (required).130 Length string `xml:"length,attr"`131132 // MIME type of the enclosure (required).133 Type string `xml:"type,attr"`134}135136// RssImage represents an rss image.137type RssImage struct {138 // URL to image that represents the channel (required).139 URL string `xml:"url"`140141 // Title which describes the image (required).142 Title string `xml:"title"`143144 // URL of the site itself (required).145 Link string `xml:"link"`146147 // Width of the image (optional).148 Width int `xml:"width"`149150 // Height of the image (optional).151 Height int `xml:"height"`152153 // Additional description of the image (optional).154 Description string `xml:"description"`155}156157// RssCloud represents the rss cloud tag.158type RssCloud struct {159 // Domain cloud service is running on (required).160 Domain string `xml:"domain,attr"`161162 // Port to use for TCP socket connection (required).163 Port int `xml:"port,attr"`164165 // Path to use for the request (required).166 Path string `xml:"path,attr"`167168 // Register procedure which should be used (required).169 RegisterProcedure string `xml:"registerProcedure,attr"`170171 // Protocol used for registration et cetera (required).172 Protocol string `xml:"protocol,attr"`173}174175// RssCategory represents the rss category tag.176type RssCategory struct {177 // Human readable category name (required).178 Name string `xml:",chardata"`179180 // Domain that identifies categorization taxonomy (optional).181 Domain string `xml:"domain,attr"`182}183184// RssTextInput represents the rss textInput tag.185type RssTextInput struct {186 // The label of the Submit button in the text input area (required).187 Title string `xml:"title"`188189 // Explains the text input area (required).190 Description string `xml:"description"`191192 // The name of the text object in the text input area (required).193 Name string `xml:"name"`194195 // The URL of the CGI script that processes text input requests (required).196 Link string `xml:"link"`197}198199// RssSource represents the rss source tag.200type RssSource struct {201 // URL which links to the XMLization source (required).202 URL string `xml:"url,attr"`203204 // Source name (required).205 Name string `xml:",chardata"`206}207208// RssHour represents the hour tag, a subelement of the skipHours tag.209type RssHour struct {210 // Number between 0 and 23 representing time in GMT (required).211 Hour int `xml:"hour"`212}213214// RssDay represents the day tag, a subelement of the skipDays tag.215type RssDay struct {216 // Weekday (e.g Monday) (required).217 Day string `xml:"day"`218}219220// parseRss parses an rss feed and returns a generic feed.221func parseRss(data []byte) (f Feed, err error) {222 var origFeed RssFeed223 if err = unmarshal(data, &origFeed); err != nil {224 return225 }226227 f = Feed{228 Type: "rss",229 Title: origFeed.Title,230 Link: origFeed.Link,231 Description: origFeed.Description,232 Image: origFeed.Image.URL,233 Generator: origFeed.Generator,234 Rights: origFeed.Copyright,235 Author: origFeed.Editor,236 }237238 if len(origFeed.LastBuildDate) > 0 {239 f.Updated, err = parseTime(origFeed.LastBuildDate)240 if err != nil {241 return242 }243 }244245 for _, category := range origFeed.Categories {246 f.Categories = append(f.Categories, category.Name)247 }248249 for _, entry := range origFeed.Items {250 item := Item{251 ID: entry.GUID,252 Title: entry.Title,253 Link: entry.Link,254 Content: entry.Description,255 Attachment: entry.Enclosure.URL,256 Author: entry.Author,257 }258259 for _, category := range entry.Categories {260 item.Categories = append(item.Categories, category.Name)261 }262263 item.PubDate, err = parseTime(entry.PubDate)264 if err != nil {265 return266 }267268 f.Items = append(f.Items, item)269 }270271 return272}