1// This program is free software: you can redistribute it and/or modify 2// it under the terms of the GNU General Public License as published by 3// the Free Software Foundation, either version 3 of the License, or 4// (at your option) any later version. 5// 6// This program is distributed in the hope that it will be useful, 7// but WITHOUT ANY WARRANTY; without even the implied warranty of 8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9// GNU General Public License for more details. 10// 11// You should have received a copy of the GNU General Public License 12// along with this program. If not, see <http://www.gnu.org/licenses/>. 13 14package feedparser 15 16import ( 17 "encoding/xml" 18) 19 20// AtomFeed represents an atom web feed. 21type AtomFeed struct { 22 // XMLName. 23 XMLName xml.Name `xml:"feed"` 24 25 // Universally unique feed ID (required). 26 ID string `xml:"id"` 27 28 // Human readable title for the feed (required). 29 Title AtomText `xml:"title"` 30 31 // Last time the feed was significantly modified (required). 32 Updated string `xml:"updated"` 33 34 // Entries for the feed (required). 35 Entries []AtomEntry `xml:"entry"` 36 37 // Authors of the feed (recommended). 38 Authors []AtomPerson `xml:"author"` 39 40 // Links which identify related web pages (recommended). 41 Links []AtomLink `xml:"link"` 42 43 // Categories the feed belongs to (optional). 44 Categories []AtomCategory `xml:"category"` 45 46 // Contributors to the feed (optional). 47 Contributors []AtomPerson `xml:"contributor"` 48 49 // Software used to generate the feed (optional). 50 Generator AtomGenerator `xml:"generator"` 51 52 // Small icon used for visual identification (optional). 53 Icon string `xml:"icon"` 54 55 // Larger logo for visual identification (optional). 56 Logo string `xml:"logo"` 57 58 // Information about rights, for example copyrights (optional). 59 Rights AtomText `xml:"rights"` 60 61 // Human readable description or subtitle (optional). 62 Subtitle AtomText `xml:"subtitle"` 63} 64 65// AtomEntry represents an atom entry. 66type AtomEntry struct { 67 // Universally unique feed ID (required). 68 ID string `xml:"id"` 69 70 // Human readable title for the entry (required). 71 Title AtomText `xml:"title"` 72 73 // Last time the feed was significantly modified (required). 74 Updated string `xml:"updated"` 75 76 // Authors of the entry (recommended). 77 Authors []AtomPerson `xml:"author"` 78 79 // Content of the entry (recommended). 80 Content AtomText `xml:"content"` 81 82 // Links which identify related web pages (recommended). 83 Links []AtomLink `xml:"link"` 84 85 // Short summary, abstract or excerpt of the entry (recommended). 86 Summary AtomText `xml:"summary"` 87 88 // Categories the entry belongs too (optional). 89 Categories []AtomCategory `xml:"category"` 90 91 // Contributors to the entry (optional). 92 Contributors []AtomPerson `xml:"contributor"` 93 94 // Time of the initial creation of the entry (optional). 95 Published string `xml:"published"` 96 97 // FIXME 98 // Feed's metadata, only used when entry was copied from another feed (optional). 99 // Source AtomFeed `xml:"source"`100101 // Information about rights, for example copyrights (optional).102 Rights AtomText `xml:"rights"`103}104105// AtomLink represents the atom link tag.106type AtomLink struct {107 // Hypertext reference (required).108 Href string `xml:"href,attr"`109110 // Single Link relation type (optional).111 Rel string `xml:"rel,attr"`112113 // Media type of the resource (optional).114 Type string `xml:"type,attr"`115116 // Language of referenced resource (optional).117 HrefLang string `xml:"hreflang,attr"`118119 // Human readable information about the link (optional).120 Title string `xml:"title,attr"`121122 // Length of the resource in bytes (optional).123 Length string `xml:"length,attr"`124}125126// AtomPerson represents a person, corporation, et cetera.127type AtomPerson struct {128 // Human readable name for the person (required).129 Name string `xml:"name"`130131 // Home page for the person (optional).132 URI string `xml:"uri"`133134 // Email address for the person (optional).135 Email string `xml:"email"`136}137138// AtomCategory identifies the category.139type AtomCategory struct {140 // Identifier for this category (required).141 Term string `xml:"term,attr"`142143 // Categorization scheme via a URI (optional).144 Scheme string `xml:"scheme,attr"`145146 // Human readable label for display (optional).147 Label string `xml:"label,attr"`148}149150// AtomGenerator identifies the generator.151type AtomGenerator struct {152 // Generator name (required).153 Name string `xml:",chardata"`154155 // URI for this generator (optional).156 URI string `xml:"uri,attr"`157158 // Version for this generator (optional).159 Version string `xml:"version,attr"`160}161162// AtomText identifies human readable text.163type AtomText struct {164 // Text body (required).165 Body string `xml:",chardata"`166167 // InnerXML data (optional).168 InnerXML string `xml:",innerxml"`169170 // Text type (optional).171 Type string `xml:"type,attr"`172173 // URI where the content can be found (optional for <content>).174 URI string `xml:"uri,att"`175}176177// parseAtom parses an atom feed and returns a generic feed.178func parseAtom(data []byte) (f Feed, err error) {179 var origFeed AtomFeed180 if err = unmarshal(data, &origFeed); err != nil {181 return182 }183184 f = Feed{185 Type: "atom",186 Title: origFeed.Title.Body,187 Link: findLink(origFeed.Links).Href,188 Description: origFeed.Subtitle.Body,189 Image: origFeed.Logo,190 Generator: origFeed.Generator.Name,191 Rights: origFeed.Rights.Body,192 }193194 if len(origFeed.Authors) > 0 {195 f.Author = origFeed.Authors[0].Email196 }197198 f.Updated, err = parseTime(origFeed.Updated)199 if err != nil {200 return201 }202203 for _, category := range origFeed.Categories {204 f.Categories = append(f.Categories, category.Term)205 }206207 for _, entry := range origFeed.Entries {208 item := Item{209 ID: entry.ID,210 Title: entry.Title.Body,211 Link: findLink(entry.Links).Href,212 Content: entry.Content.Body,213 Attachment: findAttachment(entry.Links).Href,214 }215216 if len(entry.Authors) > 0 {217 item.Author = entry.Authors[0].Email218 }219220 for _, category := range entry.Categories {221 item.Categories = append(item.Categories, category.Term)222 }223224 timeStr := entry.Updated225 if len(entry.Published) > 0 {226 timeStr = entry.Published227 }228229 item.PubDate, err = parseTime(timeStr)230 if err != nil {231 return232 }233234 f.Items = append(f.Items, item)235 }236237 return238}239240// findLink attempts to find the most relevant link.241func findLink(links []AtomLink) AtomLink {242 var score int243 var match AtomLink244245 for _, link := range links {246 switch {247 case link.Rel == "alternate" && link.Type == "text/html":248 return link249 case score < 3 && link.Type == "text/html":250 score = 3251 match = link252 case score < 2 && link.Rel == "self":253 score = 2254 match = link255 case score < 1 && link.Rel == "":256 score = 1257 match = link258 case &match == nil:259 match = link260 }261 }262263 return match264}265266// findAttachment attempts to find a link which represents an attachment.267func findAttachment(links []AtomLink) AtomLink {268 for _, link := range links {269 if link.Rel == "enclosure" {270 return link271 }272 }273274 return AtomLink{}275}