1// This program is free software: you can redistribute it and/or modify2// it under the terms of the GNU General Public License as published by3// the Free Software Foundation, either version 3 of the License, or4// (at your option) any later version.5//6// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License for more details.10//11// You should have received a copy of the GNU General Public License12// along with this program. If not, see <http://www.gnu.org/licenses/>.1314package feedparser1516import (17 "encoding/xml"18)1920// AtomFeed represents an atom web feed.21type AtomFeed struct {22 // XMLName.23 XMLName xml.Name `xml:"feed"`2425 // Universally unique feed ID (required).26 ID string `xml:"id"`2728 // Human readable title for the feed (required).29 Title AtomText `xml:"title"`3031 // Last time the feed was significantly modified (required).32 Updated string `xml:"updated"`3334 // Entries for the feed (required).35 Entries []AtomEntry `xml:"entry"`3637 // Authors of the feed (recommended).38 Authors []AtomPerson `xml:"author"`3940 // Links which identify related web pages (recommended).41 Links []AtomLink `xml:"link"`4243 // Categories the feed belongs to (optional).44 Categories []AtomCategory `xml:"category"`4546 // Contributors to the feed (optional).47 Contributors []AtomPerson `xml:"contributor"`4849 // Software used to generate the feed (optional).50 Generator AtomGenerator `xml:"generator"`5152 // Small icon used for visual identification (optional).53 Icon string `xml:"icon"`5455 // Larger logo for visual identification (optional).56 Logo string `xml:"logo"`5758 // Information about rights, for example copyrights (optional).59 Rights AtomText `xml:"rights"`6061 // Human readable description or subtitle (optional).62 Subtitle AtomText `xml:"subtitle"`63}6465// AtomEntry represents an atom entry.66type AtomEntry struct {67 // Universally unique feed ID (required).68 ID string `xml:"id"`6970 // Human readable title for the entry (required).71 Title AtomText `xml:"title"`7273 // Last time the feed was significantly modified (required).74 Updated string `xml:"updated"`7576 // Authors of the entry (recommended).77 Authors []AtomPerson `xml:"author"`7879 // Content of the entry (recommended).80 Content AtomText `xml:"content"`8182 // Links which identify related web pages (recommended).83 Links []AtomLink `xml:"link"`8485 // Short summary, abstract or excerpt of the entry (recommended).86 Summary AtomText `xml:"summary"`8788 // Categories the entry belongs too (optional).89 Categories []AtomCategory `xml:"category"`9091 // Contributors to the entry (optional).92 Contributors []AtomPerson `xml:"contributor"`9394 // Time of the initial creation of the entry (optional).95 Published string `xml:"published"`9697 // FIXME98 // Feed's metadata, only used when entry was copied from another feed (optional).99 // Source AtomFeed `xml:"source"`100101 // Information about rights, for example copyrights (optional).102 Rights AtomText `xml:"rights"`103}104105// AtomLink represents the atom link tag.106type AtomLink struct {107 // Hypertext reference (required).108 Href string `xml:"href,attr"`109110 // Single Link relation type (optional).111 Rel string `xml:"rel,attr"`112113 // Media type of the resource (optional).114 Type string `xml:"type,attr"`115116 // Language of referenced resource (optional).117 HrefLang string `xml:"hreflang,attr"`118119 // Human readable information about the link (optional).120 Title string `xml:"title,attr"`121122 // Length of the resource in bytes (optional).123 Length string `xml:"length,attr"`124}125126// AtomPerson represents a person, corporation, et cetera.127type AtomPerson struct {128 // Human readable name for the person (required).129 Name string `xml:"name"`130131 // Home page for the person (optional).132 URI string `xml:"uri"`133134 // Email address for the person (optional).135 Email string `xml:"email"`136}137138// AtomCategory identifies the category.139type AtomCategory struct {140 // Identifier for this category (required).141 Term string `xml:"term,attr"`142143 // Categorization scheme via a URI (optional).144 Scheme string `xml:"scheme,attr"`145146 // Human readable label for display (optional).147 Label string `xml:"label,attr"`148}149150// AtomGenerator identifies the generator.151type AtomGenerator struct {152 // Generator name (required).153 Name string `xml:",chardata"`154155 // URI for this generator (optional).156 URI string `xml:"uri,attr"`157158 // Version for this generator (optional).159 Version string `xml:"version,attr"`160}161162// AtomText identifies human readable text.163type AtomText struct {164 // Text body (required).165 Body string `xml:",chardata"`166167 // InnerXML data (optional).168 InnerXML string `xml:",innerxml"`169170 // Text type (optional).171 Type string `xml:"type,attr"`172173 // URI where the content can be found (optional for <content>).174 URI string `xml:"uri,att"`175}176177// parseAtom parses an atom feed and returns a generic feed.178func parseAtom(data []byte) (f Feed, err error) {179 var origFeed AtomFeed180 if err = unmarshal(data, &origFeed); err != nil {181 return182 }183184 f = Feed{185 Type: "atom",186 Title: origFeed.Title.Body,187 Link: findLink(origFeed.Links).Href,188 Description: origFeed.Subtitle.Body,189 Image: origFeed.Logo,190 Generator: origFeed.Generator.Name,191 Rights: origFeed.Rights.Body,192 }193194 if len(origFeed.Authors) > 0 {195 f.Author = origFeed.Authors[0].Email196 }197198 f.Updated, err = parseTime(origFeed.Updated)199 if err != nil {200 return201 }202203 for _, category := range origFeed.Categories {204 f.Categories = append(f.Categories, category.Term)205 }206207 for _, entry := range origFeed.Entries {208 item := Item{209 ID: entry.ID,210 Title: entry.Title.Body,211 Link: findLink(entry.Links).Href,212 Content: entry.Content.Body,213 Attachment: findAttachment(entry.Links).Href,214 }215216 if len(entry.Authors) > 0 {217 item.Author = entry.Authors[0].Email218 }219220 for _, category := range entry.Categories {221 item.Categories = append(item.Categories, category.Term)222 }223224 timeStr := entry.Updated225 if len(entry.Published) > 0 {226 timeStr = entry.Published227 }228229 item.PubDate, err = parseTime(timeStr)230 if err != nil {231 return232 }233234 f.Items = append(f.Items, item)235 }236237 return238}239240// findLink attempts to find the most relevant link.241func findLink(links []AtomLink) AtomLink {242 var score int243 var match AtomLink244245 for _, link := range links {246 switch {247 case link.Rel == "alternate" && link.Type == "text/html":248 return link249 case score < 3 && link.Type == "text/html":250 score = 3251 match = link252 case score < 2 && link.Rel == "self":253 score = 2254 match = link255 case score < 1 && link.Rel == "":256 score = 1257 match = link258 case &match == nil:259 match = link260 }261 }262263 return match264}265266// findAttachment attempts to find a link which represents an attachment.267func findAttachment(links []AtomLink) AtomLink {268 for _, link := range links {269 if link.Rel == "enclosure" {270 return link271 }272 }273274 return AtomLink{}275}