go-feedparser

Simple RSS and Atom feed parser

git clone https://git.8pit.net/go-feedparser.git

  1// This program is free software: you can redistribute it and/or modify
  2// it under the terms of the GNU General Public License as published by
  3// the Free Software Foundation, either version 3 of the License, or
  4// (at your option) any later version.
  5//
  6// This program is distributed in the hope that it will be useful,
  7// but WITHOUT ANY WARRANTY; without even the implied warranty of
  8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9// GNU General Public License for more details.
 10//
 11// You should have received a copy of the GNU General Public License
 12// along with this program. If not, see <http://www.gnu.org/licenses/>.
 13//
 14// This is a slightly modified version of 'encoding/xml/read_test.go'.
 15// Copyright 2009 The Go Authors. All rights reserved. Use of this
 16// source code is governed by a BSD-style license that can be found in
 17// the LICENSE file.
 18
 19package feedparser
 20
import (
	"bytes"
	"encoding/xml"
	"strings"
	"time"

	"golang.org/x/net/html/charset"
)
 27
// dateFormats lists the time layouts that parseTime tries, in order,
// when parsing a date string; the first layout that parses wins, so
// the position of an entry matters. Every entry uses the reference
// time of the time package (Mon Jan 2 15:04:05 MST 2006); text that
// is not a reference-time element is matched literally. The layouts
// predefined by the time package are tried last.
//
// Originally imported from goread <https://github.com/mjibson/goread>.
var dateFormats = []string{
	"01-02-2006",
	"01/02/2006",
	"01/02/2006 - 15:04",
	"01/02/2006 15:04:05 MST",
	"01/02/2006 3:04 PM",
	"02-01-2006",
	"02/01/2006",
	"02.01.2006 -0700",
	"02/01/2006 - 15:04",
	"02.01.2006 15:04",
	"02/01/2006 15:04:05",
	"02.01.2006 15:04:05",
	"02-01-2006 15:04:05 MST",
	"02/01/2006 15:04 MST",
	"02 Jan 2006",
	"02 Jan 2006 15:04:05",
	"02 Jan 2006 15:04:05 -0700",
	"02 Jan 2006 15:04:05 MST",
	"02 Jan 2006 15:04:05 UT",
	"02 Jan 2006 15:04 MST",
	"02 Monday, Jan 2006 15:04",
	"06-1-2 15:04",
	"06/1/2 15:04",
	"1/2/2006",
	"1/2/2006 15:04:05 MST",
	"1/2/2006 3:04:05 PM",
	"1/2/2006 3:04:05 PM MST",
	"15:04 02.01.2006 -0700",
	"2006-01-02",
	"2006/01/02",
	"2006-01-02 00:00:00.0 15:04:05.0 -0700",
	"2006-01-02 15:04",
	"2006-01-02 15:04:05 -0700",
	"2006-01-02 15:04:05-07:00",
	"2006-01-02 15:04:05-0700",
	"2006-01-02 15:04:05 MST",
	"2006-01-02 15:04:05Z",
	"2006-01-02 at 15:04:05",
	"2006-01-02T15:04:05",
	"2006-01-02T15:04:05:00",
	"2006-01-02T15:04:05 -0700",
	"2006-01-02T15:04:05-07:00",
	"2006-01-02T15:04:05-0700",
	"2006-01-02T15:04:05:-0700",
	"2006-01-02T15:04:05-07:00:00",
	"2006-01-02T15:04:05Z",
	"2006-01-02T15:04-07:00",
	"2006-01-02T15:04Z",
	"2006-1-02T15:04:05Z",
	"2006-1-2",
	"2006-1-2 15:04:05",
	"2006-1-2T15:04:05Z",
	"2006 January 02",
	"2-1-2006",
	"2/1/2006",
	"2.1.2006 15:04:05",
	"2 Jan 2006",
	"2 Jan 2006 15:04:05 -0700",
	"2 Jan 2006 15:04:05 MST",
	"2 Jan 2006 15:04:05 Z",
	"2 January 2006",
	"2 January 2006 15:04:05 -0700",
	"2 January 2006 15:04:05 MST",
	"6-1-2 15:04",
	"6/1/2 15:04",
	"Jan 02, 2006",
	"Jan 02 2006 03:04:05PM",
	"Jan 2, 2006",
	"Jan 2, 2006 15:04:05 MST",
	"Jan 2, 2006 3:04:05 PM",
	"Jan 2, 2006 3:04:05 PM MST",
	"January 02, 2006",
	"January 02, 2006 03:04 PM",
	"January 02, 2006 15:04",
	"January 02, 2006 15:04:05 MST",
	"January 2, 2006",
	"January 2, 2006 03:04 PM",
	"January 2, 2006 15:04:05",
	"January 2, 2006 15:04:05 MST",
	"January 2, 2006, 3:04 p.m.",
	"January 2, 2006 3:04 PM",
	"Mon, 02 Jan 06 15:04:05 MST",
	"Mon, 02 Jan 2006",
	"Mon, 02 Jan 2006 15:04:05",
	"Mon, 02 Jan 2006 15:04:05 00",
	"Mon, 02 Jan 2006 15:04:05 -07",
	"Mon 02 Jan 2006 15:04:05 -0700",
	"Mon, 02 Jan 2006 15:04:05 --0700",
	"Mon, 02 Jan 2006 15:04:05 -07:00",
	"Mon, 02 Jan 2006 15:04:05 -0700",
	"Mon,02 Jan 2006 15:04:05 -0700",
	"Mon, 02 Jan 2006 15:04:05 GMT-0700",
	"Mon , 02 Jan 2006 15:04:05 MST",
	"Mon, 02 Jan 2006 15:04:05 MST",
	"Mon, 02 Jan 2006 15:04:05MST",
	"Mon, 02 Jan 2006, 15:04:05 MST",
	"Mon, 02 Jan 2006 15:04:05 MST -0700",
	"Mon, 02 Jan 2006 15:04:05 MST-07:00",
	"Mon, 02 Jan 2006 15:04:05 UT",
	"Mon, 02 Jan 2006 15:04:05 Z",
	"Mon, 02 Jan 2006 15:04 -0700",
	"Mon, 02 Jan 2006 15:04 MST",
	"Mon,02 Jan 2006 15:04 MST",
	"Mon, 02 Jan 2006 15 -0700",
	"Mon, 02 Jan 2006 3:04:05 PM MST",
	"Mon, 02 January 2006",
	// The hour must be spelled "15": "14" would be read as a month
	// ("1") followed by a minute ("4") and never match a real date.
	"Mon,02 January 2006 15:04:05 MST",
	"Mon, 2006-01-02 15:04",
	"Mon, 2 Jan 06 15:04:05 -0700",
	"Mon, 2 Jan 06 15:04:05 MST",
	"Mon, 2 Jan 15:04:05 MST",
	"Mon, 2 Jan 2006",
	"Mon,2 Jan 2006",
	"Mon, 2 Jan 2006 15:04",
	"Mon, 2 Jan 2006 15:04:05",
	"Mon, 2 Jan 2006 15:04:05 -0700",
	"Mon, 2 Jan 2006 15:04:05-0700",
	"Mon, 2 Jan 2006 15:04:05 -0700 MST",
	"mon,2 Jan 2006 15:04:05 MST",
	"Mon 2 Jan 2006 15:04:05 MST",
	"Mon, 2 Jan 2006 15:04:05 MST",
	"Mon, 2 Jan 2006 15:04:05MST",
	"Mon, 2 Jan 2006 15:04:05 UT",
	"Mon, 2 Jan 2006 15:04 -0700",
	"Mon, 2 Jan 2006, 15:04 -0700",
	"Mon, 2 Jan 2006 15:04 MST",
	"Mon, 2, Jan 2006 15:4",
	"Mon, 2 Jan 2006 15:4:5 -0700 GMT",
	"Mon, 2 Jan 2006 15:4:5 MST",
	"Mon, 2 Jan 2006 3:04:05 PM -0700",
	"Mon, 2 January 2006",
	"Mon, 2 January 2006 15:04:05 -0700",
	"Mon, 2 January 2006 15:04:05 MST",
	"Mon, 2 January 2006, 15:04:05 MST",
	"Mon, 2 January 2006, 15:04 -0700",
	"Mon, 2 January 2006 15:04 MST",
	"Monday, 02 January 2006 15:04:05",
	"Monday, 02 January 2006 15:04:05 -0700",
	"Monday, 02 January 2006 15:04:05 MST",
	"Monday, 2 Jan 2006 15:04:05 -0700",
	"Monday, 2 Jan 2006 15:04:05 MST",
	"Monday, 2 January 2006 15:04:05 -0700",
	"Monday, 2 January 2006 15:04:05 MST",
	"Monday, January 02, 2006",
	"Monday, January 2, 2006",
	"Monday, January 2, 2006 03:04 PM",
	"Monday, January 2, 2006 15:04:05 MST",
	"Mon Jan 02 2006 15:04:05 -0700",
	"Mon, Jan 02,2006 15:04:05 MST",
	"Mon Jan 02, 2006 3:04 pm",
	"Mon Jan 2 15:04:05 2006 MST",
	"Mon Jan 2 15:04 2006",
	"Mon, Jan 2 2006 15:04:05 -0700",
	"Mon, Jan 2 2006 15:04:05 -700",
	"Mon, Jan 2, 2006 15:04:05 MST",
	"Mon, Jan 2 2006 15:04 MST",
	"Mon, Jan 2, 2006 15:04 MST",
	"Mon, January 02, 2006 15:04:05 MST",
	"Mon, January 02, 2006, 15:04:05 MST",
	"Mon, January 2 2006 15:04:05 -0700",
	time.ANSIC,
	time.RFC1123,
	time.RFC1123Z,
	time.RFC3339,
	time.RFC822,
	time.RFC822Z,
	time.RFC850,
	time.RubyDate,
	time.UnixDate,
}
202
203// unmarshal unmarshals an xml document to the given interface.
204// It uses a custom charsetReader and therefore supports non-utf8
205// xml encodings.
206func unmarshal(data []byte, v interface{}) error {
207	decoder := xml.NewDecoder(bytes.NewReader(data))
208	decoder.CharsetReader = charset.NewReaderLabel
209	return decoder.Decode(v)
210}
211
212// parseTime tries to parse the given string as a date by trying
213// various different date formats.
214func parseTime(data string) (date time.Time, err error) {
215	for _, format := range dateFormats {
216		date, err = time.Parse(format, data)
217		if err == nil {
218			return
219		}
220	}
221
222	return
223}