OLD | NEW |
(Empty) | |
"""
Syndication feed generation library -- used for generating RSS, etc.

Sample usage:

>>> from django.utils import feedgenerator
>>> feed = feedgenerator.Rss201rev2Feed(
...     title="Poynter E-Media Tidbits",
...     link="http://www.poynter.org/column.asp?id=31",
...     description="A group Weblog by the sharpest minds in online media/journalism/publishing.",
...     language="en",
... )
>>> feed.add_item(
...     title="Hello",
...     link="http://www.holovaty.com/test/",
...     description="Testing."
... )
>>> with open('test.rss', 'w') as fp:
...     feed.write(fp, 'utf-8')

For definitions of the different versions of RSS, see:
http://web.archive.org/web/20110718035220/http://diveintomark.org/archives/2004/02/04/incompatible-rss
"""
| 24 from __future__ import unicode_literals |
| 25 |
| 26 import datetime |
| 27 import warnings |
| 28 |
| 29 from django.utils import datetime_safe, six |
| 30 from django.utils.deprecation import RemovedInDjango20Warning |
| 31 from django.utils.encoding import force_text, iri_to_uri |
| 32 from django.utils.six import StringIO |
| 33 from django.utils.six.moves.urllib.parse import urlparse |
| 34 from django.utils.timezone import utc |
| 35 from django.utils.xmlutils import SimplerXMLGenerator |
| 36 |
| 37 |
def rfc2822_date(date):
    """
    Format ``date`` as an RFC 2822 date-time string, for example
    'Fri, 14 Nov 2008 13:37:00 +0100'.

    Naive datetimes (``utcoffset()`` is None) are treated as UTC and get the
    '-0000' zone suffix. Aware datetimes get a signed 'HHMM' offset.
    """
    # We can't use strftime() because it produces locale-dependent results, so
    # we have to map english month and day names manually
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',)
    days = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    # Support datetime objects older than 1900
    date = datetime_safe.new_datetime(date)
    # We do this ourselves to be timezone aware, email.Utils is not tz aware.
    dow = days[date.weekday()]
    month = months[date.month - 1]
    time_str = date.strftime('%s, %%d %s %%Y %%H:%%M:%%S ' % (dow, month))
    if six.PY2:  # strftime returns a byte string in Python 2
        time_str = time_str.decode('utf-8')
    offset = date.utcoffset()
    # Historically, this function assumes that naive datetimes are in UTC.
    if offset is None:
        return time_str + '-0000'
    # Signed offset in whole minutes (negative west of UTC). timedelta stores
    # negative durations as negative days plus positive seconds.
    total_minutes = (offset.days * 24 * 60) + (offset.seconds // 60)
    # Split the magnitude, not the signed value: divmod() floors, so
    # divmod(-210, 60) == (-4, 30) would render -03:30 as "-0430".
    sign = '-' if total_minutes < 0 else '+'
    hour, minute = divmod(abs(total_minutes), 60)
    return time_str + '%s%02d%02d' % (sign, hour, minute)
| 59 |
| 60 |
def rfc3339_date(date):
    """
    Format ``date`` as an RFC 3339 timestamp, for example
    '2008-11-14T13:37:00+01:00'.

    Naive datetimes (``utcoffset()`` is None) are treated as UTC and get the
    'Z' suffix. Aware datetimes get a signed 'HH:MM' offset.
    """
    # Support datetime objects older than 1900
    date = datetime_safe.new_datetime(date)
    time_str = date.strftime('%Y-%m-%dT%H:%M:%S')
    if six.PY2:  # strftime returns a byte string in Python 2
        time_str = time_str.decode('utf-8')
    offset = date.utcoffset()
    # Historically, this function assumes that naive datetimes are in UTC.
    if offset is None:
        return time_str + 'Z'
    # Signed offset in whole minutes (negative west of UTC). timedelta stores
    # negative durations as negative days plus positive seconds.
    total_minutes = (offset.days * 24 * 60) + (offset.seconds // 60)
    # Split the magnitude, not the signed value: divmod() floors, so
    # divmod(-210, 60) == (-4, 30) would render -03:30 as "-04:30".
    sign = '-' if total_minutes < 0 else '+'
    hour, minute = divmod(abs(total_minutes), 60)
    return time_str + '%s%02d:%02d' % (sign, hour, minute)
| 75 |
| 76 |
| 77 def get_tag_uri(url, date): |
| 78 """ |
| 79 Creates a TagURI. |
| 80 |
| 81 See http://web.archive.org/web/20110514113830/http://diveintomark.org/archiv
es/2004/05/28/howto-atom-id |
| 82 """ |
| 83 bits = urlparse(url) |
| 84 d = '' |
| 85 if date is not None: |
| 86 d = ',%s' % datetime_safe.new_datetime(date).strftime('%Y-%m-%d') |
| 87 return 'tag:%s%s:%s/%s' % (bits.hostname, d, bits.path, bits.fragment) |
| 88 |
| 89 |
class SyndicationFeed(object):
    "Base class for all syndication feeds. Subclasses should provide write()"
    def __init__(self, title, link, description, language=None, author_email=None,
                 author_name=None, author_link=None, subtitle=None, categories=None,
                 feed_url=None, feed_copyright=None, feed_guid=None, ttl=None, **kwargs):
        """
        Store the feed-level metadata in ``self.feed`` and start with an empty
        item list.

        Text arguments are coerced with force_text(..., strings_only=True),
        URL arguments go through iri_to_uri(), and categories and ttl are
        always forced to text. The feed id is ``feed_guid`` when truthy,
        otherwise the ``link`` as passed in. Extra keyword arguments are
        merged into ``self.feed`` unchanged.
        """
        def to_unicode(s):
            return force_text(s, strings_only=True)
        if categories:
            categories = [force_text(c) for c in categories]
        if ttl is not None:
            # Force ints to unicode
            ttl = force_text(ttl)
        self.feed = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'language': to_unicode(language),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'subtitle': to_unicode(subtitle),
            'categories': categories or (),
            'feed_url': iri_to_uri(feed_url),
            'feed_copyright': to_unicode(feed_copyright),
            'id': feed_guid or link,
            'ttl': ttl,
        }
        self.feed.update(kwargs)
        self.items = []

    def add_item(self, title, link, description, author_email=None,
                 author_name=None, author_link=None, pubdate=None, comments=None,
                 unique_id=None, unique_id_is_permalink=None, enclosure=None,
                 categories=(), item_copyright=None, ttl=None, updateddate=None,
                 enclosures=None, **kwargs):
        """
        Adds an item to the feed. All args are expected to be Python Unicode
        objects except pubdate and updateddate, which are datetime.datetime
        objects, and enclosures, which is an iterable of instances of the
        Enclosure class.

        The singular ``enclosure`` argument is deprecated: when given it
        triggers a RemovedInDjango20Warning and replaces ``enclosures`` with a
        one-element list. Extra keyword arguments are merged into the item
        dict unchanged.
        """
        def to_unicode(s):
            return force_text(s, strings_only=True)
        if categories:
            # Coerce every category to text, as __init__() does for the
            # feed-level categories. strings_only=True would leave ints,
            # floats and dates untouched, and those cannot be written as
            # element content. None is kept as-is.
            categories = [c if c is None else force_text(c) for c in categories]
        if ttl is not None:
            # Force ints to unicode
            ttl = force_text(ttl)
        if enclosure is None:
            enclosures = [] if enclosures is None else enclosures
        else:
            warnings.warn(
                "The enclosure keyword argument is deprecated, "
                "use enclosures instead.",
                RemovedInDjango20Warning,
                stacklevel=2,
            )
            enclosures = [enclosure]
        item = {
            'title': to_unicode(title),
            'link': iri_to_uri(link),
            'description': to_unicode(description),
            'author_email': to_unicode(author_email),
            'author_name': to_unicode(author_name),
            'author_link': iri_to_uri(author_link),
            'pubdate': pubdate,
            'updateddate': updateddate,
            'comments': to_unicode(comments),
            'unique_id': to_unicode(unique_id),
            'unique_id_is_permalink': unique_id_is_permalink,
            'enclosures': enclosures,
            'categories': categories or (),
            'item_copyright': to_unicode(item_copyright),
            'ttl': ttl,
        }
        item.update(kwargs)
        self.items.append(item)

    def num_items(self):
        "Return the number of items added to the feed so far."
        return len(self.items)

    def root_attributes(self):
        """
        Return extra attributes to place on the root (i.e. feed/channel) element.
        Called from write().
        """
        return {}

    def add_root_elements(self, handler):
        """
        Add elements in the root (i.e. feed/channel) element. Called
        from write().
        """
        pass

    def item_attributes(self, item):
        """
        Return extra attributes to place on each item (i.e. item/entry) element.
        """
        return {}

    def add_item_elements(self, handler, item):
        """
        Add elements on each item (i.e. item/entry) element.
        """
        pass

    def write(self, outfile, encoding):
        """
        Outputs the feed in the given encoding to outfile, which is a file-like
        object. Subclasses should override this.
        """
        raise NotImplementedError('subclasses of SyndicationFeed must provide a write() method')

    def writeString(self, encoding):
        """
        Returns the feed in the given encoding as a string.
        """
        s = StringIO()
        self.write(s, encoding)
        return s.getvalue()

    def latest_post_date(self):
        """
        Returns the latest item's pubdate or updateddate. If no items
        have either of these attributes this returns the current UTC date/time.
        """
        latest_date = None
        date_keys = ('updateddate', 'pubdate')

        for item in self.items:
            for date_key in date_keys:
                item_date = item.get(date_key)
                if item_date:
                    if latest_date is None or item_date > latest_date:
                        latest_date = item_date

        # datetime.now(tz=utc) is slower, as documented in django.utils.timezone.now
        return latest_date or datetime.datetime.utcnow().replace(tzinfo=utc)
| 229 |
| 230 |
class Enclosure(object):
    "Represents an RSS enclosure"
    def __init__(self, url, length, mime_type):
        """
        All args are expected to be Python Unicode objects.

        ``length`` and ``mime_type`` are stored as given; ``url`` is stored
        after passing through iri_to_uri().
        """
        self.length = length
        self.mime_type = mime_type
        self.url = iri_to_uri(url)
| 237 |
| 238 |
class RssFeed(SyndicationFeed):
    """
    Common RSS serialization: an <rss> root wrapping a single <channel>.

    Reads ``self._version``, which this class does not define itself.
    """
    content_type = 'application/rss+xml; charset=utf-8'

    def write(self, outfile, encoding):
        "Write the whole feed to ``outfile`` as XML in the given encoding."
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement("rss", self.rss_attributes())
        xml.startElement("channel", self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        self.endChannelElement(xml)
        xml.endElement("rss")

    def rss_attributes(self):
        "Return the attributes to place on the <rss> element."
        attrs = {"version": self._version}
        attrs["xmlns:atom"] = "http://www.w3.org/2005/Atom"
        return attrs

    def write_items(self, handler):
        "Write one <item> element per feed item, in the order they were added."
        for entry in self.items:
            attrs = self.item_attributes(entry)
            handler.startElement('item', attrs)
            self.add_item_elements(handler, entry)
            handler.endElement("item")

    def add_root_elements(self, handler):
        """
        Write the channel-level elements. title, link, description and
        lastBuildDate are always written; the rest only when set.
        """
        feed = self.feed
        for tag in ("title", "link", "description"):
            handler.addQuickElement(tag, feed[tag])

        feed_url = feed['feed_url']
        if feed_url is not None:
            handler.addQuickElement("atom:link", None, {"rel": "self", "href": feed_url})

        language = feed['language']
        if language is not None:
            handler.addQuickElement("language", language)

        for category in feed['categories']:
            handler.addQuickElement("category", category)

        rights = feed['feed_copyright']
        if rights is not None:
            handler.addQuickElement("copyright", rights)

        last_build = rfc2822_date(self.latest_post_date())
        handler.addQuickElement("lastBuildDate", last_build)

        ttl = feed['ttl']
        if ttl is not None:
            handler.addQuickElement("ttl", ttl)

    def endChannelElement(self, handler):
        "Close the <channel> element opened by write()."
        handler.endElement("channel")

    @property
    def mime_type(self):
        "Deprecated alias for content_type; warns on every access."
        warnings.warn(
            'The mime_type attribute of RssFeed is deprecated. '
            'Use content_type instead.',
            RemovedInDjango20Warning, stacklevel=2
        )
        return self.content_type
| 289 |
| 290 |
class RssUserland091Feed(RssFeed):
    "RSS feed with version '0.91'; items carry only title, link and description."
    _version = "0.91"

    def add_item_elements(self, handler, item):
        "Write the item's title and link, plus its description when set."
        for tag in ("title", "link"):
            handler.addQuickElement(tag, item[tag])
        summary = item['description']
        if summary is not None:
            handler.addQuickElement("description", summary)
| 299 |
| 300 |
class Rss201rev2Feed(RssFeed):
    # Spec: http://blogs.law.harvard.edu/tech/rss
    _version = "2.0"

    def add_item_elements(self, handler, item):
        """
        Write the child elements of one <item>.

        title and link are always written. description, author/dc:creator,
        pubDate, comments, guid, ttl, enclosure and category elements are
        written only when the corresponding item value is set.

        Raises ValueError if the item has more than one enclosure.
        """
        handler.addQuickElement("title", item['title'])
        handler.addQuickElement("link", item['link'])
        if item['description'] is not None:
            handler.addQuickElement("description", item['description'])

        # Author information.
        if item["author_name"] and item["author_email"]:
            handler.addQuickElement("author", "%s (%s)" % (item['author_email'], item['author_name']))
        elif item["author_email"]:
            handler.addQuickElement("author", item["author_email"])
        elif item["author_name"]:
            # No e-mail address is available, so the name alone is written
            # as a Dublin Core creator element instead of <author>.
            handler.addQuickElement(
                "dc:creator", item["author_name"], {"xmlns:dc": "http://purl.org/dc/elements/1.1/"}
            )

        if item['pubdate'] is not None:
            handler.addQuickElement("pubDate", rfc2822_date(item['pubdate']))
        if item['comments'] is not None:
            handler.addQuickElement("comments", item['comments'])
        if item['unique_id'] is not None:
            guid_attrs = {}
            # Only an explicit True/False produces an isPermaLink attribute;
            # None (the add_item() default) leaves it off.
            if isinstance(item.get('unique_id_is_permalink'), bool):
                guid_attrs['isPermaLink'] = str(item['unique_id_is_permalink']).lower()
            handler.addQuickElement("guid", item['unique_id'], guid_attrs)
        if item['ttl'] is not None:
            handler.addQuickElement("ttl", item['ttl'])

        # Enclosure.
        # Materialize before testing for emptiness: 'enclosures' may be any
        # iterable, and a generator is truthy even when it yields nothing,
        # which would otherwise end in an IndexError on enclosures[0].
        enclosures = list(item['enclosures'] or ())
        if len(enclosures) > 1:
            raise ValueError(
                "RSS feed items may only have one enclosure, see "
                "http://www.rssboard.org/rss-profile#element-channel-item-enclosure"
            )
        if enclosures:
            enclosure = enclosures[0]
            handler.addQuickElement('enclosure', '', {
                'url': enclosure.url,
                'length': enclosure.length,
                'type': enclosure.mime_type,
            })

        # Categories.
        for cat in item['categories']:
            handler.addQuickElement("category", cat)
| 351 |
| 352 |
class Atom1Feed(SyndicationFeed):
    # Spec: https://tools.ietf.org/html/rfc4287
    content_type = 'application/atom+xml; charset=utf-8'
    ns = "http://www.w3.org/2005/Atom"

    def write(self, outfile, encoding):
        "Write the whole feed to ``outfile`` as an Atom <feed> document."
        xml = SimplerXMLGenerator(outfile, encoding)
        xml.startDocument()
        xml.startElement('feed', self.root_attributes())
        self.add_root_elements(xml)
        self.write_items(xml)
        xml.endElement("feed")

    def root_attributes(self):
        "Return the <feed> attributes: the namespace, plus xml:lang when a language is set."
        attrs = {"xmlns": self.ns}
        language = self.feed['language']
        if language is not None:
            attrs["xml:lang"] = language
        return attrs

    def add_root_elements(self, handler):
        """
        Write the feed-level elements. title, the alternate link, id and
        updated are always written; the rest only when set.
        """
        feed = self.feed
        handler.addQuickElement("title", feed['title'])
        handler.addQuickElement("link", "", {"rel": "alternate", "href": feed['link']})
        if feed['feed_url'] is not None:
            handler.addQuickElement("link", "", {"rel": "self", "href": feed['feed_url']})
        handler.addQuickElement("id", feed['id'])
        handler.addQuickElement("updated", rfc3339_date(self.latest_post_date()))

        # The <author> block is written only when a name is present; email
        # and uri are optional children of it.
        if feed['author_name'] is not None:
            handler.startElement("author", {})
            handler.addQuickElement("name", feed['author_name'])
            for tag, key in (("email", 'author_email'), ("uri", 'author_link')):
                if feed[key] is not None:
                    handler.addQuickElement(tag, feed[key])
            handler.endElement("author")

        if feed['subtitle'] is not None:
            handler.addQuickElement("subtitle", feed['subtitle'])
        for term in feed['categories']:
            handler.addQuickElement("category", "", {"term": term})
        if feed['feed_copyright'] is not None:
            handler.addQuickElement("rights", feed['feed_copyright'])

    def write_items(self, handler):
        "Write one <entry> element per feed item, in the order they were added."
        for entry in self.items:
            attrs = self.item_attributes(entry)
            handler.startElement("entry", attrs)
            self.add_item_elements(handler, entry)
            handler.endElement("entry")

    def add_item_elements(self, handler, item):
        """
        Write the child elements of one <entry>. title, the alternate link
        and id are always written; the rest only when set.
        """
        handler.addQuickElement("title", item['title'])
        handler.addQuickElement("link", "", {"href": item['link'], "rel": "alternate"})

        # Dates.
        for tag, key in (('published', 'pubdate'), ('updated', 'updateddate')):
            if item[key] is not None:
                handler.addQuickElement(tag, rfc3339_date(item[key]))

        # Author information.
        if item['author_name'] is not None:
            handler.startElement("author", {})
            handler.addQuickElement("name", item['author_name'])
            for tag, key in (("email", 'author_email'), ("uri", 'author_link')):
                if item[key] is not None:
                    handler.addQuickElement(tag, item[key])
            handler.endElement("author")

        # Unique ID: fall back to a tag URI built from the link and pubdate.
        unique_id = item['unique_id']
        if unique_id is None:
            unique_id = get_tag_uri(item['link'], item['pubdate'])
        handler.addQuickElement("id", unique_id)

        # Summary.
        if item['description'] is not None:
            handler.addQuickElement("summary", item['description'], {"type": "html"})

        # Enclosures.
        for enclosure in item['enclosures']:
            link_attrs = {
                'rel': 'enclosure',
                'href': enclosure.url,
                'length': enclosure.length,
                'type': enclosure.mime_type,
            }
            handler.addQuickElement('link', '', link_attrs)

        # Categories.
        for term in item['categories']:
            handler.addQuickElement("category", "", {"term": term})

        # Rights.
        if item['item_copyright'] is not None:
            handler.addQuickElement("rights", item['item_copyright'])

    @property
    def mime_type(self):
        "Deprecated alias for content_type; warns on every access."
        warnings.warn(
            'The mime_type attribute of Atom1Feed is deprecated. '
            'Use content_type instead.',
            RemovedInDjango20Warning, stacklevel=2
        )
        return self.content_type
| 456 |
| 457 |
# This isolates the decision of what the system default is, so calling code can
# do "feedgenerator.DefaultFeed" instead of "feedgenerator.Rss201rev2Feed".
# It is a plain alias: DefaultFeed is the Rss201rev2Feed class itself, not a
# subclass of it.
DefaultFeed = Rss201rev2Feed
OLD | NEW |