1 # Blosxom Plugin: atomfeed -*-cperl-*-
2 # Author(s): Original plugin: Rael Dornfest <rael@oreilly.com>
3 # XML::Parser: Sam Ruby
4 # UTC and <modified> fixes for 0.3: Frank Hecker
5 # Enclosures support: Dave Slusher and Keith Irwin
6 # Upgrade to Atom 1.0 spec: Sam Pearson
8 # Docs: Included below: type "perldoc atomfeed", or scroll down
9 # Blosxom Home/Docs/Licensing: http://www.raelity.org/apps/blosxom/
10 # Blosxom Plugin Docs: http://www.raelity.org/apps/blosxom/plugin.shtml
14 # ----- Mandatory configurable variables -----
16 # For a basic atom feed, you only need set $default_author and $feed_yr.
17 # If you do not, the plugin will exit quietly.
18 # All other configuration is optional, and can be safely ignored.
20 # Who would you like your feed to credit as the default author of each entry?
21 # Leave blank and the atomfeed plugin will attempt to use the whoami and
25 # What year was your weblog started? This will be used
26 # to form part of your weblog's unique ID.
29 # ----- Optional configurable variables -----
31 # What is the default author's URL?
32 # Blank defaults to $blosxom::url
35 # What is the default author's email address?
36 # Leave blank to exclude.
39 # Copyright statement:
40 # leave blank to exclude.
43 # What domain should Blosxom use in ID tags?
44 # Leave blank if you don't understand or for Blosxom to use the domain in $url.
48 # Set the URL of the atom feed here. Defaults to $blosxom::url/index.atom
52 # Put the URL for a site icon here (for example, your site's favicon). Leave blank to exclude.
56 # Set to the URL for your site logo. Leave blank to exclude.
59 # What template placeholder in your flavour template should I replace with feed-level <updated>?
60 # If you are using the built-in templates, leave this alone.
61 my $template_placeholder = "{{{updated}}}";
66 # You can add enclosures to your atom feed by linking to them in your post
67 # and giving the anchor tag a rel attribute of "enclosure".
69 # Set $use_full_enclosures to 1 if you wish to add length and content-type
70 # to your enclosures. This function relies upon your webserver having
71 # LWP modules installed.
72 $use_full_enclosures = '0';
74 # Name of a file to cache info about your enclosures:
75 $DataFile = "$blosxom::plugin_state_dir/enclosures.dat";
80 # If you have a stylesheet to associate with your atom feed, place its URL here.
83 # You can specify the type of stylesheet here:
84 $css_type = "text/css";
88 # --- Plug-in package variables -----
95 # Try to glean the domain from $url
96 $id_domain or ($id_domain) = $blosxom::url =~ m#http://(?:www\.)?([^\/]+)#;
100 use vars qw/$feed_utc_date/;
105 # ----- plugin subroutines -----
109 # Check for our two mandatory variables:
110 unless ( ( eval { whoami::start() or fauxami::start() } or $default_author ) and $feed_yr ) {
111 warn 'Blosxom plugin: atomfeed > Please set $default_author and $feed_yr. Exiting.\n';
115 # Check for the existence of already-loaded flavour templates or theme,
116 # loading templates if there's nothing:
117 # Note that it looks like this condition should *never* be met, so why
118 # did Rael put this code here? Can't we just do _load_templates();
120 $blosxom::template{'atom'}{'head'} or _load_templates();
122 # changed to require from use to make plugin work for those
123 # without XML::Parser. Consequence: entries will never be labelled
124 # type='xhtml', only 'text' or 'html'. Thanks, S2!
125 eval { require XML::Parser; $parser = new XML::Parser; };
127 %escape = ('<'=>'<', '>'=>'>', '&'=>'&', '"'=>'"');
128 $escape_re = join '|' => keys %escape;
130 foreach ( keys %escape ) { $unescape{$escape{$_}} = $_; }
131 $unescape_re = join '|' => keys %unescape;
133 # If required, initialise the enclosures data cache:
134 $use_full_enclosures and _load_cache();
141 # Make adjustments to plugin variables here, so that users
142 # can modify their defaults using the config and prefs plugins.
143 # Note that these plugins will have to run *before* atomfeed for this to work as intended.
145 $css_url and $css_url = "\n<?xml-stylesheet href=\"$css_url\" type=\"$css_type\"?>";
147 $feed_url or $feed_url = "$blosxom::url/index.atom";
149 $copyright and $copyright = "<rights>$copyright</rights>";
151 $author_uri or $author_uri = "$blosxom::url";
152 $author_uri = "<uri>$author_uri</uri>";
154 $author_email and $author_email = "\n <email>$author_email</email>";
155 $icon_url and $icon_url = "<icon>$icon_url</icon>";
156 $logo_url and $logo_url = "<logo>$logo_url</logo>";
158 # Check and prepare a <title> and <subtitle>:
160 ($blog_title_type, $blog_title) = _parse_markup($blosxom::blog_title);
161 ($blog_description_type, $blog_description) = _parse_markup($blosxom::blog_description);
168 my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
174 $category = "<category term=\"$path\"/>";
177 # <published>: derive from %blosxom::files
178 my @published_utc = gmtime($blosxom::files{"$blosxom::datadir$path/$filename.$blosxom::file_extension"});
179 $published_utc_date = sprintf("%4d-%02d-%02dT%02d:%02d:00Z",
180 $published_utc[5]+1900,
186 # <updated>: derive by stat()ing the file for its mtime:
187 my @updated_utc = gmtime(stat("$blosxom::datadir$path/$filename.$blosxom::file_extension")->mtime);
188 $updated_utc_date = sprintf("%4d-%02d-%02dT%02d:%02d:00Z",
189 $updated_utc[5]+1900,
195 # Date/time of most recently-modified story becomes date/time of the feed.
196 $feed_utc_date = $updated_utc_date if $updated_utc_date > $feed_utc_date;
198 # use %blosxom::files for the year component of feed-level <atom:id>
199 # in case the creation time is cached somewhere.
200 $utc_yr = $published_utc[5]+1900;
202 # Set authorship if available, falling back to $default_author
203 $author = $whoami::fullname || $fauxami::name || $default_author || '';
205 # Setup $summary. Adapted from Rael's foreshortened plugin.
206 # For simplicity's sake, we're going to provide plain text summaries.
207 $summary = $$body_ref;
209 $summary =~ s/<.+?>//gs;
210 # then unescape any entities:
211 $summary =~ s/($unescape_re)/$unescape{$1}/g;
212 # truncate to what looks like first sentence:
213 $summary =~ s/[\.\!\?].+$/.../s;
214 # Remove newlines and carriage returns:
215 $summary =~ s/[\r\n]/ /g;
216 # Prepare for use in template:
217 $summary = "<summary type=\"text\">$summary</summary>";
219 # Take a look through $$body_ref for any enclosures or via/related links:
220 my @anchors = ( $$body_ref =~ /(<a [^>]+>)/gis );
222 foreach my $anchor ( @anchors ) {
223 if ( $anchor =~ /rel\s*=\s*"?\s*(via|enclosure|related)"?/is ) {
226 if ( $anchor =~ /href\s*=\s*"([^"]+)"/is ) {
229 elsif ( $anchor =~ /href\s*=\s*([^\s]+)/is ) {
234 if ( $use_full_enclosures && ( $type eq "enclosure" ) ) {
235 my( $mime, $length );
236 # Check for presence of enclosure in $info:
237 unless ( $info->{$href} ) { _get_info($href); }
238 if ( $info->{$href} ) {
239 # Check again for data on enclosure in $info, just in case of problems getting it.
240 $mime = $info->{$href}->{type};
241 $length = $info->{$href}->{length};
242 $links .= " <link rel=\"$type\" href=\"$href\" type=\"$mime\" length=\"$length\"/>\n";
245 # Fall back on a basic link:
246 $links .= " <link rel=\"$type\" href=\"$href\"/>\n";
251 $links .= " <link rel=\"$type\" href=\"$href\"/>\n";
258 ($title_type, $title) = _parse_markup($$title_ref);
260 # Parse the post body:
261 ($body_type, $body) = _parse_markup($$body_ref);
267 my($pkg, $currentdir, $foot_ref) = @_;
268 # Replace the placeholder with the feed-level <updated> element:
269 $feed_utc_date = "<updated>$feed_utc_date</updated>";
270 $blosxom::output =~ s/$template_placeholder/$feed_utc_date/m;
274 # ----- private subroutines -----
278 # Pass in some text to parse, and I'll return a type and the text suitably configured.
282 # First, check to see if $text appears to contain markup.
283 # This regex should match any tag-like string: opening, closing or orphan tags.
284 if ( $text =~ m!</?[a-zA-Z0-9]+ ?/?>! ) {
285 # OK, looks like markup in there.
286 # Now, check to see if it looks well-formed:
287 if ( eval{$parser->parse("<div>$text</div>")}) {
288 # Yes? XHTML it is, then. I hope.
290 $text = "<div xmlns=\"http://www.w3.org/1999/xhtml\">$text</div>";
293 # No? Good old tag soup.
295 $text =~ s/($escape_re)/$escape{$1}/g;
299 # We'll assume it's plaintext then.
303 # Out go the results:
309 # Loads the data stored in $DataFile:
313 if( -e "$DataFile") {
314 open FH, "$DataFile" or return $info;
319 my ($url, $size, $type) = split (/ /, $_);
320 $info->{$url}->{length} = $size;
321 $info->{$url}->{type} = $type;
328 # Saves enclosure data structure in $info out to $DataFile
330 open FH, ">$DataFile" or return 0;
332 foreach $url (keys (%{$info})) {
333 print FH $url." ".$info->{$url}->{length} ." ". $info->{$url}->{type}."\n";
340 # Uses LWP to get content-type and content-length data
341 # for a given URL, adds this to the $info data structure
342 # and then calls _save_cache to preserve $info
343 return 0 unless eval "require LWP::UserAgent";
345 my $ua = LWP::UserAgent->new;
346 $ua->agent('BlosxomAtomFeed/0.5');
347 my $req = HTTP::Request->new(HEAD => "$url");
348 my $res = $ua->request($req);
350 if ( $res->is_success ){
351 $ct = $res->header('content-type');
352 $cl = $res->header('content-length');
353 $info->{$url}->{type} = $ct;
354 $info->{$url}->{length} = $cl;
361 sub _load_templates {
362 $blosxom::template{'atom'}{'content_type'} = 'application/atom+xml';
364 $blosxom::template{'atom'}{'date'} = "\n";
366 $blosxom::template{'atom'}{'head'} =<<'HEAD';
367 <?xml version="1.0" encoding="utf-8"?>$atomfeed::css_url
368 <feed xmlns="http://www.w3.org/2005/Atom" xml:base="http://$atomfeed::id_domain">
369 <title type="$atomfeed::blog_title_type">$atomfeed::blog_title</title>
370 <subtitle type="$atomfeed::blog_description_type">$atomfeed::blog_description</subtitle>
371 <link rel="self" type="application/atom+xml" href="$atomfeed::feed_url"/>
372 <link rel="alternate" type="text/html" hreflang="$blosxom::blog_language" href="$blosxom::url" />
373 <id>tag$atomfeed::colon$atomfeed::id_domain,$atomfeed::feed_yr$atomfeed::colon/$blosxom::path_info</id>
374 <generator uri="http://www.blosxom.com/" version="$blosxom::version">Blosxom</generator>
381 $blosxom::template{'atom'}{'story'} =<<'STORY';
383 <id>tag$atomfeed::colon$atomfeed::id_domain,$atomfeed::utc_yr$atomfeed::colon$path/$fn</id>
384 <link rel="alternate" type="text/html" href="$blosxom::url$blosxom::path/$blosxom::fn.$blosxom::default_flavour" />$atomfeed::links
385 <title type="$atomfeed::title_type">$atomfeed::title</title>
386 <published>$atomfeed::published_utc_date</published>
387 <updated>$atomfeed::updated_utc_date</updated>
390 <name>$atomfeed::author</name>
391 $atomfeed::author_uri$atomfeed::author_email
393 <content type="$atomfeed::body_type" xml:base="http://$atomfeed::id_domain" xml:lang="$blosxom::blog_language">
400 $blosxom::template{'atom'}{'foot'} =<<'FOOT';
413 Blosxom Plug-in: atomfeed
417 Provides an Atom 1.0 feed of your weblog.
419 The plugin has all you need right on-board, including the appropriate
420 flavour template components and a couple-three configuration
423 It supports the majority of the Atom 1.0 spec excluding the <source>
424 element, which seems intended for use in feeds that contain items
425 aggregated from other feeds, and currently the <contributor> element,
426 which could be included using the meta plugin.
428 Point your browser/Atom feed reader at http://yoururl/index.atom.
436 Rael Dornfest <rael@oreilly.com>, http://www.raelity.org/
437 - wrote the original plugin based on the 0.3 spec
439 Sam Ruby <sam@intertwingly.net>, http://www.intertwingly.net/
440 - contributed the XML::Parser magic
442 Frank Hecker <hecker@hecker.org>, http://www.hecker.org/
443 - contributed patches for Atom 0.3 compliance, UTC date/time fix
445 Sam Pearson <sam@sgp.me.uk>, http://sgp.me.uk/
446 - Upgraded the plugin to handle Atom 1.0
448 Additional code was incorporated in the Atom 1.0 revision from the
449 enclosures plugin originally written by:
451 Dave Slusher, http://www.evilgeniuschronicles.org/wordpress/ and Keith
452 Irwin, http://www.asyserver.com/~kirwin/.
456 To get an Atom feed up and running in a jiffy, you need only set the
457 following variables and drop the plugin into your plugins directory:
459 B<$default_author> is where you specify who to credit as the default
460 author of each entry. This can be overridden with the value provided
461 by the B<whoami> or B<fauxami> plugins.
463 B<$feed_yr> is where you specify the year your site began. This is
464 important as atomfeed needs to create a unique, unchanging ID for
465 your weblog and it needs this information to do so.
467 Everything else is optional.
471 There are a lot of variables available in the plugin you can use to
472 customise your Atom feed. These are all listed under B<CONFIGURABLE
473 VARIABLES>, below, with some notes as to their intended usage. Some
474 have defaults already specified, others will silently be excluded
477 As there are some variables generated entirely by the plugin, and as
478 some of the configurable variables are modified by the plugin, there
479 is also a complete list of all the variables available for use in
480 templates with notes on their form under B<TEMPLATE VARIABLES>.
482 If you wish to include enclosures or other types of <link> element in
483 your feed, see the section B<ENCLOSURES AND LINK ELEMENTS>, below.
485 Although you can use this plugin without anything other than blosxom
486 itself and a standard perl installation, it will perform better with
487 some optional extras available. See B<PERL MODULES> and B<OTHER
488 PLUGINS> for more information, particularly if you intend to use the
489 B<config> or B<prefs> plugins, any plugin that modifies your posts'
490 actual content (particularly by introducing markup), or any plugin
491 that operates on Blosxom's variable interpolation, such as
492 B<interpolate_fancy>.
496 In addition to B<$default_author> and B<$feed_yr>, the plugin has the
497 following user-configurable variables. Note that when setting
498 variables that are to be used at feed level and that contain URLs, any
499 relative URLs will be interpreted in relation to the value of the
500 variable B<$id_domain>. This is also true of any URLs included in
503 B<$author_uri> provides a URI for your default author. If you
504 leave this blank, it defaults to B<$blosxom::url>.
506 B<$author_email> Set this if you wish to include an email address for
507 the author of each entry. Leave it blank to exclude this element of
510 B<$copyright> Set this variable to a statement of copyright for your
511 site. Leave blank to exclude.
513 B<$id_domain> Atom associates unique ID tags with the feed itself and
514 individual entries. By default it'll attempt to glean your domain
515 from the specified or calculated value of B<$blosxom::url>, but you can
516 override this by setting this variable.
518 B<$feed_url> Atom feeds contain pointers to themselves, so you can set
519 this variable to the location of your atom feed. If you leave it
520 blank, it will use B<$blosxom::url/index.atom>, which in most cases will
523 B<$icon_url> Set this variable to the URL of an icon to associate with
524 your site. This should be a small image with a 1:1 aspect ratio -
525 favicons are ideal. Leave blank to exclude.
527 B<$logo_url> Set this variable to the URL of a logo to associate with
528 your site. This can be larger than the icon, and should have an
529 aspect ratio of 2:1. Leave blank to exclude.
531 B<$template_placeholder> Set this variable to the string used in your
532 head.atom flavour template to identify where you would like the
533 feed-level updated element to appear. If you are using the built-in
534 templates, there is no need to change the default value.
536 B<$use_full_enclosures> If you are including enclosures in your Atom
537 feed, set this variable to 1 if you would like to include length and
538 type attributes. This requires that you have the LWP modules
539 installed on your webserver to work. See B<ENCLOSURES AND LINK
540 ELEMENTS>, below, for more information.
542 B<$DataFile> Set this variable to the name of a file where length and
543 type data on your enclosures is stored.
545 B<$css_url> Set this variable to the location of a stylesheet you
546 would like to have applied to your Atom feed. Leave blank to exclude
549 B<$css_type> Set this variable to the correct MIME type for the
550 stylesheet you are including in your feed. Defaults to 'text/css'.
554 The following notes will be of use if you intend to create your own
555 atom flavour templates.
557 Note that some variables have the necessary markup included, while
558 others do not; it is stated clearly when a variable contains the
559 required markup. This is so that they can be included in templates
560 without leaving empty elements when they are not required.
562 B<$atomfeed::author> contains the contents for the author section's
565 B<$atomfeed::author_email> contains any <email> element for the
566 author. Includes the required opening and closing tags.
568 B<$atomfeed::author_uri> contains any <uri> element for the author.
569 Includes the required opening and closing tags.
571 B<$atomfeed::blog_description> contains the contents for the
574 B<$atomfeed::blog_description_type> contains the value for the type
575 attribute of the <subtitle> element of the feed.
577 B<$atomfeed::blog_title> contains the title of your blog, suitably
578 prepared for use as the content of the feed-level <title> element.
580 B<$atomfeed::blog_title_type> contains the value required for the type
581 attribute for the feed-level <title> element.
583 B<$atomfeed::body> contains the full text of the body of your weblog
584 post, suitably formatted for use as the contents of the <content>
587 B<$atomfeed::body_type> contains the value for the type attribute of
588 the <content> element.
590 B<$atomfeed::category> contains a <category> for an entry, derived
591 from a story's path. This variable contains the required opening and
594 B<$atomfeed::colon> simply contains a colon character, for use in the
595 <id> elements - helps avoid confusion with variable interpolation.
597 B<$atomfeed::copyright> contains any copyright statement. This
598 variable includes the required opening and closing tags.
600 B<$atomfeed::css_url> contains everything you need to link to a
601 stylesheet, including the required opening and closing tags. Note
602 that this element belongs before the opening <feed> tag, as it is a
605 B<$atomfeed::feed_url> contains the value for the href attribute of a
606 feed-level <link rel="self"> element which points back at the feed
609 B<$atomfeed::feed_yr> contains the year your weblog started.
611 B<$atomfeed::icon_url> contains a complete <icon> element, including
612 the required opening and closing tags.
614 B<$atomfeed::id_domain> contains the root domain for your weblog.
616 B<$atomfeed::links> contains all the via, related and enclosure links
617 for an entry. This variable contains all the required markup.
619 B<$atomfeed::logo_url> contains a complete <logo> element, including
620 the required opening and closing tags.
622 B<$atomfeed::published_utc_date> contains the timestamp for an entry
623 based on the value stored in the B<%blosxom::files> hash.
625 B<$atomfeed::summary> contains a trimmed <summary> element, including
626 the opening and closing tags. Derived by truncating the entry down to
627 the first sentence, similar to the B<foreshortened> plugin.
629 B<$atomfeed::title> contains the contents for the story-level <title>
632 B<$atomfeed::title_type> contains the value required for the type
633 attribute of the story-level <title> element.
635 B<$atomfeed::updated_utc_date> contains the timestamp for an entry
636 based on a direct stat on the story file itself.
638 B<$atomfeed::utc_yr> contains the year in which an entry was made,
639 based upon the value stored in the B<%blosxom::files> hash.
643 Atom provides an elegant method for expressing relationships between
644 different resources using the rel attribute of its <link> element.
645 This includes the method Atom uses to support enclosures, used to
646 deliver additional content - often audio or video data - to the
647 recipient of the feed.
649 To take advantage of this, the plugin supports rel attribute values of
650 via, related and enclosure. To have these included in your feed,
651 simply link to the resource in the body of your weblog post and make
652 sure that the anchor tag has an appropriate rel attribute of
653 enclosure, via or related, depending upon the kind of relationship you
656 Ideally, enclosures should also contain information on their length
657 (the size of the file) and MIME type. The atomfeed plugin will try to
658 determine this information if you set the B<$use_full_enclosures>
659 variable to '1'. To make sure this works correctly, you should link
660 to the enclosure using an absolute URL rather than a relative one -
661 "http://example.com/podcasts/july-05.mp3" instead of
662 "/podcasts/july-05.mp3" - even if the enclosure is hosted under the
665 If you are unsure as to whether your server has this module installed,
666 you should be able to experiment by setting the variable anyway, as
667 the plugin should continue to function even if it is not present.
671 This plugin will work at its best if your server has B<XML::Parser>
672 and B<LWP> modules installed, although it will function adequately
677 In order for the <published> and <updated> timestamps to make sense,
678 you should be running a plugin like B<entries_cache> that retains the
679 original timestamps of your entries and places them into the
680 B<%blosxom::files> hash. If you are not, you should remove the
681 <published> element from the story template.
683 The atomfeed plugin assumes you're not running any fancy interpolation
684 plugin (e.g. B<interpolate_fancy>) which changes the way variables are
685 specified in a template (e.g. <$foo /> rather than $foo). If you are
686 running B<interpolate_fancy> or the like use the B<config> plugin and
687 a config.atom file in your blosxom B<$datadir> consisting of:
689 $blosxom::plugins{"interpolate_fancy"} = 0;
691 Where "interpolate_fancy" is the name of the interpolation plugin
692 you're turning off _just for the atom feed_.
694 If you are planning on using the B<config> or B<prefs> plugins to alter
695 variables in the atomfeed namespace, you will need to ensure that
696 these plugins run B<before> the atomfeed plugin. You can do this by
697 prefixing a number to the name of the relevant plugin, such as B<1config>
700 Similarly, if you are running any plugins that alter the content of
701 your posts - for example by escaping characters or adding markup -
702 these should also be set to run before atomfeed. Essentially, you
703 want atomfeed to get each post as it would be sent to a normal web
704 browser for it to work as intended.
708 Blosxom Home/Docs/Licensing:
709 http://www.raelity.org/apps/blosxom/
712 http://www.raelity.org/apps/blosxom/plugin.shtml
714 1.0 Update Release Notes:
715 http://sgp.me.uk/sam/2005/08/04/atom-for-blosxom
717 Atom 1.0 Specification:
718 http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
722 Address bug reports and comments to the Blosxom mailing list:
723 http://www.yahoogroups.com/group/blosxom
727 Blosxom and this Blosxom Plug-in
728 Copyright 2003, Rael Dornfest
730 Permission is hereby granted, free of charge, to any person obtaining
731 a copy of this software and associated documentation files (the
732 "Software"), to deal in the Software without restriction, including
733 without limitation the rights to use, copy, modify, merge, publish,
734 distribute, sublicense, and/or sell copies of the Software, and to
735 permit persons to whom the Software is furnished to do so, subject to
736 the following conditions:
738 The above copyright notice and this permission notice shall be
739 included in all copies or substantial portions of the Software.