1 # Blosxom Plugin: atomfeed -*-cperl-*-
2 # Author(s): Original plugin: Rael Dornfest <rael@oreilly.com>
3 # XML::Parser: Sam Ruby
4 # UTC and <modified> fixes for 0.3: Frank Hecker
5 # Enclosures support: Dave Slusher and Keith Irwin
6 # Upgrade to Atom 1.0 spec: Sam Pearson
7 # Replace $feed_url with $path_info_full: Gavin Carr
9 # Docs: Included below: type "perldoc atomfeed", or scroll down
10 # Blosxom Home/Docs/Licensing: http://blosxom.sourceforge.net/
14 # ----- Mandatory configurable variables -----
16 # For a basic atom feed, you only need set $default_author and $feed_yr.
17 # If you do not, the plugin will exit quietly.
18 # All other configuration is optional, and can be safely ignored.
20 # Who would you like your feed to credit as the default author of each entry?
21 # Leave blank and the atomfeed plugin will attempt to use the whoami and
25 # What year was your weblog started? This will be used
26 # to form part of your weblog's unique ID.
29 # ----- Optional configurable variables -----
31 # What is the default author's URL?
32 # Blank defaults to $blosxom::url
35 # What is the default author's email address?
36 # Leave blank to exclude.
39 # Copyright statement:
40 # leave blank to exclude.
43 # What domain should Blosxom use in ID tags?
44 # Leave blank if you don't understand or for Blosxom to use the domain in $url.
48 # Put the URL for a site icon here (for example, your site's favicon). Leave blank to exclude.
52 # Set to the URL for your site logo. Leave blank to exclude.
55 # What template placeholder in your flavour template should I replace with feed-level <updated>?
56 # If you are using the built-in templates, leave this alone.
57 my $template_placeholder = "{{{updated}}}";
59 # Generator that produced this feed
60 $generator_url = "http://blosxom.sourceforge.net/";
65 # You can add enclosures to your atom feed by linking to them in your post
66 # and giving the anchor tag a rel attribute of "enclosure".
68 # Set $use_full_enclosures to 1 if you wish to add length and content-type
69 # to your enclosures. This function relies upon your webserver having
70 # LWP modules installed.
71 $use_full_enclosures = '0';
73 # Name of a file to cache info about your enclosures:
74 $DataFile = "$blosxom::plugin_state_dir/enclosures.dat";
79 # If you have a stylesheet to associate with your atom feed, place its URL here.
82 # You can specify the type of stylesheet here:
83 $css_type = "text/css";
85 # ----- END OF CONFIGURABLE VARIABLES -----
89 # --- Plug-in package variables -----
96 # Try to glean the domain from $url
97 $id_domain or ($id_domain) = $blosxom::url =~ m#http://(?:www\.)?([^\/]+)#;
100 use vars qw/$feed_utc_date/;
105 # ----- plugin subroutines -----
109 # Check for our two mandatory variables:
110 unless ( ( eval { whoami::start() or fauxami::start() } or $default_author ) and $feed_yr ) {
111 warn 'Blosxom plugin: atomfeed > Please set $default_author and $feed_yr. Exiting.\n';
115 # Check for the existence of already-loaded flavour templates or theme,
116 # loading templates if there's nothing:
117 # Note that it looks like this condition should *never* be met, so why
118 # did Rael put this code here? Can't we just do _load_templates();
120 $blosxom::template{'atom'}{'head'} or _load_templates();
122 # changed to require from use to make plugin work for those
123 # without XML::Parser. Consequence: entries will never be labelled
124 # type='xhtml', only 'text' or 'html'. Thanks, S2!
125 eval { require XML::Parser; $parser = new XML::Parser; };
127 %escape = ('<'=>'<', '>'=>'>', '&'=>'&', '"'=>'"');
128 $escape_re = join '|' => keys %escape;
130 foreach ( keys %escape ) { $unescape{$escape{$_}} = $_; }
131 $unescape_re = join '|' => keys %unescape;
133 # If required, initialise the enclosures data cache:
134 $use_full_enclosures and _load_cache();
141 # Make adjustments to plugin variables here, so that users
142 # can modify their defaults using the config and prefs plugins.
143 # Note that these plugins will have to run *before* atomfeed for this to work as intended.
145 $css_url and $css_url = "\n<?xml-stylesheet href=\"$css_url\" type=\"$css_type\"?>";
147 $copyright and $copyright = "<rights>$copyright</rights>";
149 $author_uri or $author_uri = "$blosxom::url";
150 $author_uri = "<uri>$author_uri</uri>";
152 $author_email and $author_email = "\n <email>$author_email</email>";
153 $icon_url and $icon_url = "<icon>$icon_url</icon>";
154 $logo_url and $logo_url = "<logo>$logo_url</logo>";
156 # Check and prepare a <title> and <subtitle>:
158 ($blog_title_type, $blog_title) = _parse_markup($blosxom::blog_title);
159 ($blog_description_type, $blog_description) = _parse_markup($blosxom::blog_description);
168 my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
174 $category = "<category term=\"$path\"/>";
177 # <published>: derive from %blosxom::files
178 my @published_utc = gmtime($blosxom::files{"$blosxom::datadir$path/$filename.$blosxom::file_extension"});
179 $published_utc_date = sprintf("%4d-%02d-%02dT%02d:%02d:00Z",
180 $published_utc[5]+1900,
186 # <updated>: derive by stat()ing the file for its mtime:
187 my @updated_utc = gmtime(stat("$blosxom::datadir$path/$filename.$blosxom::file_extension")->mtime);
188 $updated_utc_date = sprintf("%4d-%02d-%02dT%02d:%02d:00Z",
189 $updated_utc[5]+1900,
195 # Date/time of most recently-modified story becomes date/time of the feed.
196 $feed_utc_date = $updated_utc_date if $updated_utc_date gt $feed_utc_date;
198 # use %blosxom::files for the year component of feed-level <atom:id>
199 # in case the creation time is cached somewhere.
200 $utc_yr = $published_utc[5]+1900;
202 # Set authorship if available, falling back to $default_author
203 $author = $whoami::fullname || $fauxami::name || $default_author || '';
205 # Setup $summary. Adapted from Rael's foreshortened plugin.
206 # For simplicity's sake, we're going to provide plain text summaries.
207 $summary = $$body_ref;
209 $summary =~ s/<.+?>//gs;
210 # then unescape any entities:
211 $summary =~ s/($unescape_re)/$unescape{$1}/g;
212 # truncate to what looks like first sentence:
213 $summary =~ s/[\.\!\?].+$/.../s;
214 # Remove newlines and carriage returns:
215 $summary =~ s/[\r\n]/ /g;
216 # Prepare for use in template:
217 $summary = "<summary type=\"text\">$summary</summary>";
219 # Take a look through $$body_ref for any enclosures or via/related links:
220 my @anchors = ( $$body_ref =~ /(<a [^>]+>)/gis );
222 foreach my $anchor ( @anchors ) {
223 if ( $anchor =~ /rel\s*=\s*"?\s*(via|enclosure|related)"?/is ) {
226 if ( $anchor =~ /href\s*=\s*"([^"]+)"/is ) {
229 elsif ( $anchor =~ /href\s*=\s*([^\s]+)/is ) {
234 if ( $use_full_enclosures && ( $type eq "enclosure" ) ) {
235 my( $mime, $length );
236 # Check for presence of enclosure in $info:
237 unless ( $info->{$href} ) { _get_info($href); }
238 if ( $info->{$href} ) {
239 # Check again for data on enclosure in $info, just in case of problems getting it.
240 $mime = $info->{$href}->{type};
241 $length = $info->{$href}->{length};
242 $links .= " <link rel=\"$type\" href=\"$href\" type=\"$mime\" length=\"$length\"/>\n";
245 # Fall back on a basic link:
246 $links .= " <link rel=\"$type\" href=\"$href\"/>\n";
251 $links .= " <link rel=\"$type\" href=\"$href\"/>\n";
258 ($title_type, $title) = _parse_markup($$title_ref);
260 # Parse the post body:
261 ($body_type, $body) = _parse_markup($$body_ref);
267 my($pkg, $currentdir, $foot_ref) = @_;
268 # Replace the placeholder with the feed-level <updated> element:
269 $feed_utc_date = "<updated>$feed_utc_date</updated>";
270 $blosxom::output =~ s/$template_placeholder/$feed_utc_date/m;
274 # ----- private subroutines -----
278 # Pass in some text to parse, and I'll return a type and the text suitably configured.
282 # First, check to see if $text appears to contain markup.
283 # This regex should match any tag-like string: opening, closing or orphan tags.
284 if ( $text =~ m!</?[a-zA-Z0-9]+ ?/?>! ) {
285 # OK, looks like markup in there.
286 # Now, check to see if it looks well-formed:
287 if ( eval{$parser->parse("<div>$text</div>")}) {
288 # Yes? XHTML it is, then. I hope.
290 $text = "<div xmlns=\"http://www.w3.org/1999/xhtml\">$text</div>";
293 # No? Good old tag soup.
295 $text =~ s/($escape_re)/$escape{$1}/g;
299 # We'll assume it's plaintext then.
303 # Out go the results:
309 # Loads the data stored in $DataFile:
313 if( -e "$DataFile") {
314 open FH, "$DataFile" or return $info;
319 my ($url, $size, $type) = split (/ /, $_);
320 $info->{$url}->{length} = $size;
321 $info->{$url}->{type} = $type;
328 # Saves enclosure data structure in $info out to $DataFile
330 open FH, ">$DataFile" or return 0;
332 foreach $url (keys (%{$info})) {
333 print FH $url." ".$info->{$url}->{length} ." ". $info->{$url}->{type}."\n";
340 # Uses LWP to get content-type and content-length data
341 # for a given URL, adds this to the $info data structure
342 # and then calls _save_cache to preserve $info
343 return 0 unless eval "require LWP::UserAgent";
345 my $ua = LWP::UserAgent->new;
346 $ua->agent('BlosxomAtomFeed/0.5');
347 my $req = HTTP::Request->new(HEAD => "$url");
348 my $res = $ua->request($req);
350 if ( $res->is_success ){
351 $ct = $res->header('content-type');
352 $cl = $res->header('content-length');
353 $info->{$url}->{type} = $ct;
354 $info->{$url}->{length} = $cl;
361 sub _load_templates {
362 $blosxom::template{'atom'}{'content_type'} = 'application/atom+xml';
364 $blosxom::template{'atom'}{'date'} = "\n";
366 my $path_info_full = $blosxom::path_info_full || "$blosxom::path_info/index.atom";
367 $blosxom::template{'atom'}{'head'} =<<HEAD;
368 <?xml version="1.0" encoding="utf-8"?>\$atomfeed::css_url
369 <feed xmlns="http://www.w3.org/2005/Atom" xml:base="http://\$atomfeed::id_domain">
370 <title type="\$atomfeed::blog_title_type">\$atomfeed::blog_title</title>
371 <subtitle type="\$atomfeed::blog_description_type">\$atomfeed::blog_description</subtitle>
372 <link rel="self" type="application/atom+xml" href="$blosxom::url$path_info_full" />
373 <link rel="alternate" type="text/html" hreflang="$blosxom::blog_language" href="$blosxom::url" />
374 <id>tag\$atomfeed::colon\$atomfeed::id_domain,\$atomfeed::feed_yr\$atomfeed::colon/$blosxom::path_info</id>
375 <generator uri="\$atomfeed::generator_url" version="$blosxom::version">Blosxom</generator>
376 \$atomfeed::copyright
382 $blosxom::template{'atom'}{'story'} =<<'STORY';
384 <id>tag$atomfeed::colon$atomfeed::id_domain,$atomfeed::utc_yr$atomfeed::colon$path/$fn</id>
385 <link rel="alternate" type="text/html" href="$blosxom::url$blosxom::path/$blosxom::fn.$blosxom::default_flavour" />$atomfeed::links
386 <title type="$atomfeed::title_type">$atomfeed::title</title>
387 <published>$atomfeed::published_utc_date</published>
388 <updated>$atomfeed::updated_utc_date</updated>
391 <name>$atomfeed::author</name>
392 $atomfeed::author_uri$atomfeed::author_email
394 <content type="$atomfeed::body_type" xml:base="http://$atomfeed::id_domain" xml:lang="$blosxom::blog_language">
401 $blosxom::template{'atom'}{'foot'} =<<'FOOT';
414 Blosxom Plug-in: atomfeed
418 Provides an Atom 1.0 feed of your weblog.
420 The plugin has all you need right on-board, including the appropriate
421 flavour template components and a few configuration directives.
423 It supports the majority of the Atom 1.0 spec excluding the <source>
424 element, which seems intended for use in feeds that contain items
425 aggregated from other feeds, and currently the <contributor> element,
426 which could be included using the meta plugin.
428 Point your browser/feed reader at http://yoururl/index.atom.
436 Rael Dornfest <rael@oreilly.com>, http://www.raelity.org/
437 - wrote the original plugin based on the 0.3 spec
439 Sam Ruby <sam@intertwingly.net>, http://www.intertwingly.net/
440 - contributed the XML::Parser magic
442 Frank Hecker <hecker@hecker.org>, http://www.hecker.org/
443 - contributed patches for Atom 0.3 compliance, UTC date/time fix
445 Sam Pearson <sam@sgp.me.uk>, http://sgp.me.uk/
446 - Upgraded the plugin to handle Atom 1.0
448 Additional code was incorporated in the Atom 1.0 revision from the
449 enclosures plugin originally written by:
451 Dave Slusher, http://www.evilgeniuschronicles.org/wordpress/ and Keith
452 Irwin, http://www.asyserver.com/~kirwin/.
454 This plugin is now maintained by the Blosxom Sourceforge Team,
455 <blosxom-devel@lists.sourceforge.net>.
457 =head1 QUICKSTART INSTALLATION
459 To get an Atom feed up and running in a jiffy, you need only set the
460 following variables and drop the plugin into your plugins directory:
462 B<$default_author> is where you specify who to credit as the default
463 author of each entry. This can be overridden with the value provided
464 by the B<whoami> or B<fauxami> plugins.
466 B<$feed_yr> is where you specify the year your site began. This is
467 important as atomfeed needs to create a unique, unchanging ID for
468 your weblog and it needs this information to do so.
470 Everything else is optional.
472 =head1 FURTHER CONFIGURATION
474 There are a lot of variables available in the plugin you can use to
475 customise your Atom feed. These are all listed under B<CONFIGURABLE
476 VARIABLES>, below, with some notes as to their intended usage. Some
477 have defaults already specified, others will silently be excluded
480 As there are some variables generated entirely by the plugin, and as
481 some of the configurable variables are modified by the plugin, there
482 is also a complete list of all the variables available for use in
483 templates with notes on their form under B<TEMPLATE VARIABLES>.
485 If you wish to include enclosures or other types of <link> element in
486 your feed, see the section B<ENCLOSURES AND LINK ELEMENTS>, below.
488 Although you can use this plugin without anything other than blosxom
489 itself and a standard perl installation, it will perform better with
490 some optional extras available. See B<PERL MODULES> and B<OTHER
491 PLUGINS> for more information, particularly if you intend to use the
492 B<config> or B<prefs> plugins, any plugin that modifies your posts'
493 actual content (particularly by introducing markup), or any plugin
494 that operates on Blosxom's variable interpolation, such as
495 B<interpolate_fancy>.
497 =head1 CONFIGURABLE VARIABLES
499 In addition to B<$default_author> and B<$feed_yr>, the plugin has the
500 following user-configurable variables. Note that when setting
501 variables that are to be used at feed level and that contain URLs, any
502 relative URLs will be interpreted in relation to the value of the
503 variable B<$id_domain>. This is also true of any URLs included in
506 B<$author_uri> provides a URI for your default author. If you
507 leave this blank, it defaults to B<$blosxom::url>.
509 B<$author_email> Set this if you wish to include an email address for
510 the author of each entry. Leave it blank to exclude this element of
513 B<$copyright> Set this variable to a statement of copyright for your
514 site. Leave blank to exclude.
516 B<$id_domain> Atom associates unique ID tags with the feed itself and
517 individual entries. By default it'll attempt to glean your domain
518 from the specified or calculated value of B<$blosxom::url>, but you can
519 override this by setting this variable.
521 B<$icon_url> Set this variable to the URL of an icon to associate with
522 your site. This should be a small image with a 1:1 aspect ratio -
523 favicons are ideal. Leave blank to exclude.
525 B<$logo_url> Set this variable to the URL of a logo to associate with
526 your site. This can be larger than the icon, and should have an
527 aspect ratio of 2:1. Leave blank to exclude.
529 B<$template_placeholder> Set this variable to the string used in your
530 head.atom flavour template to identify where you would like the
531 feed-level updated element to appear. If you are using the built-in
532 templates, there is no need to change the default value.
534 B<$use_full_enclosures> If you are including enclosures in your Atom
535 feed, set this variable to 1 if you would like to include length and
536 type attributes. This requires that you have the LWP modules
537 installed on your webserver to work. See B<ENCLOSURES AND LINK
538 ELEMENTS>, below, for more information.
540 B<$DataFile> Set this variable to the name of a file where length and
541 type data on your enclosures is stored.
543 B<$css_url> Set this variable to the location of a stylesheet you
544 would like to have applied to your Atom feed. Leave blank to exclude
547 B<$css_type> Set this variable to the correct MIME type for the
548 stylesheet you are including in your feed. Defaults to 'text/css'.
550 =head1 TEMPLATE VARIABLES
552 The following notes will be of use if you intend to create your own
553 atom flavour templates.
555 Note that some variables have the necessary markup included, while
556 others do not; it is stated clearly when a variable contains the
557 required markup. This is so that they can be included in templates
558 without leaving empty elements when they are not required.
560 B<$atomfeed::author> contains the contents for the author section's
563 B<$atomfeed::author_email> contains any <email> element for the
564 author. Includes the required opening and closing tags.
566 B<$atomfeed::author_uri> contains any <uri> element for the author.
567 Includes the required opening and closing tags.
569 B<$atomfeed::blog_description> contains the contents for the
572 B<$atomfeed::blog_description_type> contains the value for the type
573 attribute of the <subtitle> element of the feed.
575 B<$atomfeed::blog_title> contains the title of your blog, suitably
576 prepared for use as the content of the feed-level <title> element.
578 B<$atomfeed::blog_title_type> contains the value required for the type
579 attribute for the feed-level <title> element.
581 B<$atomfeed::body> contains the full text of the body of your weblog
582 post, suitably formatted for use as the contents of the <content>
585 B<$atomfeed::body_type> contains the value for the type attribute of
586 the <content> element.
588 B<$atomfeed::category> contains a <category> for an entry, derived
589 from a story's path. This variable contains the required opening and
592 B<$atomfeed::colon> simply contains a colon character, for use in the
593 <id> elements - helps avoid confusion with variable interpolation.
595 B<$atomfeed::copyright> contains any copyright statement. This
596 variable includes the required opening and closing tags.
598 B<$atomfeed::css_url> contains everything you need to link to a
599 stylesheet, including the required opening and closing tags. Note
600 that this element belongs before the opening <feed> tag, as it is a
603 B<$atomfeed::feed_yr> contains the year your weblog started.
605 B<$atomfeed::icon_url> contains a complete <icon> element, including
606 the required opening and closing tags.
608 B<$atomfeed::id_domain> contains the root domain for your weblog.
610 B<$atomfeed::links> contains all the via, related and enclosure links
611 for an entry. This variable contains all the required markup.
613 B<$atomfeed::logo_url> contains a complete <logo> element, including
614 the required opening and closing tags.
616 B<$atomfeed::published_utc_date> contains the timestamp for an entry
617 based on the value stored in the B<%blosxom::files> hash.
619 B<$atomfeed::summary> contains a trimmed <summary> element, including
620 the opening and closing tags. Derived by truncating the entry down to
621 the first sentence, similar to the B<foreshortened> plugin.
623 B<$atomfeed::title> contains the contents for the story-level <title>
626 B<$atomfeed::title_type> contains the value required for the type
627 attribute of the story-level <title> element.
629 B<$atomfeed::updated_utc_date> contains the timestamp for an entry
630 based on a direct stat on the story file itself.
632 B<$atomfeed::utc_yr> contains the year in which an entry was made,
633 based upon the value stored in the B<%blosxom::files> hash.
635 =head1 ENCLOSURES AND LINK ELEMENTS
637 Atom provides an elegant method for expressing relationships between
638 different resources using the rel attribute of its <link> element.
639 This includes the method Atom uses to support enclosures, used to
640 deliver additional content - often audio or video data - to the
641 recipient of the feed.
643 To take advantage of this, the plugin supports rel attribute values of
644 via, related and enclosure. To have these included in your feed,
645 simply link to the resource in the body of your weblog post and make
646 sure that the anchor tag has an appropriate rel attribute of
647 enclosure, via or related, depending upon the kind of relationship you
650 Ideally, enclosures should also contain information on their length
651 (the size of the file) and MIME type. The atomfeed plugin will try to
652 determine this information if you set the B<$use_full_enclosures>
653 variable to '1'. To make sure this works correctly, you should link
654 to the anclosure using an absolute URL rather than a relative one -
655 "http://example.com/podcasts/july-05.mp3" instead of
656 "/podcasts/july-05.mp3" - even if the enclosure is hosted under the
659 If you are unsure as to whether your server has this module installed,
660 you should be able to experiment by setting the variable anyway, as
661 the plugin should continue to function even if it is not present.
665 This plugin will work at its best if your server has B<XML::Parser>
666 and B<LWP> modules installed, although it will function adequately
671 In order for the <published> and <updated> timestamps to make sense,
672 you should be running a plugin like B<entries_cache> that retains the
673 original timestamps of your entries and places them into the
674 B<%blosxom::files> hash. If you are not, you should remove the
675 <published> element from the story template.
677 The atomfeed plugin assumes you're not running any fancy interpolation
678 plugin (e.g. B<interpolate_fancy>) which changes the way variables are
679 specified in a template (e.g. <$foo /> rather than $foo). If you are
680 running B<interpolate_fancy> or the like use the B<config> plugin and
681 a config.atom file in your blosxom B<$datadir> consisting of:
683 $blosxom::plugins{"interpolate_fancy"} = 0;
685 Where "interpolate_fancy" is the name of the interpolation plugin
686 you're turning off _just for the atom feed_.
688 If you are planning on using the B<config> or B<prefs> plugins to alter
689 variables in the atomfeed namespace, you will need to ensure that
690 these plugins run B<before> the atomfeed plugin. You can do this by
691 prefixing a number to the name of the relevant plugin, such as B<1config>
694 Similarly, if you are running any plugins that alter the content of
695 your posts - for example by escaping characters or adding markup -
696 these should also be set to run before atomfeed. Essentially, you
697 want atomfeed to get each post as it would be sent to a normal web
698 browser for it to work as intended.
702 Blosxom Home/Docs/Licensing: http://blosxom.sourceforge.net/
704 Blosxom Plugin Docs: http://blosxom.sourceforge.net/documentation/users/plugins.html
706 1.0 Update Release Notes:
707 http://sgp.me.uk/sam/2005/08/04/atom-for-blosxom
709 Atom 1.0 Specification:
710 http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
714 None known; please send bug reports and feedback to the Blosxom
715 development mailing list <blosxom-devel@lists.sourceforge.net>.
719 Blosxom and this Blosxom Plug-in
720 Copyright 2003, Rael Dornfest
722 Permission is hereby granted, free of charge, to any person obtaining
723 a copy of this software and associated documentation files (the
724 "Software"), to deal in the Software without restriction, including
725 without limitation the rights to use, copy, modify, merge, publish,
726 distribute, sublicense, and/or sell copies of the Software, and to
727 permit persons to whom the Software is furnished to do so, subject to
728 the following conditions:
730 The above copyright notice and this permission notice shall be
731 included in all copies or substantial portions of the Software.
733 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
734 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
735 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
736 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
737 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
738 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
739 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.