1 # Blosxom Plugin: atomfeed -*-cperl-*-
2 # Author(s): Original plugin: Rael Dornfest <rael@oreilly.com>
3 # XML::Parser: Sam Ruby
4 # UTC and <modified> fixes for 0.3: Frank Hecker
5 # Enclosures support: Dave Slusher and Keith Irwin
6 # Upgrade to Atom 1.0 spec: Sam Pearson
7 # Replace $feed_url with $path_info_full: Gavin Carr
9 # Docs: Included below: type "perldoc atomfeed", or scroll down
10 # Blosxom Home/Docs/Licensing: http://blosxom.sourceforge.net/
14 # ----- Mandatory configurable variables -----
16 # For a basic atom feed, you only need set $default_author and $feed_yr.
17 # If you do not, the plugin will exit quietly.
18 # All other configuration is optional, and can be safely ignored.
20 # Who would you like your feed to credit as the default author of each entry?
21 # Leave blank and the atomfeed plugin will attempt to use the whoami and
25 # What year was your weblog started? This will be used
26 # to form part of your weblog's unique ID.
29 # ----- Optional configurable variables -----
31 # What is the default author's URL?
32 # Blank defaults to $blosxom::url
35 # What is the default author's email address?
36 # Leave blank to exclude.
39 # Copyright statement:
40 # leave blank to exclude.
43 # What domain should Blosxom use in ID tags?
44 # Leave blank if you don't understand or for Blosxom to use the domain in $url.
48 # Put the URL for a site icon here (for example, your site's favicon). Leave blank to exclude.
52 # Set to the URL for your site logo. Leave blank to exclude.
55 # What template placeholder in your flavour template should I replace with feed-level <updated>?
56 # If you are using the built-in templates, leave this alone.
57 my $template_placeholder = "{{{updated}}}";
59 # Generator that produced this feed
60 $generator_url = "http://blosxom.sourceforge.net/";
65 # You can add enclosures to your atom feed by linking to them in your post
66 # and giving the anchor tag a rel attribute of "enclosure".
68 # Set $use_full_enclosures to 1 if you wish to add length and content-type
69 # to your enclosures. This function relies upon your webserver having
70 # LWP modules installed.
71 $use_full_enclosures = '0';
73 # Name of a file to cache info about your enclosures:
74 $DataFile = "$blosxom::plugin_state_dir/enclosures.dat";
79 # If you have a stylesheet to associate with your atom feed, place its URL here.
82 # You can specify the type of stylesheet here:
83 $css_type = "text/css";
85 # ----- END OF CONFIGURABLE VARIABLES -----
89 # --- Plug-in package variables -----
# Try to glean the domain from $url when $id_domain has not been configured.
# Both http:// and https:// URLs are accepted; a leading "www." is dropped
# and everything up to the next "/" is kept (so a port, if any, is included).
# If the URL does not match, $id_domain is left undefined.
$id_domain or ($id_domain) = $blosxom::url =~ m#https?://(?:www\.)?([^/]+)#;
101 use vars qw/$feed_utc_date/;
106 # ----- plugin subroutines -----
110 # Check for our two mandatory variables:
111 unless ( ( eval { whoami::start() or fauxami::start() } or $default_author ) and $feed_yr ) {
# The message is single-quoted on purpose so that the variable names appear
# literally. The newline is appended from a double-quoted string so that it
# is a real newline rather than the two characters "\" and "n".
warn 'Blosxom plugin: atomfeed > Please set $default_author and $feed_yr. Exiting.' . "\n";
116 # Check for the existence of already-loaded flavour templates or theme,
117 # loading templates if there's nothing:
118 # Note that it looks like this condition should *never* be met, so why
119 # did Rael put this code here? Can't we just do _load_templates();
121 $blosxom::template{'atom'}{'head'} or _load_templates();
# XML::Parser is loaded with require inside eval (rather than "use") so the
# plugin still works for those without XML::Parser. Consequence: without it,
# entries will never be labelled type='xhtml', only 'text' or 'html'.
# Thanks, S2!
# The constructor is called with direct method syntax instead of the
# indirect "new CLASS" form.
eval { require XML::Parser; $parser = XML::Parser->new; };
# Map each XML-special character to its entity, and build an alternation
# that matches any one of those characters.
%escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
$escape_re = join '|' => keys %escape;

# Inverse table (entity => character) and the alternation matching any entity.
foreach ( keys %escape ) { $unescape{$escape{$_}} = $_; }
$unescape_re = join '|' => keys %unescape;
134 # If required, initialise the enclosures data cache:
135 $use_full_enclosures and _load_cache();
# Wrap the user-configurable values in their markup at this point, so that
# users can modify the defaults using the config and prefs plugins first.
# Note that those plugins have to run *before* atomfeed for this to work.
# Values that are empty are left untouched so nothing is emitted for them.
if ( $css_url ) {
    $css_url = "\n<?xml-stylesheet href=\"$css_url\" type=\"$css_type\"?>";
}

$copyright = "<rights>$copyright</rights>" if $copyright;

# The author URI always gets an element: fall back to the blog URL.
$author_uri ||= "$blosxom::url";
$author_uri = "<uri>$author_uri</uri>";

$author_email = "\n <email>$author_email</email>" if $author_email;
$icon_url = "<icon>$icon_url</icon>" if $icon_url;
$logo_url = "<logo>$logo_url</logo>" if $logo_url;
157 # Check and prepare a <title> and <subtitle>:
159 ($blog_title_type, $blog_title) = _parse_markup($blosxom::blog_title);
160 ($blog_description_type, $blog_description) = _parse_markup($blosxom::blog_description);
167 my($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
173 $category = "<category term=\"$path\"/>";
176 # <published>: derive from %blosxom::files
177 my @published_utc = gmtime($blosxom::files{"$blosxom::datadir$path/$filename.$blosxom::file_extension"});
178 $published_utc_date = sprintf("%4d-%02d-%02dT%02d:%02d:00Z",
179 $published_utc[5]+1900,
185 # <updated>: derive by stat()ing the file for its mtime:
186 my @updated_utc = gmtime(stat("$blosxom::datadir$path/$filename.$blosxom::file_extension")->mtime);
187 $updated_utc_date = sprintf("%4d-%02d-%02dT%02d:%02d:00Z",
188 $updated_utc[5]+1900,
194 # Date/time of most recently-modified story becomes date/time of the feed.
195 $feed_utc_date = $updated_utc_date if $updated_utc_date gt $feed_utc_date;
197 # use %blosxom::files for the year component of feed-level <atom:id>
198 # in case the creation time is cached somewhere.
199 $utc_yr = $published_utc[5]+1900;
201 # Set authorship if available, falling back to $atomfeed::default_author
202 $author = $whoami::fullname || $fauxami::name || $default_author || '';
204 # Setup $summary. Adapted from Rael's foreshortened plugin.
205 # For simplicity's sake, we're going to provide plain text summaries.
206 $summary = $$body_ref;
208 $summary =~ s/<.+?>//gs;
209 # then unescape any entities:
210 $summary =~ s/($unescape_re)/$unescape{$1}/g;
211 # truncate to what looks like first sentence:
212 $summary =~ s/[\.\!\?].+$/.../s;
213 # Remove newlines and carriage returns:
214 $summary =~ s/[\r\n]/ /g;
215 # Prepare for use in template:
216 $summary = "<summary type=\"text\">$summary</summary>";
218 # Take a look through $$body_ref for any enclosures or via/related links:
219 my @anchors = ( $$body_ref =~ /(<a [^>]+>)/gis );
221 foreach my $anchor ( @anchors ) {
222 if ( $anchor =~ /rel\s*=\s*"?\s*(via|enclosure|related)"?/is ) {
225 if ( $anchor =~ /href\s*=\s*"([^"]+)"/is ) {
228 elsif ( $anchor =~ /href\s*=\s*([^\s]+)/is ) {
233 if ( $use_full_enclosures && ( $type eq "enclosure" ) ) {
234 my( $mime, $length );
235 # Check for presence of enclosure in $info:
236 unless ( $info->{$href} ) { _get_info($href); }
237 if ( $info->{$href} ) {
238 # Check again for data on enclosure in $info, just in case of problems getting it.
239 $mime = $info->{$href}->{type};
240 $length = $info->{$href}->{length};
241 $links .= " <link rel=\"$type\" href=\"$href\" type=\"$mime\" length=\"$length\"/>\n";
244 # Fall back on a basic link:
245 $links .= " <link rel=\"$type\" href=\"$href\"/>\n";
250 $links .= " <link rel=\"$type\" href=\"$href\"/>\n";
257 ($title_type, $title) = _parse_markup($$title_ref);
259 # Parse the post body:
260 ($body_type, $body) = _parse_markup($$body_ref);
266 my($pkg, $currentdir, $foot_ref) = @_;
# Replace the placeholder with the feed-level <updated> element.
# The placeholder is matched literally (\Q...\E): its default value,
# "{{{updated}}}", consists of regex metacharacters, and a user-chosen
# placeholder may contain them too.
$feed_utc_date = "<updated>$feed_utc_date</updated>";
$blosxom::output =~ s/\Q$template_placeholder\E/$feed_utc_date/m;
273 # ----- private subroutines -----
277 # Pass in some text to parse, and I'll return a type and the text suitably configured.
281 # First, check to see if $text appears to contain markup.
282 # This regex should match any tag-like string: opening, closing or orphan tags.
283 if ( $text =~ m!</?[a-zA-Z0-9]+ ?/?>! ) {
284 # OK, looks like markup in there.
285 # Now, check to see if it looks well-formed:
286 if ( eval{$parser->parse("<div>$text</div>")}) {
287 # Yes? XHTML it is, then. I hope.
289 $text = "<div xmlns=\"http://www.w3.org/1999/xhtml\">$text</div>";
292 # No? Good old tag soup.
294 $text =~ s/($escape_re)/$escape{$1}/g;
298 # We'll assume it's plaintext then.
302 # Out go the results:
308 # Loads the data stored in $DataFile:
312 if( -e "$DataFile") {
313 open FH, "$DataFile" or return $info;
318 my ($url, $size, $type) = split (/ /, $_);
319 $info->{$url}->{length} = $size;
320 $info->{$url}->{type} = $type;
327 # Saves enclosure data structure in $info out to $DataFile
329 open FH, ">$DataFile" or return 0;
331 foreach $url (keys (%{$info})) {
332 print FH $url." ".$info->{$url}->{length} ." ". $info->{$url}->{type}."\n";
339 # Uses LWP to get content-type and content-length data
340 # for a given URL, adds this to the $info data structure
341 # and then calls _save_cache to preserve $info
342 return 0 unless eval "require LWP::UserAgent";
344 my $ua = LWP::UserAgent->new;
345 $ua->agent('BlosxomAtomFeed/0.5');
346 my $req = HTTP::Request->new(HEAD => "$url");
347 my $res = $ua->request($req);
349 if ( $res->is_success ){
350 $ct = $res->header('content-type');
351 $cl = $res->header('content-length');
352 $info->{$url}->{type} = $ct;
353 $info->{$url}->{length} = $cl;
360 sub _load_templates {
361 $blosxom::template{'atom'}{'content_type'} = 'application/atom+xml';
363 $blosxom::template{'atom'}{'date'} = "\n";
365 my $path_info_full = $blosxom::path_info_full || "$blosxom::path_info/index.atom";
366 $blosxom::template{'atom'}{'head'} =<<HEAD;
367 <?xml version="1.0" encoding="utf-8"?>\$atomfeed::css_url
368 <feed xmlns="http://www.w3.org/2005/Atom" xml:base="http://\$atomfeed::id_domain">
369 <title type="\$atomfeed::blog_title_type">\$atomfeed::blog_title</title>
370 <subtitle type="\$atomfeed::blog_description_type">\$atomfeed::blog_description</subtitle>
371 <link rel="self" type="application/atom+xml" href="$blosxom::url$path_info_full" />
372 <link rel="alternate" type="text/html" hreflang="$blosxom::blog_language" href="$blosxom::url" />
373 <id>tag\$atomfeed::colon\$atomfeed::id_domain,\$atomfeed::feed_yr\$atomfeed::colon/$blosxom::path_info</id>
374 <generator uri="\$atomfeed::generator_url" version="$blosxom::version">Blosxom</generator>
375 \$atomfeed::copyright
381 $blosxom::template{'atom'}{'story'} =<<'STORY';
383 <id>tag$atomfeed::colon$atomfeed::id_domain,$atomfeed::utc_yr$atomfeed::colon$path/$fn</id>
384 <link rel="alternate" type="text/html" href="$blosxom::url$blosxom::path/$blosxom::fn.$blosxom::default_flavour" />$atomfeed::links
385 <title type="$atomfeed::title_type">$atomfeed::title</title>
386 <published>$atomfeed::published_utc_date</published>
387 <updated>$atomfeed::updated_utc_date</updated>
390 <name>$atomfeed::author</name>
391 $atomfeed::author_uri$atomfeed::author_email
393 <content type="$atomfeed::body_type" xml:base="http://$atomfeed::id_domain" xml:lang="$blosxom::blog_language">
400 $blosxom::template{'atom'}{'foot'} =<<'FOOT';
413 Blosxom Plug-in: atomfeed
417 Provides an Atom 1.0 feed of your weblog.
419 The plugin has all you need right on-board, including the appropriate
420 flavour template components and a few configuration directives.
422 It supports the majority of the Atom 1.0 spec excluding the <source>
423 element, which seems intended for use in feeds that contain items
424 aggregated from other feeds, and currently the <contributor> element,
425 which could be included using the meta plugin.
427 Point your browser/feed reader at http://yoururl/index.atom.
435 Rael Dornfest <rael@oreilly.com>, http://www.raelity.org/
436 - wrote the original plugin based on the 0.3 spec
438 Sam Ruby <sam@intertwingly.net>, http://www.intertwingly.net/
439 - contributed the XML::Parser magic
441 Frank Hecker <hecker@hecker.org>, http://www.hecker.org/
442 - contributed patches for Atom 0.3 compliance, UTC date/time fix
444 Sam Pearson <sam@sgp.me.uk>, http://sgp.me.uk/
445 - Upgraded the plugin to handle Atom 1.0
447 Additional code was incorporated in the Atom 1.0 revision from the
448 enclosures plugin originally written by:
450 Dave Slusher, http://www.evilgeniuschronicles.org/wordpress/ and Keith
451 Irwin, http://www.asyserver.com/~kirwin/.
453 This plugin is now maintained by the Blosxom Sourceforge Team,
454 <blosxom-devel@lists.sourceforge.net>.
456 =head1 QUICKSTART INSTALLATION
458 To get an Atom feed up and running in a jiffy, you need only set the
459 following variables and drop the plugin into your plugins directory:
461 B<$default_author> is where you specify who to credit as the default
462 author of each entry. This can be overridden with the value provided
463 by the B<whoami> or B<fauxami> plugins.
465 B<$feed_yr> is where you specify the year your site began. This is
466 important as atomfeed needs to create a unique, unchanging ID for
467 your weblog and it needs this information to do so.
469 Everything else is optional.
471 =head1 FURTHER CONFIGURATION
473 There are a lot of variables available in the plugin you can use to
474 customise your Atom feed. These are all listed under B<CONFIGURABLE
475 VARIABLES>, below, with some notes as to their intended usage. Some
476 have defaults already specified, others will silently be excluded
479 As there are some variables generated entirely by the plugin, and as
480 some of the configurable variables are modified by the plugin, there
481 is also a complete list of all the variables available for use in
482 templates with notes on their form under B<TEMPLATE VARIABLES>.
484 If you wish to include enclosures or other types of <link> element in
485 your feed, see the section B<ENCLOSURES AND LINK ELEMENTS>, below.
487 Although you can use this plugin without anything other than blosxom
488 itself and a standard perl installation, it will perform better with
489 some optional extras available. See B<PERL MODULES> and B<OTHER
490 PLUGINS> for more information, particularly if you intend to use the
491 B<config> or B<prefs> plugins, any plugin that modifies your posts'
492 actual content (particularly by introducing markup), or any plugin
493 that operates on Blosxom's variable interpolation, such as
494 B<interpolate_fancy>.
496 =head1 CONFIGURABLE VARIABLES
498 In addition to B<$default_author> and B<$feed_yr>, the plugin has the
499 following user-configurable variables. Note that when setting
500 variables that are to be used at feed level and that contain URLs, any
501 relative URLs will be interpreted in relation to the value of the
502 variable B<$id_domain>. This is also true of any URLs included in
505 B<$author_uri> provides a URI for your default author. If you
506 leave this blank, it defaults to B<$blosxom::url>.
508 B<$author_email> Set this if you wish to include an email address for
509 the author of each entry. Leave it blank to exclude this element of
512 B<$copyright> Set this variable to a statement of copyright for your
513 site. Leave blank to exclude.
515 B<$id_domain> Atom associates unique ID tags with the feed itself and
516 individual entries. By default it'll attempt to glean your domain
517 from the specified or calculated value of B<$blosxom::url>, but you can
518 override this by setting this variable.
520 B<$icon_url> Set this variable to the URL of an icon to associate with
521 your site. This should be a small image with a 1:1 aspect ratio -
522 favicons are ideal. Leave blank to exclude.
524 B<$logo_url> Set this variable to the URL of a logo to associate with
525 your site. This can be larger than the icon, and should have an
526 aspect ratio of 2:1. Leave blank to exclude.
528 B<$template_placeholder> Set this variable to the string used in your
529 head.atom flavour template to identify where you would like the
530 feed-level updated element to appear. If you are using the built-in
531 templates, there is no need to change the default value.
533 B<$use_full_enclosures> If you are including enclosures in your Atom
534 feed, set this variable to 1 if you would like to include length and
535 type attributes. This requires that you have the LWP modules
536 installed on your webserver to work. See B<ENCLOSURES AND LINK
537 ELEMENTS>, below, for more information.
539 B<$DataFile> Set this variable to the name of a file where length and
540 type data on your enclosures is stored.
542 B<$css_url> Set this variable to the location of a stylesheet you
543 would like to have applied to your Atom feed. Leave blank to exclude
546 B<$css_type> Set this variable to the correct MIME type for the
547 stylesheet you are including in your feed. Defaults to 'text/css'.
549 =head1 TEMPLATE VARIABLES
551 The following notes will be of use if you intend to create your own
552 atom flavour templates.
554 Note that some variables have the necessary markup included, while
555 others do not; it is stated clearly when a variable contains the
556 required markup. This is so that they can be included in templates
557 without leaving empty elements when they are not required.
559 B<$atomfeed::author> contains the contents for the author section's
562 B<$atomfeed::author_email> contains any <email> element for the
563 author. Includes the required opening and closing tags.
565 B<$atomfeed::author_uri> contains any <uri> element for the author.
566 Includes the required opening and closing tags.
568 B<$atomfeed::blog_description> contains the contents for the
571 B<$atomfeed::blog_description_type> contains the value for the type
572 attribute of the <subtitle> element of the feed.
574 B<$atomfeed::blog_title> contains the title of your blog, suitably
575 prepared for use as the content of the feed-level <title> element.
577 B<$atomfeed::blog_title_type> contains the value required for the type
578 attribute for the feed-level <title> element.
580 B<$atomfeed::body> contains the full text of the body of your weblog
581 post, suitably formatted for use as the contents of the <content>
584 B<$atomfeed::body_type> contains the value for the type attribute of
585 the <content> element.
587 B<$atomfeed::category> contains a <category> for an entry, derived
588 from a story's path. This variable contains the required opening and
591 B<$atomfeed::colon> simply contains a colon character, for use in the
592 <id> elements - helps avoid confusion with variable interpolation.
594 B<$atomfeed::copyright> contains any copyright statement. This
595 variable includes the required opening and closing tags.
597 B<$atomfeed::css_url> contains everything you need to link to a
598 stylesheet, including the required opening and closing tags. Note
599 that this element belongs before the opening <feed> tag, as it is a
602 B<$atomfeed::feed_yr> contains the year your weblog started.
604 B<$atomfeed::icon_url> contains a complete <icon> element, including
605 the required opening and closing tags.
607 B<$atomfeed::id_domain> contains the root domain for your weblog.
609 B<$atomfeed::links> contains all the via, related and enclosure links
610 for an entry. This variable contains all the required markup.
612 B<$atomfeed::logo_url> contains a complete <logo> element, including
613 the required opening and closing tags.
615 B<$atomfeed::published_utc_date> contains the timestamp for an entry
616 based on the value stored in the B<%blosxom::files> hash.
618 B<$atomfeed::summary> contains a trimmed <summary> element, including
619 the opening and closing tags. Derived by truncating the entry down to
620 the first sentence, similar to the B<foreshortened> plugin.
622 B<$atomfeed::title> contains the contents for the story-level <title>
625 B<$atomfeed::title_type> contains the value required for the type
626 attribute of the story-level <title> element.
628 B<$atomfeed::updated_utc_date> contains the timestamp for an entry
629 based on a direct stat on the story file itself.
631 B<$atomfeed::utc_yr> contains the year in which an entry was made,
632 based upon the value stored in the B<%blosxom::files> hash.
634 =head1 ENCLOSURES AND LINK ELEMENTS
636 Atom provides an elegant method for expressing relationships between
637 different resources using the rel attribute of its <link> element.
638 This includes the method Atom uses to support enclosures, used to
639 deliver additional content - often audio or video data - to the
640 recipient of the feed.
642 To take advantage of this, the plugin supports rel attribute values of
643 via, related and enclosure. To have these included in your feed,
644 simply link to the resource in the body of your weblog post and make
645 sure that the anchor tag has an appropriate rel attribute of
646 enclosure, via or related, depending upon the kind of relationship you
649 Ideally, enclosures should also contain information on their length
650 (the size of the file) and MIME type. The atomfeed plugin will try to
651 determine this information if you set the B<$use_full_enclosures>
652 variable to '1'. To make sure this works correctly, you should link
653 to the enclosure using an absolute URL rather than a relative one -
654 "http://example.com/podcasts/july-05.mp3" instead of
655 "/podcasts/july-05.mp3" - even if the enclosure is hosted under the
658 If you are unsure as to whether your server has this module installed,
659 you should be able to experiment by setting the variable anyway, as
660 the plugin should continue to function even if it is not present.
664 This plugin will work at its best if your server has B<XML::Parser>
665 and B<LWP> modules installed, although it will function adequately
670 In order for the <published> and <updated> timestamps to make sense,
671 you should be running a plugin like B<entries_cache> that retains the
672 original timestamps of your entries and places them into the
673 B<%blosxom::files> hash. If you are not, you should remove the
674 <published> element from the story template.
676 The atomfeed plugin assumes you're not running any fancy interpolation
677 plugin (e.g. B<interpolate_fancy>) which changes the way variables are
678 specified in a template (e.g. <$foo /> rather than $foo). If you are
679 running B<interpolate_fancy> or the like use the B<config> plugin and
680 a config.atom file in your blosxom B<$datadir> consisting of:
682 $blosxom::plugins{"interpolate_fancy"} = 0;
684 Where "interpolate_fancy" is the name of the interpolation plugin
685 you're turning off _just for the atom feed_.
687 If you are planning on using the B<config> or B<prefs> plugins to alter
688 variables in the atomfeed namespace, you will need to ensure that
689 these plugins run B<before> the atomfeed plugin. You can do this by
690 prefixing a number to the name of the relevant plugin, such as B<1config>
693 Similarly, if you are running any plugins that alter the content of
694 your posts - for example by escaping characters or adding markup -
695 these should also be set to run before atomfeed. Essentially, you
696 want atomfeed to get each post as it would be sent to a normal web
697 browser for it to work as intended.
701 Blosxom Home/Docs/Licensing: http://blosxom.sourceforge.net/
703 Blosxom Plugin Docs: http://blosxom.sourceforge.net/documentation/users/plugins.html
705 1.0 Update Release Notes:
706 http://sgp.me.uk/sam/2005/08/04/atom-for-blosxom
708 Atom 1.0 Specification:
709 http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
713 None known; please send bug reports and feedback to the Blosxom
714 development mailing list <blosxom-devel@lists.sourceforge.net>.
718 Blosxom and this Blosxom Plug-in
719 Copyright 2003, Rael Dornfest
721 Permission is hereby granted, free of charge, to any person obtaining
722 a copy of this software and associated documentation files (the
723 "Software"), to deal in the Software without restriction, including
724 without limitation the rights to use, copy, modify, merge, publish,
725 distribute, sublicense, and/or sell copies of the Software, and to
726 permit persons to whom the Software is furnished to do so, subject to
727 the following conditions:
729 The above copyright notice and this permission notice shall be
730 included in all copies or substantial portions of the Software.
732 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
733 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
734 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
735 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
736 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
737 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
738 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.