# Blosxom Plugin: atomfeed -*-cperl-*-
# Author(s): Original plugin: Rael Dornfest
#            XML::Parser: Sam Ruby
#            UTC and fixes for 0.3: Frank Hecker
#            Enclosures support: Dave Slusher and Keith Irwin
#            Upgrade to Atom 1.0 spec: Sam Pearson
#            Removed static $feed_url, added georss support: Gavin Carr
# Version: 2007-12-31
# Docs: Included below: type "perldoc atomfeed", or scroll down
# Blosxom Home/Docs/Licensing: http://blosxom.sourceforge.net/

package atomfeed;

# NOTE: every variable below is deliberately a package global (no "my"):
# flavour templates and other plugins read and write them as
# $atomfeed::<name>.

# ----- Mandatory configurable variables -----

# For a basic atom feed, you only need set $default_author and $feed_yr.
# If you do not, the plugin will exit quietly.
# All other configuration is optional, and can be safely ignored.

# Who would you like your feed to credit as the default author of each entry?
# Leave blank and the atomfeed plugin will attempt to use the whoami and
# fauxami plugins
$default_author = "";

# What year was your weblog started?  This will be used
# to form part of your weblog's unique ID.
$feed_yr = "";

# ----- Optional configurable variables -----

# What is the default author's URL?
# Blank defaults to $blosxom::url
$author_uri = "";

# What is the default author's email address?
# Leave blank to exclude.
$author_email = '';

# Copyright statement:
# leave blank to exclude.
$copyright = "";

# What domain should Blosxom use in ID tags?
# Leave blank if you don't understand or for Blosxom to use the domain in $url.
$id_domain = "";

# Icon
# Put the URL for a site icon here (for example, your site's favicon).
# Leave blank to exclude.
$icon_url = "";

# Logo
# Set to the URL for your site logo.  Leave blank to exclude.
$logo_url = "";

# What template placeholder in your flavour template should I replace with
# the feed-level <updated> element?
# If you are using the built-in templates, leave this alone.
my $template_placeholder = "{{{updated}}}";

# Generator that produced this feed
$generator_url = "http://blosxom.sourceforge.net/";

# Enclosures support
# ------------------
# You can add enclosures to your atom feed by linking to them in your post
# and giving the anchor tag a rel attribute of "enclosure".
# Set $use_full_enclosures to 1 if you wish to add length and content-type
# to your enclosures.  This function relies upon your webserver having
# LWP modules installed.
$use_full_enclosures = '0';

# Name of a file to cache info about your enclosures:
$DataFile = "$blosxom::plugin_state_dir/enclosures.dat";

# Stylesheet support
# ------------------
# If you have a stylesheet to associate with your atom feed, place its URL here.
$css_url = "";

# You can specify the type of stylesheet here:
$css_type = "text/css";

# ----- END OF CONFIGURABLE VARIABLES -----

# __END_CONFIG__

use File::stat;
use Fcntl qw(:flock);

# --- Plug-in package variables -----
# These stay package globals on purpose: the flavour templates interpolate
# them as $atomfeed::<name>.

$author   = '';
$T        = 'T';
$colon    = ':';
$zerozero = '00';

# Try to glean the domain from $url (both http:// and https:// URLs).
$id_domain or ($id_domain) = $blosxom::url =~ m#https?://(?:www\.)?([^\/]+)#;

$utc_date = '';
use vars qw/$feed_utc_date $category $links $summary $georss/;

# ----- plugin subroutines -----

# start(): Blosxom plugin entry point.
# Returns 1 to enable the plugin, or 0 (after a warning) when no author can
# be determined or $feed_yr is unset.  Also loads the built-in 'atom'
# templates if none are present, tries to load XML::Parser, builds the
# escape/unescape tables and, if enabled, loads the enclosure cache.
sub start {
    # Check for our two mandatory variables.  The eval guards against the
    # whoami/fauxami plugins not being installed.
    unless ( ( eval { whoami::start() or fauxami::start() } or $default_author )
        and $feed_yr )
    {
        # Double quotes so that \n is a real newline; the sigils are escaped
        # so the variable *names* appear in the message.
        warn "Blosxom plugin: atomfeed > Please set \$default_author and \$feed_yr. Exiting.\n";
        return 0;
    }

    # Check for the existence of already-loaded flavour templates or theme,
    # loading templates if there's nothing:
    # Note that it looks like this condition should *never* be met, so why
    # did Rael put this code here?  Can't we just do _load_templates();
    $blosxom::template{'atom'}{'head'} or _load_templates();

    # changed to require from use to make plugin work for those
    # without XML::Parser.  Consequence: entries will never be labelled
    # type='xhtml', only 'text' or 'html'.  Thanks, S2!
    eval { require XML::Parser; $parser = XML::Parser->new; };

    # Character <-> entity tables used by story() and _parse_markup().
    %escape = ( '<' => '&lt;', '>' => '&gt;', '&' => '&amp;', '"' => '&quot;' );
    $escape_re = join '|' => keys %escape;
    foreach my $char ( keys %escape ) {
        $unescape{ $escape{$char} } = $char;
    }
    $unescape_re = join '|' => keys %unescape;

    # If required, initialise the enclosures data cache:
    $use_full_enclosures and _load_cache();

    return 1;
}

# head(): Blosxom plugin hook.  Turns the feed-level configuration values
# into ready-to-interpolate XML fragments and parses the blog title and
# description.  Always returns 1.
#
# Make adjustments to plugin variables here, so that users can modify their
# defaults using the config and prefs plugins.  Note that these plugins will
# have to run *before* atomfeed for this to work as intended.
sub head {
    # The xml-stylesheet processing instruction is only added once, even if
    # head() runs more than once in the same process.
    if ( $css_url and $css_url !~ /<\?xml-stylesheet/ ) {
        $css_url = "\n<?xml-stylesheet href=\"$css_url\" type=\"$css_type\"?>";
    }

    $copyright and $copyright = _wrap_element( 'rights', $copyright );

    $author_uri or $author_uri = "$blosxom::url";
    $author_uri = _wrap_element( 'uri', $author_uri );

    $author_email
        and $author_email = "\n            " . _wrap_element( 'email', $author_email );

    $icon_url and $icon_url = _wrap_element( 'icon', $icon_url );
    $logo_url and $logo_url = _wrap_element( 'logo', $logo_url );

    # Check and prepare a <title> and <subtitle>:
    ( $blog_title_type, $blog_title ) = _parse_markup($blosxom::blog_title);
    ( $blog_description_type, $blog_description )
        = _parse_markup($blosxom::blog_description);

    $feed_utc_date = '';

    return 1;
}

# story(): Blosxom plugin hook, called once per entry.
# Arguments: ($pkg, $path, $filename, $story_ref, $title_ref, $body_ref).
# Sets the per-entry package variables used by the story template:
# $category, $georss, $published_utc_date, $updated_utc_date, $utc_yr,
# $author, $summary, $links, $title(_type) and $body(_type); also advances
# $feed_utc_date to the newest entry seen.  Always returns 1.
sub story {
    my ( $pkg, $path, $filename, $story_ref, $title_ref, $body_ref ) = @_;

    # set up <category>:
    $category = '';
    if ($path) {
        $category = "<category term=\"$path\"/>";
    }

    # GeoRSS support
    $georss = '';
    if ( $blosxom::plugins{geo} && $geo::latitude && $geo::longitude ) {
        $georss = qq(<georss:point>$geo::latitude $geo::longitude</georss:point>);
    }

    my $entry_file = "$blosxom::datadir$path/$filename.$blosxom::file_extension";

    # <published>: derive from %blosxom::files
    my $published_epoch = $blosxom::files{$entry_file};
    $published_utc_date = _utc_timestamp($published_epoch);

    # <updated>: derive by stat()ing the file for its mtime.  File::stat's
    # stat() returns undef on failure, so fall back to the published time
    # rather than dying on a method call on undef.
    my $st = stat($entry_file);
    my $updated_epoch = $st ? $st->mtime : $published_epoch;
    $updated_utc_date = _utc_timestamp($updated_epoch);

    # Date/time of most recently-modified story becomes date/time of the feed.
    $feed_utc_date = $updated_utc_date if $updated_utc_date gt $feed_utc_date;

    # use %blosxom::files for the year component of feed-level <atom:id>
    # in case the creation time is cached somewhere.
    $utc_yr = ( gmtime($published_epoch) )[5] + 1900;

    # Set authorship if available, falling back to $atomfeed::default_author
    $author = $whoami::fullname || $fauxami::name || $default_author || '';

    # Setup $summary.  Adapted from Rael's foreshortened plugin.
    # For simplicity's sake, we're going to provide plain text summaries.
    $summary = $$body_ref;
    # first remove tags:
    $summary =~ s/<.+?>//gs;
    # then unescape any entities:
    $summary =~ s/($unescape_re)/$unescape{$1}/g;
    # truncate to what looks like first sentence:
    $summary =~ s/[\.\!\?].+$/.../s;
    # Remove newlines and carriage returns:
    $summary =~ s/[\r\n]/ /g;
    # Re-escape so the plain text is safe to embed in the XML document:
    $summary =~ s/($escape_re)/$escape{$1}/g;
    # Prepare for use in template:
    $summary = "<summary type=\"text\">$summary</summary>";

    # take a look through $$body_ref for any enclosures or via/related links:
    my @anchors = ( $$body_ref =~ /(<a [^>]+>)/gis );
    $links = "\n";
    foreach my $anchor (@anchors) {
        next unless $anchor =~ /rel\s*=\s*"?\s*(via|enclosure|related)"?/is;

        # The match is case-insensitive, so normalise before comparing.
        my $type = lc $1;

        my $href;
        if ( $anchor =~ /href\s*=\s*"([^"]+)"/is ) {
            $href = $1;
        }
        elsif ( $anchor =~ /href\s*=\s*([^\s>]+)/is ) {
            # Unquoted attribute: stop at whitespace or the closing '>'.
            $href = $1;
        }
        next unless $href;
        $href =~ s/\s//g;

        # Optional type="" / length="" attributes for full enclosures.
        my $extra = '';
        if ( $use_full_enclosures && $type eq 'enclosure' ) {
            # Check for presence of enclosure in $info:
            $info->{$href} or _get_info($href);

            # Check again, just in case of problems getting it; if there is
            # still nothing we fall back on a basic link.
            if ( my $meta = $info->{$href} ) {
                my $mime = defined $meta->{type} ? $meta->{type} : '';
                # Drop parameters such as "; charset=..." from the media type.
                $mime =~ s/\s*;.*\z//s;
                my $length = defined $meta->{length} ? $meta->{length} : '';
                $extra .= " type=\"$mime\""     if length $mime;
                $extra .= " length=\"$length\"" if length $length;
            }
        }

        $links .= "        <link rel=\"$type\" href=\"$href\"$extra/>\n";
    }

    # Parse post title:
    ( $title_type, $title ) = _parse_markup($$title_ref);

    # Parse the post body:
    ( $body_type, $body ) = _parse_markup($$body_ref);

    return 1;
}

# foot(): Blosxom plugin hook.  Replaces the first occurrence of the
# template placeholder in $blosxom::output with the feed-level <updated>
# element.  Always returns 1.
sub foot {
    my ( $pkg, $currentdir, $foot_ref ) = @_;

    # With no stories there is no timestamp yet; use the current time so
    # the <updated> element is never empty.
    $feed_utc_date = _utc_timestamp(time) unless length $feed_utc_date;

    # Replace the placeholder with the feed-level <updated> element.
    # \Q...\E: the placeholder is literal text, not a regex (its braces would
    # otherwise be parsed as regex syntax).
    $feed_utc_date = "<updated>$feed_utc_date</updated>";
    $blosxom::output =~ s/\Q$template_placeholder\E/$feed_utc_date/;

    return 1;
}

# ----- private subroutines -----

# Formats an epoch time as the UTC timestamp used throughout the feed.
# Seconds are always written as 00.
sub _utc_timestamp {
    my ($epoch) = @_;
    my @utc = gmtime($epoch);
    return sprintf( "%4d-%02d-%02dT%02d:%02d:00Z",
        $utc[5] + 1900, $utc[4] + 1, $utc[3], $utc[2], $utc[1] );
}

# Wraps $value in <$element>...</$element> unless it already starts with
# that element, so calling head() repeatedly does not nest the wrappers.
sub _wrap_element {
    my ( $element, $value ) = @_;
    return $value if $value =~ m{\A\s*<\Q$element\E[\s>]};
    return "<$element>$value</$element>";
}

# _parse_markup($text): pass in some text to parse, and I'll return a
# (type, text) pair suitably configured:
#   'xhtml' - text parsed as well-formed XML; it is wrapped in an XHTML <div>
#   'html'  - text contains tags but did not parse (or XML::Parser is not
#             available); the markup characters are entity-escaped
#   'text'  - no tag-like string found; text returned unchanged
sub _parse_markup {
    my $text = shift;
    my $type;

    # First, check to see if $text appears to contain markup.
    # This regex should match any tag-like string: opening, closing or
    # orphan tags.
    if ( $text =~ m!</?[a-zA-Z0-9]+ ?/?>! ) {
        # OK, looks like markup in there.
        # Now, check to see if it looks well-formed:
        if ( $parser and eval { $parser->parse("<div>$text</div>") } ) {
            # Yes?  XHTML it is, then.  I hope.
            $type = 'xhtml';
            $text = "<div xmlns=\"http://www.w3.org/1999/xhtml\">$text</div>";
        }
        else {
            # No?  Good old tag soup.
            $type = 'html';
            $text =~ s/($escape_re)/$escape{$1}/g;
        }
    }
    else {
        # We'll assume it's plaintext then.
        $type = 'text';
    }

    # Out go the results:
    return $type, $text;
}

# Loads the data stored in $DataFile into $info and returns $info.
# Each line of the file is "url length type".  A missing or unreadable
# file yields an empty cache.
sub _load_cache {
    $info = {};

    return $info unless -e $DataFile;
    open my $fh, '<', $DataFile or return $info;
    flock( $fh, LOCK_SH );

    while ( my $line = <$fh> ) {
        chomp $line;
        # Limit of 3 so a content type containing spaces stays in one piece.
        my ( $url, $size, $type ) = split / /, $line, 3;
        next unless defined $url && length $url;
        $info->{$url}->{length} = $size;
        $info->{$url}->{type}   = $type;
    }

    close $fh;
    return $info;
}

# Saves the enclosure data structure in $info out to $DataFile.
# Returns 1 on success, 0 on failure.
sub _save_cache {
    # Open in append mode and truncate only once the lock is held, so a
    # concurrent reader never sees a half-written file.
    open my $fh, '>>', $DataFile or return 0;
    unless ( flock( $fh, LOCK_EX ) ) {
        close $fh;
        return 0;
    }
    truncate( $fh, 0 );
    seek( $fh, 0, 0 );

    foreach my $url ( keys %{$info} ) {
        my $length = defined $info->{$url}->{length} ? $info->{$url}->{length} : '';
        my $type   = defined $info->{$url}->{type}   ? $info->{$url}->{type}   : '';
        print {$fh} "$url $length $type\n";
    }

    close $fh or return 0;
    return 1;
}

# _get_info($url): uses LWP to send a HEAD request for $url, stores the
# content-type and content-length headers in $info and then calls
# _save_cache to preserve $info.  Returns 1 on success, 0 if LWP is not
# available or the request fails.
sub _get_info {
    return 0 unless eval { require LWP::UserAgent; 1 };

    my $url = shift;

    my $ua = LWP::UserAgent->new;
    $ua->agent('BlosxomAtomFeed/0.5');

    my $res = $ua->head($url);
    return 0 unless $res->is_success;

    $info->{$url}->{type}   = $res->header('content-type');
    $info->{$url}->{length} = $res->header('content-length');
    _save_cache();

    return 1;
}

# Installs the built-in 'atom' flavour templates into %blosxom::template.
# Variables written as \$atomfeed::... in the interpolating heredocs are
# left for Blosxom to interpolate at render time; unescaped ones are
# filled in now.
sub _load_templates {
    $blosxom::template{'atom'}{'content_type'} = 'application/atom+xml';
    $blosxom::template{'atom'}{'date'}         = "\n";

    my $path_info_full = $blosxom::path_info_full
        || "$blosxom::path_info/index.atom";
    # Exactly one slash between the base URL and the path.
    $path_info_full =~ s{\A/+}{};

    $blosxom::template{'atom'}{'head'} = <<HEAD;
<?xml version="1.0" encoding="utf-8"?>\$atomfeed::css_url
<feed xmlns="http://www.w3.org/2005/Atom" xml:base="http://\$atomfeed::id_domain"
HEAD

    if ( $blosxom::plugins{geo} ) {
        $blosxom::template{'atom'}{'head'}
            .= qq(      xmlns:georss="http://www.georss.org/georss");
    }

    $blosxom::template{'atom'}{'head'} .= <<HEAD;
>
    <title type="\$atomfeed::blog_title_type">\$atomfeed::blog_title</title>
    <subtitle type="\$atomfeed::blog_description_type">\$atomfeed::blog_description</subtitle>
    <link rel="self" type="application/atom+xml" href="$blosxom::url/$path_info_full"/>
    <link rel="alternate" type="text/html" href="$blosxom::url"/>
    <id>tag\$atomfeed::colon\$atomfeed::id_domain,\$atomfeed::feed_yr\$atomfeed::colon/$blosxom::path_info</id>
    <generator uri="\$atomfeed::generator_url">Blosxom</generator>
    \$atomfeed::copyright
    \$atomfeed::icon_url
    \$atomfeed::logo_url
    {{{updated}}}
HEAD

    $blosxom::template{'atom'}{'story'} = <<'STORY';
    <entry>
        <id>tag$atomfeed::colon$atomfeed::id_domain,$atomfeed::utc_yr$atomfeed::colon$path/$fn</id>
        <link rel="alternate" type="text/html" href="$blosxom::url$path/$fn.$blosxom::default_flavour"/>$atomfeed::links
        <title type="$atomfeed::title_type">$atomfeed::title</title>
        <published>$atomfeed::published_utc_date</published>
        <updated>$atomfeed::updated_utc_date</updated>
        $atomfeed::category$atomfeed::georss
        <author>
            <name>$atomfeed::author</name>
            $atomfeed::author_uri$atomfeed::author_email
        </author>
        <content type="$atomfeed::body_type">
$atomfeed::body
        </content>
    </entry>

STORY

    $blosxom::template{'atom'}{'foot'} = <<'FOOT';
</feed>
FOOT

    return 1;
}

1;

__END__

=head1 NAME

Blosxom Plug-in: atomfeed

=head1 SYNOPSIS

Provides an Atom 1.0 feed of your weblog.

The plugin has all you need right on-board, including the appropriate
flavour template components and a few configuration directives.

It supports the majority of the Atom 1.0 spec excluding the <source>
element, which seems intended for use in feeds that contain items
aggregated from other feeds, and currently the <contributor> element,
which could be included using the meta plugin.

Point your browser/feed reader at http://yoururl/index.atom.

=head1 VERSION

2005-08-04

=head1 AUTHORS

Rael Dornfest, http://www.raelity.org/
 - wrote the original plugin based on the 0.3 spec

Sam Ruby, http://www.intertwingly.net/
 - contributed the XML::Parser magic

Frank Hecker, http://www.hecker.org/
 - contributed patches for Atom 0.3 compliance, UTC date/time fix

Sam Pearson, http://sgp.me.uk/
 - Upgraded the plugin to handle Atom 1.0

Additional code was incorporated in the Atom 1.0 revision from the
enclosures plugin originally written by:

Dave Slusher, http://www.evilgeniuschronicles.org/wordpress/ and
Keith Irwin, http://www.asyserver.com/~kirwin/.

This plugin is now maintained by the Blosxom Sourceforge Team.

=head1 QUICKSTART INSTALLATION

To get an Atom feed up and running in a jiffy, you need only set the
following variables and drop the plugin into your plugins directory:

B<$default_author> is where you specify who to credit as the default author
of each entry.  This can be overridden with the value provided by the
B<whoami> or B<fauxami> plugins.

B<$feed_yr> is where you specify the year your site began.  This is
important as atomfeed needs to create a unique, unchanging ID for your
weblog and it needs this information to do so.

Everything else is optional.

=head1 FURTHER CONFIGURATION

There are a lot of variables available in the plugin you can use to
customise your Atom feed.  These are all listed under
B<CONFIGURABLE VARIABLES>, below, with some notes as to their intended
usage.  Some have defaults already specified, others will silently be
excluded until you set them.

As there are some variables generated entirely by the plugin, and as some
of the configurable variables are modified by the plugin, there is also a
complete list of all the variables available for use in templates with
notes on their form under B<TEMPLATE VARIABLES>