# Blosxom Plugin: lastmodified2
# Author(s): Frank Hecker
# (based on work by Bob Schumaker)
# Version: 0.10
# Documentation: See the bottom of this file or type: perldoc lastmodified2

package lastmodified2;

use strict;
use warnings;
no warnings 'once';    # the $blosxom::* globals are not declared in this file

use HTTP::Date;
use Data::Dumper;
use POSIX qw! strftime !;

# Use the Digest:MD5 module if available, the older MD5 module if not.

my $use_digest;
my $use_just_md5;

BEGIN {
    if (eval "require Digest::MD5") {
        Digest::MD5->import();
        $use_digest = 1;
    }
    elsif (eval "require MD5") {
        MD5->import();
        $use_just_md5 = 1;
    }
}

# --- Package variables -----

my $current_time       = time();    # Use consistent value of current time.
my $last_modified_time = 0;
my $etag               = "";
my $md5_digest         = "";
my %validator;

# --- Output variables -----

our $latest_rfc822  = '';
our $latest_iso8601 = '';

our $others_rfc822  = '';
our $others_iso8601 = '';

our $now_rfc822  = '';
our $now_iso8601 = '';

our $story_rfc822  = '';
our $story_iso8601 = '';

# --- Configurable variables -----

my $generate_etag = 1;                  # generate ETag header?

my $generate_mod = 1;                   # generate Last-modified header?

my $strong = 0;                         # do strong validation?

my $val_cache = "validator.cache";      # where to cache last-modified values
                                        # and MD5 digests (in state directory)

my $generate_expires = 0;               # generate Expires header?

my $generate_cache = 0;                 # generate Cache-control header?

my $freshness_time = 3000;              # number of seconds pages are fresh
                                        # (0 = do not cache, max is 1 year)

my $generate_length = 1;                # generate Content-length header?

my $use_others = 0;                     # consult %others for weak validation
                                        # (DEPRECATED)

my $export_dates = 1;                   # set $latest_rfc822, etc., for
                                        # compatibility with lastmodified

my $debug = 0;                          # set > 0 for debug output

# --------------------------------


# Do any initial processing, and decide whether to activate the plugin.
#
# Returns 0 (plugin inactive) for static page generation, otherwise 1.
# Side effects: may force $strong to 0, clamps $freshness_time, loads the
# validator cache into %validator, and sets $now_rfc822 / $now_iso8601.

sub start {
    warn "lastmodified2: start\n" if $debug > 1;

    # Don't activate this plugin if we are doing static page generation.

    return 0 if ($blosxom::static_or_dynamic || '') eq 'static';

    # If we can't do MD5 then we don't do strong validation.

    if ($strong && !($use_digest || $use_just_md5)) {
        $strong = 0;

        warn "lastmodified2: MD5 not available, forcing weak validation\n"
            if $debug > 0;
    }

    # Limit freshness time to maximum of one year, must be non-negative.

    my $one_year = 365*24*3600;
    $freshness_time = $one_year if $freshness_time > $one_year;
    $freshness_time = 0         if $freshness_time < 0;

    if ($debug > 1) {
        warn "lastmodified2: \$generate_etag = $generate_etag\n";
        warn "lastmodified2: \$generate_mod = $generate_mod\n";
        warn "lastmodified2: \$strong = $strong\n";
        warn "lastmodified2: \$generate_cache = $generate_cache\n";
        warn "lastmodified2: \$generate_expires = $generate_expires\n";
        warn "lastmodified2: \$freshness_time = $freshness_time\n";
        warn "lastmodified2: \$generate_length = $generate_length\n";
    }

    # If we are using Last-modified as a strong validator then read
    # in the cached last-modified values and MD5 digests.  A missing or
    # unreadable cache is not an error; we simply start with an empty one.

    if ($generate_mod && $strong) {
        my $cache_path = "$blosxom::plugin_state_dir/$val_cache";

        if (open my $cache_fh, '<', $cache_path) {
            warn "lastmodified2: loading cached validators\n" if $debug > 0;

            my $index = join '', <$cache_fh>;
            close $cache_fh;

            # SECURITY NOTE: the cache is a Data::Dumper dump that is loaded
            # with a string eval, so anyone able to write to the plugin state
            # directory can run arbitrary code.  The format is kept for
            # compatibility with existing cache files.
            my $VAR1;
            if (   $index =~ m!\$VAR1 = \{!
                && eval($index)
                && !$@
                && ref $VAR1 eq 'HASH')
            {
                %validator = %$VAR1;
            }
        }
    }

    # Convert current time to RFC 822 and ISO 8601 formats for others' use.

    if ($export_dates && $current_time) {
        $now_rfc822  = HTTP::Date::time2str($current_time);
        $now_iso8601 = iso8601($current_time);
    }

    return 1;
}


# We check the list of entries to be displayed and determine the modification
# time of the most recent entry.
#
# Arguments: ($pkg, $files, $others); $files and $others are hash references
# whose values are compared numerically as modification times.
# Side effects: sets $last_modified_time, the $latest_* and $others_* output
# variables, and (for weak validation) $etag.  Always returns 1.

sub filter {
    my ($pkg, $files, $others) = @_;

    warn "lastmodified2: filter\n" if $debug > 1;

    # We can skip all this unless we're doing weak validation and/or we're
    # setting the *_rfc822 and *_iso8601 variables for others to use.

    return 1 unless $export_dates
        || (($generate_etag || $generate_mod) && !$strong);

    # Find the latest date/time modified for the entries to be displayed.

    $last_modified_time = 0;
    for my $mtime (values %$files) {
        $last_modified_time = $mtime if $mtime > $last_modified_time;
    }

    warn "lastmodified2: \$last_modified_time = " .
        $last_modified_time . " (entries)\n" if $debug > 0;

    # Convert last modified time to RFC 822 and ISO 8601 formats for others.

    if ($export_dates && $last_modified_time) {
        $latest_rfc822  = HTTP::Date::time2str($last_modified_time);
        $latest_iso8601 = iso8601($last_modified_time);
    }

    # Optionally look at other files as well (DEPRECATED).

    if ($use_others) {
        my $others_last_modified_time = 0;
        for my $mtime (values %$others) {
            $others_last_modified_time = $mtime
                if $mtime > $others_last_modified_time;
        }

        if ($export_dates && $others_last_modified_time) {
            $others_rfc822 =
                HTTP::Date::time2str($others_last_modified_time);
            $others_iso8601 = iso8601($others_last_modified_time);
        }

        warn "lastmodified2: \$others_last_modified_time = " .
            $others_last_modified_time . " (others)\n" if $debug > 0;

        $last_modified_time = $others_last_modified_time
            if $others_last_modified_time > $last_modified_time;
    }

    # If we're doing weak validation then create an etag based on the latest
    # date/time modified and mark it as weak (i.e., by prefixing it with 'W/').

    if ($generate_etag && !$strong) {
        $etag = 'W/"' . $last_modified_time . '"';

        warn "lastmodified2: \$etag = $etag\n" if $debug > 0;
    }

    return 1;
}


# Skip story processing and generate configured headers now on a conditional
# GET request for which we don't need to return a full response.
#
# Returns true when a 304 response has been prepared, false otherwise.

sub skip {
    warn "lastmodified2: skip\n" if $debug > 1;

    # If we are doing strong validation then we can't skip story processing
    # because we need all output in order to generate the proper etag and/or
    # last-modified value.

    return 0 unless ($generate_etag || $generate_mod) && !$strong;

    # Otherwise we can check here whether we can send a 304 or not.

    my $send_304 = check_for_304();

    # If we don't need to return a full response on a conditional GET then
    # set the HTTP status to 304 and generate headers as configured.
    # (We have to do this here because the last subroutine won't be executed
    # if we skip story processing.)

    add_headers($send_304) if $send_304;

    return $send_304;
}


# Set variables with story date/time in RFC 822 and ISO 8601 formats.
#
# The timestamp is looked up in %blosxom::files under the key
# "$blosxom::datadir$path/$filename.$blosxom::file_extension"; when no entry
# is found both output variables are set to ''.  Always returns 1.

sub story {
    my ($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;

    $path ||= "";

    warn "lastmodified2: story (\$path = $path, \$filename = $filename)\n"
        if $debug > 1;

    if ($export_dates) {
        my $timestamp =
            $blosxom::files{"$blosxom::datadir$path/$filename.$blosxom::file_extension"};

        warn "lastmodified2: \$timestamp = " .
            (defined $timestamp ? $timestamp : '') . "\n" if $debug > 0;

        $story_rfc822  = $timestamp ? HTTP::Date::time2str($timestamp) : '';
        $story_iso8601 = $timestamp ? iso8601($timestamp) : '';
    }

    return 1;
}


# Do conditional GET checks if we couldn't do them before (i.e., we are
# doing strong validation and couldn't skip story processing) and output
# any configured headers plus a 304 status if appropriate.  Always returns 1.

sub last {
    warn "lastmodified2: last\n" if $debug > 1;

    # If some other plugin has set the HTTP status to a non-OK value then we
    # don't attempt to do anything here, since it would probably be wrong.

    return 1 if $blosxom::header->{'Status'}
        && $blosxom::header->{'Status'} !~ m!^200 !;

    # If we are using ETag and/or Last-modified as a strong validator then
    # we generate an entity tag from the MD5 message digest of the complete
    # output.  (We use the base-64 representation if possible because it is
    # more compact than hex and hence saves a few bytes of bandwidth.)
    # NOTE(review): confirm the legacy MD5 module really provides hex_hash();
    # the call is left exactly as found.

    if (($generate_etag || $generate_mod) && $strong) {
        $md5_digest = $use_digest
            ? Digest::MD5::md5_base64($blosxom::output)
            : MD5->hex_hash($blosxom::output);
        $etag = '"' . $md5_digest . '"';

        warn "lastmodified2: \$etag = $etag\n" if $debug > 0;
    }

    # If we are using Last-modified as a strong validator then we look up
    # the cached MD5 digest for this URI, compare it to the current digest,
    # and use the cached last-modified value if they match.  Otherwise we set
    # the last-modified value to just prior to the current time.

    my $cache_tag    = cache_tag();
    my $update_cache = 0;

    if ($generate_mod && $strong) {
        my $cached = $validator{$cache_tag};

        if (   $cached
            && defined $cached->{'md5'}
            && $md5_digest eq $cached->{'md5'})
        {
            $last_modified_time = $cached->{'last-modified'};
        }
        else {
            $last_modified_time = $current_time - 5;
            $validator{$cache_tag}{'last-modified'} = $last_modified_time;
            $validator{$cache_tag}{'md5'}           = $md5_digest;
            $update_cache = 1;
        }

        warn "lastmodified2: \$last_modified_time = $last_modified_time\n"
            if $debug > 0;
    }

    # Do conditional GET checks and output configured headers plus status.

    my $send_304 = check_for_304();
    add_headers($send_304);

    # Update the validator cache if we need to.  To minimize race conditions
    # we write the cache as a temporary file and then rename it.  The rename
    # only happens after a fully successful write, so a failed or truncated
    # write can never replace a good cache file.

    if ($update_cache) {
        warn "lastmodified2: updating validator cache\n" if $debug > 0;

        my $tmp_cache  = "$val_cache-$$-$current_time";
        my $tmp_path   = "$blosxom::plugin_state_dir/$tmp_cache";
        my $final_path = "$blosxom::plugin_state_dir/$val_cache";

        if (open my $cache_fh, '>', $tmp_path) {
            my $printed = print {$cache_fh} Dumper(\%validator);
            my $closed  = close $cache_fh;

            if ($printed && $closed) {
                warn "lastmodified2: renaming $tmp_cache to $val_cache\n"
                    if $debug > 1;

                rename($tmp_path, $final_path)
                    or warn "couldn't rename $tmp_path: $!\n";
            }
            else {
                warn "couldn't write $tmp_path: $!\n";
                unlink $tmp_path;
            }
        }
        else {
            warn "couldn't > $tmp_path: $!\n";
        }
    }

    return 1;
}


# Check If-none-match and/or If-modified-since headers and return true if
# we can send a 304 (not modified) response instead of a normal response.
#
# Reads $ENV{HTTP_IF_NONE_MATCH} and $ENV{HTTP_IF_MODIFIED_SINCE} and compares
# them with the current $etag and $last_modified_time.

sub check_for_304 {
    my $etag_send_304 = 0;
    my $mod_send_304  = 0;
    my $etag_request  = 0;
    my $mod_request   = 0;
    my $send_304      = 0;

    warn "lastmodified2: check_for_304\n" if $debug > 1;

    # For a conditional GET using the If-none-match header, compare the
    # ETag value(s) in the header with the ETag value generated for the page,
    # set $etag_send_304 true if we don't need to send a full response,
    # and note that an etag value was included in the request.

    if ($ENV{'HTTP_IF_NONE_MATCH'}) {
        $etag_request = 1;

        if ($generate_etag) {
            my @inm_etags = split /\s*,\s*/, $ENV{'HTTP_IF_NONE_MATCH'};

            if ($debug > 0) {
                for my $inm_etag (@inm_etags) {
                    warn "lastmodified2: \$inm_etag = |" . $inm_etag . "|\n";
                }
            }

            for my $inm_etag (@inm_etags) {
                if ($etag eq $inm_etag) {
                    $etag_send_304 = 1;
                    last;
                }
            }
        }
    }

    # For a conditional GET using the If-modified-since header, compare the
    # time in the header with the time any entry on the page was last modified,
    # set $mod_send_304 true if we don't need to send a full response, and
    # also note that a last-modified value was included in the request.

    if ($ENV{'HTTP_IF_MODIFIED_SINCE'}) {
        $mod_request = 1;

        if ($generate_mod) {
            my $ims_time =
                HTTP::Date::str2time($ENV{'HTTP_IF_MODIFIED_SINCE'});

            warn "lastmodified2: \$ims_time = " .
                (defined $ims_time ? $ims_time : '') . "\n" if $debug > 0;

            # An unparsable date gives an undefined $ims_time, which must
            # never count as "not modified".
            $mod_send_304 = 1
                if defined $ims_time && $last_modified_time <= $ims_time;
        }
    }

    # If the request includes both If-none-match and If-modified-since then
    # we don't send a 304 response unless both tests agree it should be sent,
    # per section 13.3.4 of the HTTP 1.1 specification.

    if ($etag_request && $mod_request) {
        $send_304 = $etag_send_304 && $mod_send_304;
    }
    else {
        $send_304 = $etag_send_304 || $mod_send_304;
    }

    warn "lastmodified2: \$send_304 = " . $send_304 .
        " \$etag_send_304 = " . $etag_send_304 .
        " \$mod_send_304 = " . $mod_send_304 . "\n" if $debug > 0;

    return $send_304;
}


# Set status and add additional header(s) depending on the type of response.
#
# Argument: $send_304 - true when a 304 (not modified) response is being sent.
# Side effects: updates $blosxom::header and, for a 304, empties
# $blosxom::output.

sub add_headers {
    my ($send_304) = @_;

    warn "lastmodified2: add_headers (\$send_304 = $send_304)\n"
        if $debug > 1;

    # Set HTTP status and truncate output if we are sending a 304 response.

    if ($send_304) {
        $blosxom::header->{'Status'} = "304 Not Modified";
        $blosxom::output = "";

        warn "lastmodified2: Status: " .
            $blosxom::header->{'Status'} . "\n" if $debug > 0;
    }

    # For the rules on what headers to generate for a 304 response, see
    # section 10.3.5 of the HTTP 1.1 protocol specification.

    # Last-modified is not returned on a 304 response.

    if ($generate_mod && !$send_304) {
        $blosxom::header->{'Last-modified'} =
            HTTP::Date::time2str($last_modified_time);

        warn "lastmodified2: Last-modified: " .
            $blosxom::header->{'Last-modified'} . "\n" if $debug > 0;
    }

    # If we send ETag on a 200 response then we send it on a 304 as well.

    if ($generate_etag) {
        $blosxom::header->{'ETag'} = $etag;

        warn "lastmodified2: ETag: " .
            $blosxom::header->{'ETag'} . "\n" if $debug > 0;
    }

    # We send Expires for a 304 since its value is updated for each request.

    if ($generate_expires) {
        $blosxom::header->{'Expires'} = $freshness_time
            ? HTTP::Date::time2str($current_time + $freshness_time)
            : HTTP::Date::time2str($current_time - 60);

        warn "lastmodified2: Expires: " .
            $blosxom::header->{'Expires'} . "\n" if $debug > 0;
    }

    # We send Cache-control for a 304 response for consistency with Expires.

    if ($generate_cache) {
        $blosxom::header->{'Cache-control'} = $freshness_time
            ? "max-age=" . $freshness_time
            : "no-cache";

        warn "lastmodified2: Cache-control: " .
            $blosxom::header->{'Cache-control'} . "\n" if $debug > 0;
    }

    # Content-length is not returned on a 304 response.

    if ($generate_length && !$send_304) {
        $blosxom::header->{'Content-length'} = length($blosxom::output);

        warn "lastmodified2: Content-length: " .
            $blosxom::header->{'Content-length'} . "\n" if $debug > 0;
    }

    return;
}


# Generate a tag to look up the cached last-modified value and MD5 digest
# for this URI.

sub cache_tag {
    # Start with the original URI from the request.

    my $tag = $ENV{REQUEST_URI} || "";

    # Add an "/index.flavour" for uniqueness unless it's already present.

    unless ($tag =~ m!/index\.!) {
        $tag .= '/' unless ($tag =~ m!/$!);
        $tag .= "index.$blosxom::flavour";
    }

    return $tag;
}


# Convert time to ISO 8601 format (including time zone offset).
# (Format is YYYY-MM-DDThh:mm:ssTZD per http://www.w3.org/TR/NOTE-datetime)
#
# The zone offset is taken from the local time of $timestamp itself rather
# than from the current time, so a timestamp on the other side of a daylight
# saving change is reported with the offset that was in effect at that moment.

sub iso8601 {
    my ($timestamp) = @_;

    my @local_time = localtime($timestamp);

    # strftime gives "+hhmm"; the W3C profile wants "+hh:mm".
    my $tz_offset = strftime("%z", @local_time);
    $tz_offset = substr($tz_offset, 0, 3) . ":" . substr($tz_offset, 3);

    return strftime("%Y-%m-%dT%T", @local_time) . $tz_offset;
}


1;

__END__

=head1 NAME

Blosxom Plug-in: lastmodified2

=head1 SYNOPSIS

Enables caching and validation of dynamically-generated Blosxom pages
by generating C<ETag>, C<Last-modified>, C<Cache-control>, and/or
C<Expires> HTTP headers in the response and responding appropriately to
an C<If-none-match> and/or C<If-modified-since> header in the request.
Also generates a C<Content-length> header to support HTTP 1.0 persistent
connections.

=head1 VERSION

0.10

=head1 AUTHOR

Frank Hecker, http://www.hecker.org/

(based on work by Bob Schumaker, http://www.cobblers.net/blog/)

=head1 DESCRIPTION

This plugin enables caching and validation of dynamically-generated
Blosxom pages by web browsers, web proxies, feed aggregators, and other
clients by generating various cache-related HTTP headers in the response
and supporting conditional GET requests, as described below. This can
reduce excess network traffic and server load caused by requests for RSS
or Atom feeds or for web pages for popular entries or categories.

=head1 INSTALLATION AND CONFIGURATION

Copy this plugin into your Blosxom plugin directory. You should not
normally need to rename the plugin; however see the discussion below.

Configurable variables specify how the plugin handles validation
(C<$generate_etag>, C<$generate_mod>, and C<$strong>), caching
(C<$generate_cache>, C<$generate_expires>, and C<$freshness_time>) and
whether or not to generate any other recommended headers
(C<$generate_length>). The plugin supports the variable C<$use_others>
as used in the lastmodified plugin; however use of this is deprecated
(use strong validation instead). The variable C<$export_dates> specifies
whether to export date/time variables C<$latest_rfc822>, etc., for
compatibility with the lastmodified plugin.

You can set the variable C<$debug> to 1 or greater to produce additional
information useful in debugging the operation of the plugin; the debug
output is sent to your web server's error log.

This plugin supplies C<filter>, C<skip>, and C<last> subroutines. It
needs to run after any other plugin whose C<filter> subroutine changes
the list of entries included in the response; otherwise the
C<Last-modified> date may be computed incorrectly. It needs to run after
any other plugin whose C<skip> subroutine does redirection (e.g., the
canonicaluri plugin) or otherwise conditionally sets the HTTP status to
any value other than 200. Finally, this plugin needs to run after any
other plugin whose C<last> subroutine changes the output for the page;
otherwise the C<Content-length> value (and the C<ETag> and
C<Last-modified> values, if you are using strong validation) may be
computed incorrectly. If you are encountering problems in any of these
regards then you can force the plugin to run after other plugins by
renaming it to, e.g., 99lastmodified2.

=head1 SEE ALSO

Blosxom Home/Docs/Licensing: http://blosxom.sourceforge.net/

Blosxom Plugin Docs:
http://blosxom.sourceforge.net/documentation/users/plugins.html

lastmodified plugin: http://www.cobblers.net/blog/dev/blosxom/

more on the lastmodified2 plugin:
http://www.hecker.org/blosxom/lastmodified2

=head1 BUGS

None known; please send bug reports and feedback to the Blosxom
development mailing list.

=head1 AUTHOR

Frank Hecker, http://www.hecker.org/

Based on the original lastmodified plugin by Bob Schumaker,
http://www.cobblers.net/blog

This plugin is now maintained by the Blosxom Sourceforge Team.

=head1 LICENSE

This source code is submitted to the public domain. Feel free to use and
modify it. If you like, a comment in your modified source attributing
credit to myself, Bob Schumaker, and any other contributors for our work
would be appreciated.

THIS SOFTWARE IS PROVIDED AS IS AND WITHOUT ANY WARRANTY OF ANY KIND.
USE AT YOUR OWN RISK!