Add xml entity escape code to rss20.
[matthijs/upstream/blosxom-plugins.git] / gavinc / rss20
index 9ce4d0d8c9d27cb714e99b47fc20da7f111fa005..4d7ac2951aa785bc236496d05086a79ea3a7ebec 100644 (file)
@@ -1,6 +1,6 @@
 # Blosxom Plugin: rss20
 # Author(s): Gavin Carr <gavin@openfusion.com.au>
-# Version: 0.001001
+# Version: 0.001002
 # Requires: storydate, lastmodified2
 # Suggests: absolute
 # Follows:  storydate, lastmodified2
@@ -50,38 +50,79 @@ $generator_url = "http://blosxom.sourceforge.net/?v=$blosxom::version";
 
 $error_email ||= $author_email;
 
+# Escape <, >, and & to hex-encoded entities for max compatibility in text elements
+# See http://www.rssboard.org/rss-profile#data-types-characterdata
+my %escape_text = (
+  '<' => '&#x3C;',
+  '>' => '&#x3E;',
+  '&' => '&#x26;',
+);
+my $escape_text_re = join '|' => keys %escape_text;  # alternation of the literal keys (none is a regex metacharacter); used by _escape_text
+
+# Escape <, >, and & to standard html-encoded entities for use in html elements
+my %escape_html = (
+  '<' => '&lt;',
+  '>' => '&gt;',
+  '&' => '&amp;',
+);
+my $escape_html_re = join '|' => keys %escape_html;  # alternation of the literal keys (none is a regex metacharacter); used by _escape_html
+
 sub start { # no explicit return: the sub yields whatever _load_templates() returns
   _load_templates();
+}
 
-  1;
+sub story {  # escapes the title and body in place through $title_ref / $body_ref; the other arguments are not used here
+  my ($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
+
+  # Don't double-encode if someone else has already done it
+  return unless $blosxom::encode_xml_entities;  # bare return: nothing is modified when the flag is false
+
+  # Encode and reset encode_xml_entities flag
+  $$title_ref = _escape_text( $$title_ref );  # title gets hex numeric entities (&#x3C; &#x3E; &#x26;)
+  $$body_ref =  _escape_html( $$body_ref );  # body gets named entities (&lt; &gt; &amp;)
+  $blosxom::encode_xml_entities = 0;  # NOTE(review): once cleared, the guard above skips every later call unless the flag is reset elsewhere -- confirm later stories are still escaped
 }
 
 # --- Private subroutines
 
+sub _escape_text {  # returns a copy of $text with each <, > or & replaced by its %escape_text entity
+  my ($text) = @_;  # works on a copy; the caller's value is not modified
+  $text =~ s/($escape_text_re)/$escape_text{$1}/g;  # $1 is the matched character, used as the hash key
+  return $text;
+}
+
+sub _escape_html {  # returns a copy of $html with each <, > or & replaced by its %escape_html entity
+  my ($html) = @_;  # works on a copy; the caller's value is not modified
+  $html =~ s/($escape_html_re)/$escape_html{$1}/g;  # $1 is the matched character, used as the hash key
+  return $html;
+}
+
 sub _load_templates {
-  $blosxom::template{$flavour}{'content_type'} = 'text/xml';
+  $blosxom::template{$flavour}{'content_type'} = 'text/xml; charset=$blog_encoding';
 
   $blosxom::template{$flavour}{'date'} = "\n";
 
-  $blosxom::template{$flavour}{'head'} = <<'HEAD';
-<?xml version="1.0" encoding="iso-8859-1"?>
+  $blosxom::template{$flavour}{'head'} = <<HEAD;
+<?xml version="1.0" encoding="$blosxom::blog_encoding"?>
 <rss version="2.0"
     xmlns:dc="http://purl.org/dc/elements/1.1/"
     xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
     xmlns:admin="http://webns.net/mvcb/"
+    xmlns:atom="http://www.w3.org/2005/Atom"
     xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
     xmlns:content="http://purl.org/rss/1.0/modules/content/">
 
     <channel>
-        <title>$blog_title</title>
-        <link>$url</link>
-        <description>$blog_description</description>
-        <dc:date>$lastmodified2::latest_iso8601</dc:date>
+        <title>$blosxom::blog_title</title>
+        <link>$blosxom::url</link>
+        <description>$blosxom::blog_description</description>
+        <dc:date>\$lastmodified2::latest_iso8601</dc:date>
         <dc:language>$blosxom::blog_language</dc:language>
         <dc:creator>mailto:$rss20::author_email</dc:creator>
         <dc:rights>$rss20::copyright</dc:rights>
         <admin:generatorAgent rdf:resource="$rss20::generator_url" />
         <admin:errorReportsTo rdf:resource="mailto:$rss20::error_email" />
+        <atom:link href="$blosxom::url$ENV{PATH_INFO}" rel="self" type="application/rss+xml" />
         <sy:updatePeriod>hourly</sy:updatePeriod>
         <sy:updateFrequency>1</sy:updateFrequency>
         <sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
@@ -103,6 +144,8 @@ STORY
     </channel>                                    
 </rss>  
 FOOT
+
+  1;
 }