1 # Blosxom Plugin: autotrack -*- cperl -*-
2 # Author: Todd Larason (jtl@molehill.org)
4 # Blosxom Home/Docs/Licensing: http://blosxom.sourceforge.net/
5 # AutoTrack plugin Home/Docs/Licensing:
6 # http://molelog.molehill.org/blox/Computers/Internet/Web/Blosxom/AutoTrack/
9 # -------------- Configuration Variables --------------
11 # regular expression matching URLs not to trackback
12 # there's no reason to try google or amazon, and most people don't
13 # want to trackback their own stories
14 $dont_tb_re = qr!(?:http://(?:
15 (?: www\.google\.com ) |
16 (?: www\.amazon\.com )
19 (?: $blosxom::url )!ix
20 unless defined $dont_tb_re;
22 # what to do if the timestamp file doesn't exist? if $start_from_now is
23 # set, autotrack future stories but not old ones
24 $start_from_now = 1 unless defined $start_from_now;
26 # how automatic? if $semi_auto is set, only send trackbacks if ?autotrack=yes
27 # otheriwse, fully automatic
28 $semi_auto = 1 unless defined $semi_auto;
30 # networking implementation to be used
31 # can be 'LWP', 'curl' or 'wget' (or a full pathname to a curl or wget
33 # wget must be at least a 1.9 beta to support the --post-data option
34 $networking = "LWP" unless defined $networking;
37 unless defined $debug_level;
39 # -----------------------------------------------------
41 # template-visible vars
47 use vars qw/$dont_tb_re $start_from_now $semi_auto $networking $debug_level/;
49 my $dont_really_ping = 0;
50 my $package = "autotrack";
51 my $timestamp_file = "$blosxom::plugin_state_dir/.$package.timestamp";
56 my ($level, @msg) = @_;
58 if ($debug_level >= $level) {
59 print STDERR "$package debug $level: @msg\n";
67 s/([^a-zA-Z0-9])/sprintf("%%%02x",ord($1))/eg;
75 if ($networking =~ m:curl:) {
76 return `$networking -m 30 -s $url`;
77 } elsif ($networking =~ m:wget:) {
78 return `$networking --quiet -O - $url`;
79 } elsif ($networking eq 'LWP') {
80 foreach (qw/LWP::UserAgent HTTP::Request::Common/) {
83 debug(0, "Can't load $_, can't use LWP networking");
87 my $ua = LWP::UserAgent->new;
88 my $res = $ua->request(HTTP::Request::Common::GET $url);
89 if (!$res->is_success) {
90 my $error = $res->status_line;
91 debug(0, "HTTP GET error: $error");
96 debug(0, "ERROR: invalid \$networking $networking");
101 my ($url, %vars) = @_;
103 if ($networking =~ m:curl:) {
104 my $command = "$networking -m 30 -s ";
105 $command .= join ' ',
106 map {my $v = url_escape($vars{$_}); "-d $_=$v";} keys %vars;
108 debug(2, "Posting with :$command:");
110 unless $dont_really_ping;
111 return "<error>0</error>"; # for testing
112 } elsif ($networking =~ m:wget:) {
113 my $command = "$networking --quiet -O - --post-data='";
114 $command .= join '&',
115 map {my $v = url_escape($vars{$_}); "$_=$v"} keys %vars;
116 $command .= "' $url";
117 debug(2, "Posting with :$command:");
119 unless $dont_really_ping;
120 return "<error>0</error>"; # for testing
121 } elsif ($networking eq 'LWP') {
122 foreach (qw/LWP::UserAgent HTTP::Request::Common/) {
125 debug(0, "Can't load $_, can't use LWP networking");
129 my $ua = LWP::UserAgent->new;
130 my $res = $ua->request(HTTP::Request::Common::POST($url, [%vars]));
131 if (!$res->is_success) {
132 my $error = $res->status_line;
133 debug(0, "HTTP POST error: $error");
136 return $res->content;
138 debug(0, "ERROR: invalid \$networking $networking");
142 sub get_trackback_url {
145 return undef if ($url =~ m:$dont_tb_re:);
146 my $text = GET($url);
147 return undef if (!defined($text));
149 while ($text =~ m!(<rdf:RDF.*?</rdf:RDF>)!msg) {
151 my ($id) = ($rdf =~ m!dc:identifier="([^\"]+)"!);
152 next unless ($id eq $url);
153 my ($tb) = ($rdf =~ m!trackback:ping="([^\"]+)"!);
154 return $tb if defined $tb;
156 if ($url =~ m:(.*)\#:) {
158 while ($text =~ m!(<rdf:RDF.*?</rdf:RDF>)!msg) {
160 # XXX is this good enough? Can't the namespace IDs be different?
161 # the sample code in the spec @
162 # http://www.movabletype.org/docs/mttrackback.html
164 my ($id) = ($rdf =~ m!dc:identifier="([^\"]+)"!);
165 next unless ($id eq $url);
166 my ($tb) = ($rdf =~ m!trackback:ping="([^\"]+)"!);
167 return $tb if defined $tb;
170 debug(2, "Couldn't find tb url for $url");
175 my ($tb_url, $title, $excerpt, $url) = @_;
177 my $txt = POST($tb_url, title => $title, url => $url, excerpt => $excerpt,
178 blog_name => $blosxom::blog_title);
179 debug(3, "Response:$txt:");
180 if ($txt =~ m:<error>(.*?)</error>:ms) {
183 $txt =~ m:<message>(.*)</message>:ms;
184 debug(0, "Error $errcode ($1) pinging $tb_url");
189 debug(0, "Malformed response while pinging $tb_url");
196 # XXX options to use plaintext or foreshortened plugins
198 $story =~ s:<.+?>::msg;
199 $story = substr($story, 0, 255);
206 return 0 unless $networking;
207 return 0 if ($semi_auto && !param('autotrack'));
208 # XXX there are at least two different race conditions here
209 # 1: two instances running at the same time could both see an old
211 # 2: an instance run at the same time a new story is published could
212 # (at least if there's any clock skew at all) create a timestamp
213 # file >= timestamp(story), for a story that isn't seen.
214 $last_timestamp = -e $timestamp_file ? stat($timestamp_file)->mtime :
215 ($start_from_now ? time : 0);
216 my $fh = new FileHandle;
217 if (!$fh->open("> $timestamp_file")) {
218 debug(0, "Couldn't touch timestamp file $timestamp_file");
223 debug(1, "autotrack enabled");
229 my ($pkg, $files_ref) = @_;
237 my ($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
238 my ($pathname) = "$blosxom::datadir$path/$filename.$blosxom::file_extension";
239 return 1 if ($files->{$pathname} < $last_timestamp);
240 my (%pinged, $ping_tries, $ping_succeed);
241 my $excerpt = make_excerpt($$body_ref);
242 my $url = "$blosxom::url$path/$filename.writeback";
243 defined $meta::tb_ping and
245 ping_trackback($meta::tb_ping, $$title_ref, $excerpt, $url) and
247 ++$pinged{$meta::tb_ping};
248 return 1 if (defined $meta::autotrack && $meta::autotrack eq 'no');
255 my $trackback_url = get_trackback_url(decode_entities($+));
256 next unless defined $trackback_url;
257 next if $pinged{$trackback_url};
259 ping_trackback($trackback_url, $$title_ref, $excerpt, $url) and
261 ++$pinged{trackback_url};
262 debug(1, "autotracked: $trackback_url");
265 # XXX what do we do if some succeed and some fail?
266 # If we tried some but they all failed, revert the timestamp to
268 if ($ping_succeed == 0 && $ping_tries > 0) {
269 debug(0, "All pings failed, reverting timestamp");
270 utime($last_timestamp, $last_timestamp, $timestamp_file);
278 Blosxom Plug-in: autotrack
282 Automatically or semi-automatically sends trackback pings for new stories.
292 Todd Larason <jtl@molehill.org> http://molelog.molehill.org/
294 This plugin is now maintained by the Blosxom Sourceforge Team,
295 <blosxom-devel@lists.sourceforge.net>.
299 None known; please send bug reports and feedback to the Blosxom
300 development mailing list <blosxom-devel@lists.sourceforge.net>.
302 =head1 Trackback Ping URL Discovery
304 Trackback Ping URLs are discovered two different ways.
306 =head2 Manual Ping URLs
308 If you have the meta plugin installed, and have it set to run prior to the
309 autotrack plugin, you can give a trackback url with the "meta-tb_ping"
310 header; the value of the header should be the ping URL to ping.
312 =head2 Automatic Ping URL detection
314 Subject to some exceptions explained below, every URL given in an 'href' in
315 the story is fetched, and the resulting content is searched for embedded RDF
316 sections giving trackback URLs for the given URL. This is the preferred way
317 for all tools to be given trackback URLs, as it requires no human
318 intervention, but unfortunately not everyone which has a trackback server
319 includes the appropriate RDF. Even more unfortunately, there's no easy
320 way to know whether it's included or not, other than examining the source
323 It's always safe to give a meta-tb_ping header; if you give one, and the
324 same ping URL is found by autodiscovery, it's only pinged once.
326 If you don't want autodiscovery to be used for a given story, you can set
327 the meta header 'meta-autotrack' to 'no'. If "meta-autotrack: no" is given,
328 the meta-tb_ping URL is still pinged if it's specified.
332 =head2 Configuration Variables
334 C<$dont_tb_re> is a regular expression agains which URLs are matched;
335 if it matches, the URL isn't fetched for autodiscovery; this is useful
336 for classes of URLs that you link to frequently that you know don't
337 include the autodiscovery RDF, or that you don't wish to be pinged. The
338 default value matches Amazon and Google URLs, as well as references to
341 C<$start_from_now> is a boolean that controls the behavior if the timestamp
342 file doesn't exist; if it's true, then it's treated as if it does exist,
343 with the current time -- no old articles are pinged. If it's false, then
344 every story seen is treated as new. Defaults to true.
346 C<$semi_auto> is a boolean controlling how automatic the pinging is. If
347 it's false, then the plugin acts in fully automatic mode -- it's always
348 enabled, and any new story is examined. If it's true, then the plugin
349 acts in semi-automatic mode -- it's only enabled if the URL being browsed
350 includes the paramater "autotrack" (ie, ends with "?autotrack=yes"). By
351 default, this is true.
353 C<$networking> controls which networking implementation to use. If set to
354 "LWP", an implementation which uses the common LWP (libwww-for-perl) perl
355 module set is used; if set to a string that includes the word 'curl', an
356 implementation which uses the external 'curl' utility is used, and the value
357 of $networking is used as the beginning of the command line (this can be used
358 to specify a full path to curl or to pass additional arguments); if set
359 to a string which includes the word 'wget', an implementation which uses the
360 external 'wget' utility is used with $networking used at the beginning of
361 the command line as with curl. The wget executable must be new enough to
362 include the --post-data option; currently, that means a recent 1.9 beta.
365 C<$debug_level> is an int from 0 to 5 controlling how much debugging output
366 is logged; 0 logs only errors. Defaults to 1.
368 =head2 CSS and Flavour Files
370 There is no output, so no customization through these methods.
374 A timestamp file is kept as $plugin_state_dir/.autotrack.timestamp; stories
375 are considered 'new' if their timestamp is later than the timestamp file
376 (see the C<$start_from_now> variable for the behavior if the file doesn't
377 exist). There is a small race condition between reading the timestamp
378 file and updating it when the plugin is enabled; one advantage of semi-
379 automatic mode is that this is rarely a problem, since the plugin is only
380 enabled when you want it to be.
382 If trackback pings are attempted but they all fail, the timestamp file is
383 reverted to its previous value, so the pings will be tried again later. if
384 some pings succeed and others fail, however, the timestamp is left with the
385 updated values, and the failed pings won't be retried.
389 * Rael Dornfest -- blosxom (of course) and suggesting $start_from_now option
390 * Taper Wickel -- pointing out wget 1.9's post support
395 Copyright 2003, Todd Larason
397 (This license is the same as Blosxom's)
399 Permission is hereby granted, free of charge, to any person obtaining a
400 copy of this software and associated documentation files (the "Software"),
401 to deal in the Software without restriction, including without limitation
402 the rights to use, copy, modify, merge, publish, distribute, sublicense,
403 and/or sell copies of the Software, and to permit persons to whom the
404 Software is furnished to do so, subject to the following conditions:
406 The above copyright notice and this permission notice shall be included
407 in all copies or substantial portions of the Software.
409 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
410 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
411 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
412 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
413 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
414 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
415 OTHER DEALINGS IN THE SOFTWARE.