1 # Blosxom Plugin: autotrack -*- cperl -*-
2 # Author: Todd Larason (jtl@molehill.org)
4 # Blosxom Home/Docs/Licensing: http://www.raelity.org/blosxom
5 # AutoTrack plugin Home/Docs/Licensing:
6 # http://molelog.molehill.org/blox/Computers/Internet/Web/Blosxom/AutoTrack/
9 # -------------- Configuration Variables --------------
11 # regular expression matching URLs not to trackback
12 # there's no reason to try google or amazon, and most people don't
13 # want to trackback their own stories
14 $dont_tb_re = qr!(?:http://(?:
15 (?: www\.google\.com ) |
16 (?: www\.amazon\.com )
19 (?: $blosxom::url )!ix
20 unless defined $dont_tb_re;
22 # what to do if the timestamp file doesn't exist? if $start_from_now is
23 # set, autotrack future stories but not old ones
24 $start_from_now = 1 unless defined $start_from_now;
26 # how automatic? if $semi_auto is set, only send trackbacks if ?autotrack=yes
27 # otheriwse, fully automatic
28 $semi_auto = 1 unless defined $semi_auto;
30 # networking implementation to be used
31 # can be 'LWP', 'curl' or 'wget' (or a full pathname to a curl or wget
33 # wget must be at least a 1.9 beta to support the --post-data option
34 $networking = "LWP" unless defined $networking;
37 unless defined $debug_level;
39 # -----------------------------------------------------
41 # template-visible vars
47 use vars qw/$dont_tb_re $start_from_now $semi_auto $networking $debug_level/;
49 my $dont_really_ping = 0;
50 my $package = "autotrack";
51 my $timestamp_file = "$blosxom::plugin_state_dir/.$package.timestamp";
56 my ($level, @msg) = @_;
58 if ($debug_level >= $level) {
59 print STDERR "$package debug $level: @msg\n";
67 s/([^a-zA-Z0-9])/sprintf("%%%02x",ord($1))/eg;
75 if ($networking =~ m:curl:) {
76 return `$networking -m 30 -s $url`;
77 } elsif ($networking =~ m:wget:) {
78 return `$networking --quiet -O - $url`;
79 } elsif ($networking eq 'LWP') {
80 foreach (qw/LWP::UserAgent HTTP::Request::Common/) {
83 debug(0, "Can't load $_, can't use LWP networking");
87 my $ua = LWP::UserAgent->new;
88 my $res = $ua->request(HTTP::Request::Common::GET $url);
89 if (!$res->is_success) {
90 my $error = $res->status_line;
91 debug(0, "HTTP GET error: $error");
96 debug(0, "ERROR: invalid \$networking $networking");
101 my ($url, %vars) = @_;
103 if ($networking =~ m:curl:) {
104 my $command = "$networking -m 30 -s ";
105 $command .= join ' ',
106 map {my $v = url_escape($vars{$_}); "-d $_=$v";} keys %vars;
108 debug(2, "Posting with :$command:");
110 unless $dont_really_ping;
111 return "<error>0</error>"; # for testing
112 } elsif ($networking =~ m:wget:) {
113 my $command = "$networking --quiet -O - --post-data='";
114 $command .= join '&',
115 map {my $v = url_escape($vars{$_}); "$_=$v"} keys %vars;
116 $command .= "' $url";
117 debug(2, "Posting with :$command:");
119 unless $dont_really_ping;
120 return "<error>0</error>"; # for testing
121 } elsif ($networking eq 'LWP') {
122 foreach (qw/LWP::UserAgent HTTP::Request::Common/) {
125 debug(0, "Can't load $_, can't use LWP networking");
129 my $ua = LWP::UserAgent->new;
130 my $res = $ua->request(HTTP::Request::Common::POST($url, [%vars]));
131 if (!$res->is_success) {
132 my $error = $res->status_line;
133 debug(0, "HTTP POST error: $error");
136 return $res->content;
138 debug(0, "ERROR: invalid \$networking $networking");
142 sub get_trackback_url {
145 return undef if ($url =~ m:$dont_tb_re:);
146 my $text = GET($url);
147 return undef if (!defined($text));
149 while ($text =~ m!(<rdf:RDF.*?</rdf:RDF>)!msg) {
151 my ($id) = ($rdf =~ m!dc:identifier="([^\"]+)"!);
152 next unless ($id eq $url);
153 my ($tb) = ($rdf =~ m!trackback:ping="([^\"]+)"!);
154 return $tb if defined $tb;
156 if ($url =~ m:(.*)\#:) {
158 while ($text =~ m!(<rdf:RDF.*?</rdf:RDF>)!msg) {
160 # XXX is this good enough? Can't the namespace IDs be different?
161 # the sample code in the spec @
162 # http://www.movabletype.org/docs/mttrackback.html
164 my ($id) = ($rdf =~ m!dc:identifier="([^\"]+)"!);
165 next unless ($id eq $url);
166 my ($tb) = ($rdf =~ m!trackback:ping="([^\"]+)"!);
167 return $tb if defined $tb;
170 debug(2, "Couldn't find tb url for $url");
175 my ($tb_url, $title, $excerpt, $url) = @_;
177 my $txt = POST($tb_url, title => $title, url => $url, excerpt => $excerpt,
178 blog_name => $blosxom::blog_title);
179 debug(3, "Response:$txt:");
180 if ($txt =~ m:<error>(.*?)</error>:ms) {
183 $txt =~ m:<message>(.*)</message>:ms;
184 debug(0, "Error $errcode ($1) pinging $tb_url");
189 debug(0, "Malformed response while pinging $tb_url");
196 # XXX options to use plaintext or foreshortened plugins
198 $story =~ s:<.+?>::msg;
199 $story = substr($story, 0, 255);
206 return 0 unless $networking;
207 return 0 if ($semi_auto && !param('autotrack'));
208 # XXX there are at least two different race conditions here
209 # 1: two instances running at the same time could both see an old
211 # 2: an instance run at the same time a new story is published could
212 # (at least if there's any clock skew at all) create a timestamp
213 # file >= timestamp(story), for a story that isn't seen.
214 $last_timestamp = -e $timestamp_file ? stat($timestamp_file)->mtime :
215 ($start_from_now ? time : 0);
216 my $fh = new FileHandle;
217 if (!$fh->open("> $timestamp_file")) {
218 debug(0, "Couldn't touch timestamp file $timestamp_file");
223 debug(1, "autotrack enabled");
229 my ($pkg, $files_ref) = @_;
237 my ($pkg, $path, $filename, $story_ref, $title_ref, $body_ref) = @_;
238 my ($pathname) = "$blosxom::datadir$path/$filename.$blosxom::file_extension";
239 return 1 if ($files->{$pathname} < $last_timestamp);
240 my (%pinged, $ping_tries, $ping_succeed);
241 my $excerpt = make_excerpt($$body_ref);
242 my $url = "$blosxom::url$path/$filename.writeback";
243 defined $meta::tb_ping and
245 ping_trackback($meta::tb_ping, $$title_ref, $excerpt, $url) and
247 ++$pinged{$meta::tb_ping};
248 return 1 if (defined $meta::autotrack && $meta::autotrack eq 'no');
255 my $trackback_url = get_trackback_url(decode_entities($+));
256 next unless defined $trackback_url;
257 next if $pinged{$trackback_url};
259 ping_trackback($trackback_url, $$title_ref, $excerpt, $url) and
261 ++$pinged{trackback_url};
262 debug(1, "autotracked: $trackback_url");
265 # XXX what do we do if some succeed and some fail?
266 # If we tried some but they all failed, revert the timestamp to
268 if ($ping_succeed == 0 && $ping_tries > 0) {
269 debug(0, "All pings failed, reverting timestamp");
270 utime($last_timestamp, $last_timestamp, $timestamp_file);
278 Blosxom Plug-in: autotrack
282 Automatically or semi-automatically sends trackback pings for new stories.
290 Todd Larason <jtl@molehill.org> http://molelog.molehill.org/
294 None known; address bug reports and comments to me or to the Blosxom
295 mailing list [http://www.yahoogroups.com/groups.blosxom].
297 =head1 Trackback Ping URL Discovery
299 Trackback Ping URLs are discovered two different ways.
301 =head2 Manual Ping URLs
303 If you have the meta plugin installed, and have it set to run prior to the
304 autotrack plugin, you can give a trackback url with the "meta-tb_ping"
305 header; the value of the header should be the ping URL to ping.
307 =head2 Automatic Ping URL detection
309 Subject to some exceptions explained below, every URL given in an 'href' in
310 the story is fetched, and the resulting content is searched for embedded RDF
311 sections giving trackback URLs for the given URL. This is the preferred way
312 for all tools to be given trackback URLs, as it requires no human
313 intervention, but unfortunately not everyone which has a trackback server
314 includes the appropriate RDF. Even more unfortunately, there's no easy
315 way to know whether it's included or not, other than examining the source
318 It's always safe to give a meta-tb_ping header; if you give one, and the
319 same ping URL is found by autodiscovery, it's only pinged once.
321 If you don't want autodiscovery to be used for a given story, you can set
322 the meta header 'meta-autotrack' to 'no'. If "meta-autotrack: no" is given,
323 the meta-tb_ping URL is still pinged if it's specified.
327 =head2 Configuration Variables
329 C<$dont_tb_re> is a regular expression agains which URLs are matched;
330 if it matches, the URL isn't fetched for autodiscovery; this is useful
331 for classes of URLs that you link to frequently that you know don't
332 include the autodiscovery RDF, or that you don't wish to be pinged. The
333 default value matches Amazon and Google URLs, as well as references to
336 C<$start_from_now> is a boolean that controls the behavior if the timestamp
337 file doesn't exist; if it's true, then it's treated as if it does exist,
338 with the current time -- no old articles are pinged. If it's false, then
339 every story seen is treated as new. Defaults to true.
341 C<$semi_auto> is a boolean controlling how automatic the pinging is. If
342 it's false, then the plugin acts in fully automatic mode -- it's always
343 enabled, and any new story is examined. If it's true, then the plugin
344 acts in semi-automatic mode -- it's only enabled if the URL being browsed
345 includes the paramater "autotrack" (ie, ends with "?autotrack=yes"). By
346 default, this is true.
348 C<$networking> controls which networking implementation to use. If set to
349 "LWP", an implementation which uses the common LWP (libwww-for-perl) perl
350 module set is used; if set to a string that includes the word 'curl', an
351 implementation which uses the external 'curl' utility is used, and the value
352 of $networking is used as the beginning of the command line (this can be used
353 to specify a full path to curl or to pass additional arguments); if set
354 to a string which includes the word 'wget', an implementation which uses the
355 external 'wget' utility is used with $networking used at the beginning of
356 the command line as with curl. The wget executable must be new enough to
357 include the --post-data option; currently, that means a recent 1.9 beta.
360 C<$debug_level> is an int from 0 to 5 controlling how much debugging output
361 is logged; 0 logs only errors. Defaults to 1.
363 =head2 CSS and Flavour Files
365 There is no output, so no customization through these methods.
369 A timestamp file is kept as $plugin_state_dir/.autotrack.timestamp; stories
370 are considered 'new' if their timestamp is later than the timestamp file
371 (see the C<$start_from_now> variable for the behavior if the file doesn't
372 exist). There is a small race condition between reading the timestamp
373 file and updating it when the plugin is enabled; one advantage of semi-
374 automatic mode is that this is rarely a problem, since the plugin is only
375 enabled when you want it to be.
377 If trackback pings are attempted but they all fail, the timestamp file is
378 reverted to its previous value, so the pings will be tried again later. if
379 some pings succeed and others fail, however, the timestamp is left with the
380 updated values, and the failed pings won't be retried.
384 * Rael Dornfest -- blosxom (of course) and suggesting $start_from_now option
385 * Taper Wickel -- pointing out wget 1.9's post support
390 Copyright 2003, Todd Larason
392 (This license is the same as Blosxom's)
394 Permission is hereby granted, free of charge, to any person obtaining a
395 copy of this software and associated documentation files (the "Software"),
396 to deal in the Software without restriction, including without limitation
397 the rights to use, copy, modify, merge, publish, distribute, sublicense,
398 and/or sell copies of the Software, and to permit persons to whom the
399 Software is furnished to do so, subject to the following conditions:
401 The above copyright notice and this permission notice shall be included
402 in all copies or substantial portions of the Software.
404 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
405 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
406 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
407 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
408 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
409 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
410 OTHER DEALINGS IN THE SOFTWARE.