# Blosxom Plugin: Find
# Author: Fletcher T. Penney
# advanced search concept and keywords code by Eric Sherman
# Recent Searches feature based on code by Marc Nozell
# Version: 0.9
# Origin: general/find in the blosxom-plugins repository (blob 7fd1c37)
package find;

use strict;
use warnings;
# CGI's :standard tag list includes HTML shortcuts such as head(); this
# plugin defines its own head() hook below, which may otherwise trigger a
# "Subroutine redefined" warning on every load.
no warnings 'redefine';

use CGI qw/:standard/;

# Package variables: configuration (overridable from outside as
# $find::name) and the values exposed to templates.
our ($keywords_tag, $do_local_search, $show_advanced, $show_debug,
     $default_to_and, $match_whole_words, $log_searches, $log_filename,
     $max_previous_searches, $search_writebacks, $writeback_dir,
     $writeback_ext, $search_filenames);
our ($results, $recentsearches, $searchform, $advancedform,
     $path_withflavour, $searchpath);

# --- Configurable variables -----
# None yet - may add ability to change search behaviors, such as
# always starting at the root level of your datadir while staying in a subdirectory

$keywords_tag = "meta-keywords:" unless defined $keywords_tag;

$do_local_search = 0;       # Perform search relative to the current page,
                            # not the whole site.  If set to 1, this will
                            # override the advanced search option

$show_advanced = 0;         # Set to 1 to always show the advanced form

$show_debug = 0;            # display more info about search terms for debugging

$default_to_and = 0;        # Set to 1 to always do "and" searches by default

$match_whole_words = 0;     # Set to 1 to only match whole words by default

$log_searches = 1;          # Log search queries to a file?
$log_filename = "$blosxom::plugin_state_dir/queries";    # Where should I log?

$max_previous_searches = 10;    # Maximum old queries to display

$search_writebacks = 1;     # Should I also search writebacks?
$writeback_dir = "$blosxom::plugin_state_dir/writeback";
$writeback_ext = "wb";

$search_filenames = 1;      # Should I also search filenames?

# --------------------------------

$results        = "";
$recentsearches = "";

# Escape the characters that are significant in HTML text and attribute
# values.  Returns '' for an undefined argument.
sub _escape_html {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

# Fetch a single CGI parameter in scalar context; '' when it is absent.
sub _param {
    my ($name) = @_;
    my $value = param($name);
    return defined $value ? $value : '';
}

# Append one query to the search log (one query per line).  Failure to
# write the log is reported with warn() and never aborts the search.
sub _log_query {
    my ($query) = @_;

    my $is_new = !-e $log_filename;
    my $log;
    unless (open($log, '>>', $log_filename)) {
        warn "Error in find logging file: $!";
        return;
    }
    # A freshly created log is made world-writable, as the plugin always did.
    chmod(0666, $log_filename) if $is_new;

    $query =~ s/[\r\n]+/ /g;    # keep the one-query-per-line format intact
    print {$log} "$query\n";
    close($log) or warn "Error closing find logging file: $!";
    return 1;
}

# Compile a search pattern case-insensitively with /s.  Search terms are
# allowed to be regular expressions, so an invalid one must not kill the
# request: it is matched as literal text instead.
sub _compile_term {
    my ($pattern) = @_;
    local $@;
    my $regex = eval { qr/$pattern/si };
    return $regex if defined $regex;
    return qr/\Q$pattern\E/si;
}

# Translate the submitted query into search patterns.
#
#   $terms       - the raw query string
#   $require_all - true when every plain term is required ("all" search)
#   $whole_words - true when terms must match on word boundaries
#
# Returns ($or_pattern, \@required, \@forbidden, $normalized) where
# $or_pattern is the alternation of the optional terms ('' if there are
# none), the two array refs hold pattern strings, and $normalized is the
# query after the English operators were converted to symbols (shown in
# debug mode).
sub _parse_terms {
    my ($terms, $require_all, $whole_words) = @_;

    $terms = " " . $terms;    # Add a space for pattern matching reasons

    # Handle double quotations (exact phrases)
    $terms =~ s/\"([^\"]+)\"/\[\{$1\}\]/g;
    while ($terms =~ s/\[\{([^\}]*)\s+([^\}]*)/\[\{$1\\s\+$2/g) {
    }
    $terms =~ s/\[\{/(/g;
    $terms =~ s/\}\]/)/g;
    # Any left over quotes were "odd-numbered"
    $terms =~ s/\"//g;

    # Handle parentheses
    while ($terms =~ s/\(([^\)]*)\s+([^\)]*)\)/\($1\|$2\)/g) {
    }

    # Strip trailing spaces to prevent empty terms
    # Don't strip leading spaces yet!
    $terms =~ s/\s+$//;

    # Convert English to symbols
    # The "OR"'s will wait til the end
    # Handle "NOT"'s
    $terms =~ s/\s+not\s+/ \-/ig;
    # Handle "AND"'s and convert to "+", unless preceded by "-"
    $terms =~ s/\s+(\([^\)]+\))\s+and\s+/ \+$1 \+/ig;
    $terms =~ s/\-(\([^\)]+\))\s+and\s+/\-$1 \+/ig;
    $terms =~ s/\s+([^\)]+)\s+and\s+/ \+$1 \+/ig;
    $terms =~ s/\-([^\)]+)\s+and\s+/\-$1 \+/ig;
    $terms =~ s/\+\-/\-/g;    # Fix if the second term already had "-"

    my $normalized = $terms;

    # If doing "all" search, then every term is required
    # Will not override terms already set to "NOT"
    $terms =~ s/\s+\+?([\(\)\|\w]+)/ \+$1/g if $require_all;

    my (@required, @forbidden);

    # Extract all required terms ("AND" terms)
    while ($terms =~ s/\s+\+([\(\)\|\\\+\w]+)//) {
        my $term = $1;
        push @required, $whole_words ? "\\b(?:$term)\\b" : $term;
    }

    # Extract all "forbidden" terms ("NOT" terms)
    while ($terms =~ s/\s+\-([\(\)\|\\\+\w]+)//) {
        my $term = $1;
        push @forbidden, $whole_words ? "\\b(?:$term)\\b" : $term;
    }

    # Strip "OR"'s with only one term
    while ($terms =~ s/^\s*or\s+//i) { }
    while ($terms =~ s/\s+or\s*$//i) { }

    # Now cleanup for regexp's
    $terms =~ s/^\s+//;    # Strip leading and trailing spaces
    $terms =~ s/\s+$//;
    # Finally, convert all the "OR" terms to a single regexp
    $terms =~ s/\s+(or\s+)?/\|/ig;
    $terms =~ s/(\s)\+/$1/g;    # Loose '+' will crash regexp

    # Handle whole word matching on remainder.  The group makes \b apply to
    # every alternative, and an empty remainder stays empty so that "nothing
    # to search for" can still be detected.
    $terms = "\\b(?:$terms)\\b" if $whole_words && $terms ne "";

    return ($terms, \@required, \@forbidden, $normalized);
}

# Return the searchable text of a story file: the title/body plus the
# value of the keywords header; other meta- header lines are skipped.
# Returns nothing when the file cannot be opened.
sub _read_story {
    my ($file) = @_;

    open(my $story, '<', $file) or return;
    my $contents    = "";
    my $past_header = 0;
    while (my $line = <$story>) {
        if (!$past_header) {
            # include keywords
            if ($line =~ /^\Q$keywords_tag\E/i) {
                $line =~ s/^\Q$keywords_tag\E(.*)$/$1/i;
            }
            # don't read other meta- tags
            elsif ($line =~ /^meta-/i) {
                next;
            }
            # a line starting with whitespace (e.g. a blank line) ends the header
            elsif ($line =~ /^\s/) {
                $past_header = 1;
            }
        }
        $contents .= $line;
    }
    close($story);
    return $contents;
}

# Return the text of the writeback file that belongs to a story, or ''
# when there is none.  The writeback path is the story path with the data
# directory and file extension swapped for their writeback counterparts.
sub _read_writebacks {
    my ($file) = @_;

    my $writeback_file = $file;
    $writeback_file =~ s/^\Q$blosxom::datadir\E/$writeback_dir/;
    $writeback_file =~ s/\Q$blosxom::file_extension\E$/$writeback_ext/;

    open(my $writeback, '<', $writeback_file) or return "";
    my $text = do { local $/; <$writeback> };
    close($writeback);
    return defined $text ? $text : "";
}

# Blosxom hook: build $find::searchform (and $find::advancedform) for the
# templates.  Returns 1 so that the plugin is enabled.
sub start {

    # Figure out the current path and flavour for the form
    $path_withflavour = defined $blosxom::path_info ? $blosxom::path_info : "";
    unless ($path_withflavour =~ s/\.[^\.]*$//) {
        # No file extension in the path: point at the index of that directory
        $path_withflavour =~ s/\/$//;
        $path_withflavour .= "/index";
    }
    $path_withflavour =~ s/^\/*//;
    $path_withflavour .= "." . $blosxom::flavour;

    # NOTE(review): the markup of both forms was reconstructed from the
    # visible labels and from the parameter names filter() reads (find,
    # path, domain, type, match, advanced_search) - confirm it against the
    # upstream plugin before deploying.

    # Insert this html code only if advanced form is indicated
    $advancedform = "";
    if (_param('advanced_search') || $show_advanced) {
        $advancedform = qq!<br />Search:<br />
<input checked="checked" type="radio" name="domain" value="all" />Entire Site
<input type="radio" name="domain" value="topic" />This Topic Only
<br />Match:<br />
<input checked="checked" type="radio" name="type" value="any" />Any
<input type="radio" name="type" value="all" />All
<br />
<input checked="checked" type="radio" name="match" value="any" />Partial
<input type="radio" name="match" value="whole" />Whole Words only
!;
    }

    # This is the basic form
    my $action = _escape_html("$blosxom::url/$path_withflavour");
    my $path   = _escape_html($blosxom::path_info);

    $searchform = qq!<form method="get" action="${action}" enctype="application/x-www-form-urlencoded">
<div>
<input type="text" name="find" size="15" value="" />
<input type="submit" value="Search" />
<input type="hidden" name="plugin" value="find" />
<input type="hidden" name="path" value="${path}" />
<br />
<a href="${action}?advanced_search=1">Advanced Search</a>
$advancedform
</div></form>!;

    1;
}

# Blosxom hook: when a "find" parameter was submitted, remove every entry
# from the file list (a hash ref keyed by file path) that does not satisfy
# the query, and describe the search in $find::results.
#
# Returns 1, or 0 when the query contained nothing to search for.
sub filter {
    my ($pkg, $files_ref) = @_;

    my $query = param('find');
    return 1 unless $query;

    # Request parameters adjust the behaviour for this request only; the
    # configured defaults are left untouched.
    $searchpath = "$blosxom::datadir/" . _param('path');
    my $local_search = ($do_local_search || _param('domain') eq "topic") ? 1 : 0;

    my $whole_words = $match_whole_words ? 1 : 0;
    my $match_mode  = _param('match');
    $whole_words = 1 if $match_mode eq 'whole';
    $whole_words = 0 if $match_mode eq 'any';

    my $searchtype  = _param('type');
    my $require_all = ($searchtype eq "all")
        || ($default_to_and && $searchtype ne 'any');

    # The query is user input and ends up in the page: escape it.
    $results = "These pages matched: " . _escape_html($query);

    _log_query($query) if $log_searches;

    my ($or_pattern, $required, $forbidden, $normalized) =
        _parse_terms($query, $require_all, $whole_words);

    # Debugging Aids
    if ($show_debug) {
        $results = "These pages matched: " . _escape_html($normalized);
        $results .= "<br>Required Term: " . _escape_html($_)  for @$required;
        $results .= "<br>Forbidden Term: " . _escape_html($_) for @$forbidden;
        $results .= "<br>Remainder regexp: " . _escape_html($or_pattern) . "<br>";
        $results .= "Search path: " . _escape_html($searchpath) . "<br>";
    }

    # Quit now if nothing to search for
    if ($or_pattern eq "" && !@$required && !@$forbidden) {
        $results = "";
        return 0;
    }

    # Compile every pattern once instead of once per file.  An empty
    # remainder is never used as a pattern: in Perl an empty pattern means
    # "repeat the last successful match", not "match anything".
    my $or_regex      = $or_pattern ne "" ? _compile_term($or_pattern) : undef;
    my @required_res  = map { _compile_term($_) } @$required;
    my @forbidden_res = map { _compile_term($_) } @$forbidden;

    # A query made only of forbidden terms keeps every story that does not
    # contain them.
    my $keep_by_default = (!defined $or_regex && !@required_res) ? 1 : 0;

    # The path comes from the request, so it is matched literally.
    my $in_searchpath = qr/^\Q$searchpath\E/;

    foreach my $file (keys %$files_ref) {
        # Limit search to the current path only
        if ($local_search && $file !~ $in_searchpath) {
            delete $files_ref->{$file};
            next;
        }

        # Unreadable stories are left in the list untouched
        my $contents = _read_story($file);
        next unless defined $contents;

        # We'll just append writebacks to the story
        $contents .= _read_writebacks($file) if $search_writebacks == 1;

        # If searching filenames, append that to the story for
        # searching as well
        $contents .= $file if $search_filenames == 1;

        my $keep   = $keep_by_default;
        my $delete = 0;

        # If we match any "OR" terms flag file for keeping
        $keep = 1 if defined $or_regex && $contents =~ $or_regex;

        # If we match required terms, keep, else delete for sure
        foreach my $regex (@required_res) {
            if ($contents =~ $regex) {
                $keep = 1;
            }
            else {
                $delete = 1;
            }
        }

        # If we match forbidden terms, then delete
        foreach my $regex (@forbidden_res) {
            $delete = 1 if $contents =~ $regex;
        }

        # Remove file if marked for delete or not marked to keep
        delete $files_ref->{$file} if $delete || !$keep;
    }

    1;
}

# Fill $find::recentsearches with an unordered HTML list of the most
# recent logged queries (newest first, at most $max_previous_searches).
# Always returns 1; a missing log is only reported when logging is on.
sub getrecentsearches {
    my $log;
    unless (open($log, '<', $log_filename)) {
        warn "Couldn't open $log_filename: $!\n" if ($log_searches == 1);
        return 1;
    }
    my @searches = reverse <$log>;
    close($log);
    chomp @searches;

    # Never list more entries than the log actually holds
    my $limit = @searches < $max_previous_searches
        ? scalar(@searches)
        : $max_previous_searches;

    $recentsearches = "<ul>";
    foreach my $query (@searches[0 .. $limit - 1]) {
        # Logged queries are user input: escape them before display
        $recentsearches .= '<li>' . _escape_html($query) . "</li>\n";
    }
    $recentsearches .= "</ul>";

    1;
}

# Blosxom hook: refresh the recent searches list before the head template
# is rendered.
sub head {
    getrecentsearches();
    1;
}


1;

__END__

=head1 NAME

Blosxom Plug-in: find

=head1 DESCRIPTION

Find searches through the available articles and filters out those that do not match the submitted search terms.  To use it, simply place $find::searchform in your template, and it will create a search box that automatically calls the search routine.  It performs a boolean "OR" search by default, or you can use regular expressions for more complicated search terms.

This plugin is capable of handling the following search terms:

term1 term2; term1 or term2
    These match any page with term1 OR term2

term1 and term2; +term1 +term2
    These match any page with both term1 AND term2

term1 not term2; term1 -term2
    This matches pages with term1 that DO NOT contain term2

term1 not (term2 term3)
    This matches pages with term1 that DO NOT contain term2 OR term3

"term1 term2 term3"
    This matches the exact phrase, term1 term2 term3

" pen "
    This will match the word "pen", but not the word "pencil".

You can also use regular expressions within your search terms to further refine your searches, creating a very powerful search engine.

Additionally, you can include the most recent search requests in your blog.  Add $find::recentsearches in your template.  By default, the last 10 searches will be shown in an unordered list.  You can change $max_previous_searches to alter the number displayed.

=head1 AUTHORS

Fletcher T. Penney - http://fletcher.freeshell.org

Eric Sherman
Marc Nozell http://www.nozell.com/blog

=head1 LICENSE

This source is submitted to the public domain.  Feel free to use and modify it.  If you like, a comment in your modified source attributing credit for my original work would be appreciated.

THIS SOFTWARE IS PROVIDED AS IS AND WITHOUT ANY WARRANTY OF ANY KIND.  USE AT YOUR OWN RISK!