2 # Author: Fletcher T. Penney
3 # advanced search concept and keywords code by Eric Sherman
4 # Recent Searches feature based on code by Marc Nozell
8 # --- Configurable variables -----
9 # Defaults for the search behaviour; edit them in place. Possible future option:
10 # always starting at the root level of your datadir while staying in a subdirectory
12 $keywords_tag = "meta-keywords:" unless defined $keywords_tag; # Entry lines starting with this tag (matched case-insensitively) are treated as keyword lines
14 $do_local_search = 0; # Perform search relative to the current page,
15 # not the whole site. If set to 1, this will
16 # override the advanced search option
18 $show_advanced = 0; # Set to 1 to always show the advanced form
20 $show_debug = 0; # Set to 1 to add the parsed search terms and the search path to the results text
22 $default_to_and = 0; # Set to 1 to always do "and" searches by default (a form submission with type=any resets it to 0)
24 $match_whole_words = 0; # Set to 1 to only match whole words by default (the "match" form field overrides it)
26 $log_searches = 1; # Set to 1 to append every search query to $log_filename
27 $log_filename = "$blosxom::plugin_state_dir/queries"; # Query log file; also read back to build the recent-searches list
30 $max_previous_searches = 10; # Maximum old queries to display
32 $search_writebacks = 1; # Set to 1 to also search the writeback file of each entry
33 $writeback_dir = "$blosxom::plugin_state_dir/writeback"; # Substituted for the datadir part of an entry path to locate its writeback file
34 $writeback_ext = "wb"; # Substituted for the entry file extension in the writeback file name
37 $search_filenames = 1; # Set to 1 to also search filenames
39 # --------------------------------
45 use CGI qw/:standard/;
51 # Figure out the current path and flavour for the form
# Result: $path_withflavour holds the request path without leading slashes,
# with its extension replaced by the active flavour. It is used for the
# "Advanced Search" link in the search form.
52 $path_withflavour = $blosxom::path_info;
# The substitution strips a trailing ".ext"; the branch is entered only when
# there was no extension to strip (i.e. the path names a directory).
53 if ($path_withflavour !~ s/\.[^\.]*$//) {
54 $path_withflavour =~ s/\/$//; # drop one trailing slash
55 $path_withflavour .= "\/index"; # directory requests point at its index page
# NOTE(review): the next substitution replaces a leading non-slash character
# with itself, so it changes nothing - confirm what was intended.
56 $path_withflavour =~ s/^([^\/])/$1/;
58 $path_withflavour =~ s/^\/*//; # remove all leading slashes
59 $path_withflavour.="\.$blosxom::flavour"; # append ".<flavour>"
61 # Insert this html code only if advanced form is indicated
# Two HTML fragments are built below. $advancedform (radio buttons for the
# search domain, any/all terms, and partial/whole-word matching) is assigned
# only when the request has an advanced_search parameter or $show_advanced
# is set. $searchform is the basic GET form with the "find" text field.
# NOTE(review): $blosxom::path_info is interpolated into the hidden "path"
# field and $path_withflavour into a link without HTML escaping - confirm
# neither can carry quotes or markup from the request URL.
62 $advancedform = qq!<br />Search:<br />
63 <input checked type="radio" name="domain" value="all" />Entire Site
64 <input type="radio" name="domain" value="topic" />This Topic Only
66 <input checked="checked" type="radio" name="type" value="any" />Any
67 <input type="radio" name="type" value="all" />All
69 <input checked="checked" type="radio" name="match" value="any" />Partial
70 <input type="radio" name="match" value="whole" />Whole Words only
72 if ((param('advanced_search')) || $show_advanced);
74 # This is the basic form
76 $searchform = qq!<form method="get" action="$blosxom::url/index.$blosxom::flavour">
78 <input type="text" name="find" size="15" value=""/>
79 <input type="submit" value="Search" />
80 <input type="hidden" name="plugin" value="find"/>
81 <input type="hidden" name="path" value="$blosxom::path_info"/>
83 <a href="$blosxom::url/$path_withflavour?advanced_search=1">Advanced Search</a>
91 # Check that writebacks are working
92 #$search_writebacks = 0 if ( $writeback::writeback_dir eq "");
# Arguments: the package name and a reference to a hash whose keys are the
# candidate entry file paths (each key is later opened as a file).
94 my ($pkg, $files_ref) = @_;
95 my @files_list = keys %$files_ref;
# Read the form parameters; they override the configured defaults.
97 my $terms = param('find');
# NOTE(review): the "path" parameter comes straight from the request and is
# later used unescaped inside a regexp to limit the search - confirm that
# regexp metacharacters in it are acceptable.
98 $searchpath = "$blosxom::datadir/" . param('path');
99 $do_local_search = 1 if (param('domain') eq "topic");
101 $match_whole_words =1 if (param('match') eq 'whole');
102 $match_whole_words =0 if (param('match') eq 'any');
104 my $searchtype = param('type');
105 $default_to_and = 0 if ($searchtype eq 'any');
# NOTE(review): "my" binds tighter than the comma, so only @requiredterms is
# declared lexically here; @forbiddenterms is a package variable and is not
# emptied by this statement. "my (@requiredterms, @forbiddenterms);" was
# presumably intended.
107 my @requiredterms, @forbiddenterms;
# NOTE(review): the raw query is placed in $results without HTML escaping -
# verify how $results is rendered.
110 $results = "These pages matched: $terms";
# Append the raw query to the log file when logging is enabled.
112 if ($log_searches eq 1) {
113 if ( !-e $log_filename ) {
# First use: create the log and make it readable and writable by everyone.
# NOTE(review): this open is not checked for failure.
114 open (LOG, ">>$log_filename");
115 chmod (0666, "$log_filename");
# A failed open only warns; the print below is still attempted.
117 open (LOG, ">>$log_filename") or warn "Error in find logging file."
119 print LOG "$terms\n";
# Translate the query in $terms into three parts: @requiredterms (patterns
# that must all match), @forbiddenterms (patterns that must not match) and
# whatever is left in $terms, which becomes a single "OR" regexp.
# The substitutions below depend on running in exactly this order.
123 $terms = " " . $terms; # Add a space for pattern matching reasons
126 # Handle double quotations (exact phrases)
# Quoted phrases are first marked as [{...}], then whitespace inside the
# markers is rewritten to the regexp fragment \s+, then the markers become ( ).
127 $terms =~ s/\"([^\"]+)\"/\[\{$1\}\]/g;
128 while ($terms =~ s/\[\{([^\}]*)\s+([^\}]*)/\[\{$1\\s\+$2/g) {
130 $terms =~ s/\[\{/(/g;
131 $terms =~ s/\}\]/)/g;
132 # Any left over quotes were "odd-numbered"
# Whitespace inside a parenthesised group becomes "|" (alternation).
136 while ($terms =~ s/\(([^\)]*)\s+([^\)]*)\)/\($1\|$2\)/g) {
139 # Strip trailing spaces to prevent empty terms
140 # Don't strip leading spaces yet!
143 # Convert English to symbols
144 # The "OR"'s will wait until the end
146 $terms =~ s/\s+not\s+/ \-/ig;
147 # Handle "AND"'s and convert to "+", unless preceded by "-"
148 $terms =~ s/\s+(\([^\)]+\))\s+and\s+/ \+$1 \+/ig;
149 $terms =~ s/\-(\([^\)]+\))\s+and\s+/\-$1 \+/ig;
150 $terms =~ s/\s+([^\)]+)\s+and\s+/ \+$1 \+/ig;
151 $terms =~ s/\-([^\)]+)\s+and\s+/\-$1 \+/ig;
152 $terms =~ s/\+\-/\-/g; # Fix if the second term already had "-"
154 $results = "These pages matched: $terms" if ($show_debug eq 1);
156 # If doing "all" search, then every term is required
157 # Will not override terms already set to "NOT"
158 $terms =~ s/\s+\+?([\(\)\|\w]+)/ \+$1/g if (($searchtype eq "all") || ($default_to_and eq 1));
160 # Extract all required terms ("AND" terms)
# Each "+term" is removed from $terms and collected in @requiredterms.
161 while ($terms =~ s/\s+\+([\(\)\|\\\+\w]+)//){
163 $theterm = "\\b$theterm\\b" if ($match_whole_words eq 1);
164 push(@requiredterms,$theterm);
165 $results.="<br>Required Term: $theterm" if ($show_debug eq 1);
168 # Extract all "forbidden" terms ("NOT" terms)
# Each "-term" is removed from $terms and collected in @forbiddenterms.
169 while ($terms =~ s/\s+\-([\(\)\|\\\+\w]+)//){
171 $theterm = "\\b$theterm\\b" if ($match_whole_words eq 1);
172 push(@forbiddenterms,$theterm);
173 $results.="<br>Forbidden Term: $theterm" if ($show_debug eq 1);
176 # Strip "OR"'s with only one term
177 while ($terms =~ s/^\s*or\s+//i) {}
178 while ($terms =~ s/\s+or\s*$//i) {}
181 # Now cleanup for regexp's
182 $terms =~ s/^\s+//; #Strip leading and trailing spaces
184 # Finally, convert all the "OR" terms to a single regexp
185 $terms =~ s/\s+(or\s+)?/\|/ig;
186 $terms =~ s/(\s)\+/$1/g; # Loose '+' will crash regexp
188 # Handle whole word matching on remainder
# NOTE(review): the \b anchors wrap the whole alternation without grouping,
# so with several OR terms only the first alternative gets the leading
# boundary and only the last one the trailing boundary - confirm intent.
189 $terms = "\\b$terms\\b" if ($match_whole_words eq 1);
192 $results.="<br>Remainder regexp: $terms<br>" if ($show_debug eq 1);
193 $results.="Search path: $searchpath <br>" if ($show_debug eq 1);
195 # Quit now if nothing to search for
# NOTE(review): "&" is the bitwise operator; the logical "&&" was presumably
# intended (no short-circuit happens with "&").
196 if (($terms eq "") & (scalar(@requiredterms) eq 0) & (scalar(@forbiddenterms) eq 0)) {
# Examine every candidate file and remove from the hash behind $files_ref
# those that do not satisfy the parsed search terms.
201 foreach $file (@files_list) {
202 # next; # Enable this line to debug terms parsing only
203 if ($do_local_search eq 1) {
204 # Limit search to the current path only
# NOTE(review): $searchpath is interpolated as a regexp, so characters such
# as "." or "+" in the path act as metacharacters; \Q$searchpath\E would
# match it literally - confirm.
205 if ($file !~ /^$searchpath/) {
206 delete $files_ref->{$file};
# Files that cannot be opened are skipped and stay in the hash untouched.
212 open (FILE, "<$file") or next;
215 while ($line = <FILE>) {
# A keywords line: strip the tag and keep only the text after it.
218 if ($line =~ /^$keywords_tag/i) {
# NOTE(review): \1 on the replacement side is the sed-style spelling; Perl
# documents $1 as the correct form there. The match on this line is also
# case-sensitive, unlike the test on the line above.
219 $line =~ s/^$keywords_tag(.*)$/\1/;
221 # don't read other meta- tags
222 elsif ($line =~ /^meta-/i) {
225 # a line beginning with whitespace is taken as the end of the header
# (inferred from the original comment; the handling code is not shown here)
226 elsif ($line =~ /^\s.*$/) {
235 # Now scan writebacks for this story
236 if ( $search_writebacks == 1) {
# Derive the writeback file name from the entry path: swap the datadir for
# the writeback directory and the entry extension for the writeback one.
# NOTE(review): both substitutions interpolate their left side as a regexp
# without quoting - confirm the paths contain no metacharacters.
237 my $writeback_file = $file;
238 $writeback_file =~ s/$blosxom::datadir/$writeback_dir/;
239 $writeback_file =~ s/$blosxom::file_extension$/$writeback_ext/;
241 if (open (FILE, "<$writeback_file")) {
242 while ($line = <FILE>) {
243 # We'll just append writebacks to the story
250 # If searching filenames, append that to the story for
253 if ($search_filenames == 1) {
257 # If we match any "OR" terms flag file for keeping
# NOTE(review): when $terms is empty, Perl treats an empty pattern as "reuse
# the last successfully matched regexp", so this test may not do what it
# appears to - confirm the empty case is handled.
258 $keep = 1 if ($contents =~ /$terms/si);
260 # If we match required terms, keep, else delete for sure
261 foreach (@requiredterms) {
262 if ($contents =~ /$_/si) {
269 # If we match forbidden terms, then delete
270 foreach (@forbiddenterms) {
271 if ($contents =~ /$_/si) {
276 # Remove file if marked for delete or not marked to keep
277 delete $files_ref->{$file} if (($delete eq 1) or ($keep eq 0));
# Build $recentsearches, an HTML unordered list of the most recent queries.
# Reads every line of $log_filename, reverses the order so the newest query
# comes first, and emits one <li> per loop iteration.
# Takes no arguments; the result is left in the variable $recentsearches.
284 sub getrecentsearches {
285 if ( open(LOG, "< $log_filename")) {
286 my @searches = <LOG>;
288 @searches = reverse(@searches); # newest query first
289 $recentsearches = "<ul>";
# NOTE(review): the loop always runs $max_previous_searches times, so a log
# with fewer lines yields empty <li></li> items. The logged text is also
# inserted as read from the file, with no HTML escaping visible here -
# confirm both.
290 for ($count = 0; $count < $max_previous_searches; $count++) {
291 $recentsearches .= '<li>' . $searches[$count] . '</li>';
293 $recentsearches .= "</ul>";
# The warning is only emitted when logging is enabled.
295 warn "Couldn't open $log_filename: $!\n" if ($log_searches == 1);
312 Blosxom Plug-in: find
316 Find searches through the available articles and filters out those that do not match the submitted search terms. To use it, simply place $find::searchform in your template, and it will create a search box that automatically calls the search routine. It performs a boolean "OR" search by default, or you can use regular expressions for more complicated search terms.
318 This plugin is capable of handling the following search terms
320 term1 term2; term1 or term2
321 These match any page with term1 OR term2
323 term1 and term2; +term1 +term2
324 These match any page with both term1 AND term2
326 term1 not term2; term1 -term2
327 This matches pages with term1 that DO NOT contain term2
329 term1 not (term2 term3)
330 This matches pages with term1 that DO NOT contain term2 OR term3
333 This matches the exact phrase, term1 term2 term3
336 This will match the word "pen", but not the word "pencil".
338 You can also use regular expressions within your search terms to further refine your searches, creating a very powerful search engine.
340 Additionally, you can include the most recent search requests in your blog. Add $find::recentsearches in your template. By default, the last 10 searches will be shown in an unordered list. You can change $max_previous_searches to alter the number displayed.
344 Fletcher T. Penney - http://fletcher.freeshell.org
346 Eric Sherman <enkidu@enkidu.bloggedup.com>
347 Marc Nozell <marc@nozell.com> http://www.nozell.com/blog
349 This plugin is now maintained by the Blosxom Sourceforge Team,
350 <blosxom-devel@lists.sourceforge.net>.
354 This source is submitted to the public domain. Feel free to use and modify it. If you like, a comment in your modified source attributing credit for my original work would be appreciated.
356 THIS SOFTWARE IS PROVIDED AS IS AND WITHOUT ANY WARRANTY OF ANY KIND. USE AT YOUR OWN RISK!