#!/usr/bin/python
# This script takes care of the following:
#  * Generate lighttpd configuration that puts access logs for each subdomain
#    into a separate file.
#  * Generate awstats configuration files to parse each of these.
#  * Run awstats to process all current logfiles or
#  * When --after-logrotate is given, run awstats to process the just rotated
#    logfiles.
# For the last part, it is assumed that logrotate is configured with dateext,
# without olddir and, until http://bugs.gentoo.org/106651 is fixed, with
# delaycompress.
#
# Every print below uses the single-argument parenthesised form, so the
# script runs unchanged under both Python 2 and Python 3.

import datetime
import os
import re
import subprocess
import sys

root_dir = '/data/www'
htdocs_dir = 'htdocs'
logs_dir = 'logs'
lighttpd_conf_file = '/etc/lighttpd/logging.conf'
# The directory with awstats configuration files
awstats_dir = '/etc/awstats'
# The template for each awstats configuration file. %s is replaced with the
# full domain name the configuration is for
awstats_config_file = 'awstats.%s.conf'
# Let each awstats config file include this file
awstats_common_file = os.path.join(awstats_dir, 'common.conf')
# Filename for the log files
log_file = 'access.log'
# Directory for domains we didn't find
other_dir = 'other'
awstats = '/usr/lib/cgi-bin/awstats.pl'
# Use sudo to run awstats as this user
awstats_user = 'www-data'
# The dateformat option as used by logrotate. This is the default.
dateformat = '-%Y%m%d'
# Lighttpd restart command
reload_lighttpd = 'invoke-rc.d lighttpd reload'

header = """
# This config file was autogenerated by the %s script. Do not change it
# directly, since it will be periodically regenerated.

""" % sys.argv[0]


def ensure_dir(path):
  """Create the directory `path` (and any missing parents) if needed."""
  if not os.path.isdir(path):
    os.makedirs(path)


def find_subdomains(domain_htdocs_dir):
  """Map each subdomain found in `domain_htdocs_dir` to its list of aliases.

  Every directory entry is a subdomain and is its own alias. A symlink that
  points to another entry directly inside `domain_htdocs_dir` is not a
  subdomain of its own; its name is recorded as an alias of the entry it
  points to instead. Links are resolved only once, and links pointing
  outside of the htdocs directory are treated like regular directories.

  Entries are visited in sorted order so that the result, and everything
  generated from it, is stable from run to run.
  """
  subdomains = {}
  htdocs = os.path.normpath(domain_htdocs_dir)

  for name in sorted(os.listdir(domain_htdocs_dir)):
    subdomain_htdocs_dir = os.path.join(domain_htdocs_dir, name)
    # Skip non-directories
    if not os.path.isdir(subdomain_htdocs_dir):
      continue

    # If the htdocs dir is a link, resolve it (only once!)
    if os.path.islink(subdomain_htdocs_dir):
      # Resolve the link to a full path. Normalize it, so that targets
      # written as "www/" or "./www" are recognized as well.
      target = os.readlink(subdomain_htdocs_dir)
      target = os.path.normpath(os.path.join(domain_htdocs_dir, target))
      # Only resolve links that point within the same domain
      if os.path.dirname(target) == htdocs:
        target = os.path.basename(target)
        print("\t%s -> %s" % (name, target))
        subdomains.setdefault(target, []).append(name)
        continue

    # If we get here, there was no resolvable link
    subdomains.setdefault(name, []).append(name)

  return subdomains


def lighttpd_subdomain_block(sub, aliases, domain, logfile):
  """Return the lighttpd conditional that sends `sub`'s requests to `logfile`.

  `aliases` lists every host prefix that should end up in this logfile. The
  names are regex-escaped before being put in a pattern, so the dots in a
  host name only match literal dots.
  """
  if aliases != [sub]:
    aliases_regex = '|'.join(re.escape(a) for a in aliases)
    conf = '\t$HTTP["host"] =~ "^(%s)%s$" {\n' % (aliases_regex, re.escape('.' + domain))
  else:
    # Don't use a regex if we don't need to. I think this should slightly
    # speed up lighttpd.
    conf = '\t$HTTP["host"] == "%s.%s" {\n' % (sub, domain)
  conf += '\t\taccesslog.filename = "%s"\n' % logfile
  conf += '\t}\n'
  return conf


def awstats_conf_for(sub, aliases, domain, logfile):
  """Return the contents of the awstats configuration file for `sub`.`domain`."""
  conf = header
  conf += 'LogFile="%s"\n' % logfile
  conf += 'SiteDomain="%s.%s"\n' % (sub, domain)
  conf += 'HostAliases="%s"\n' % ' '.join(["%s.%s" % (a, domain) for a in aliases])
  conf += 'Include "%s"\n' % awstats_common_file
  return conf


def write_if_changed(path, content):
  """Write `content` to `path` unless the file already holds exactly that.

  A file that does not exist yet counts as changed. Returns True when the
  file was (re)written and False when it was left alone.
  """
  if os.path.exists(path):
    with open(path, 'r') as f:
      if f.read() == content:
        return False

  print("Writing %s" % path)
  with open(path, 'w') as f:
    f.write(content)
  return True


def main(argv):
  """Regenerate the lighttpd and awstats configuration, then run awstats."""
  lighttpd_conf = header
  domains = {}

  for d in sorted(os.listdir(root_dir)):
    domain_htdocs_dir = os.path.join(root_dir, d, htdocs_dir)
    # Require a dot in the domain name to filter out stuff like "template" or
    # "php5-libs" and require the htdocs directory to exist.
    if '.' not in d or not os.path.isdir(domain_htdocs_dir):
      continue

    print("%s" % d)

    # Make a dictionary of subdomains, containing a list of all aliases.
    subdomains = find_subdomains(domain_htdocs_dir)
    domains[d] = subdomains

    # Generate the lighttpd config file part for this domain
    other_logfile = os.path.join(root_dir, d, logs_dir, other_dir, log_file)
    lighttpd_conf += '$HTTP["host"] =~ "%s$" {\n' % re.escape('.' + d)
    lighttpd_conf += '\t# Fallback logfile, in case none if the below conditionals match.\n'
    lighttpd_conf += '\t# This can happen when a domain was added, but the %s script\n' % sys.argv[0]
    lighttpd_conf += '\t# has not run yet\n'
    lighttpd_conf += '\taccesslog.filename = "%s"\n' % other_logfile

    # Make sure the directory exists
    ensure_dir(os.path.dirname(other_logfile))

    for (s, aliases) in sorted(subdomains.items()):
      print("\t%s" % s)

      full_domain = "%s.%s" % (s, d)
      subdomain_logfile = os.path.join(root_dir, d, logs_dir, s, log_file)

      # Generate the lighttpd config file part for this subdomain
      print("\t\tGenerating lighttpd configuration")
      lighttpd_conf += lighttpd_subdomain_block(s, aliases, d, subdomain_logfile)

      # Write out the awstats config file. Symlinked names never get a file
      # of their own; they only show up as aliases of their target.
      subdomain_awstats_file = os.path.join(awstats_dir, awstats_config_file % full_domain)
      print("\t\tWriting %s" % subdomain_awstats_file)
      with open(subdomain_awstats_file, 'w') as f:
        f.write(awstats_conf_for(s, aliases, d, subdomain_logfile))

      # Make sure the directory exists
      ensure_dir(os.path.dirname(subdomain_logfile))

    lighttpd_conf += '}\n'

  # Write out the lighttpd configuration. Only do so when it has changed, to
  # prevent useless lighttpd reloads.
  if write_if_changed(lighttpd_conf_file, lighttpd_conf):
    # Reload lighttpd configuration
    print("Reloading lighttpd: %s" % reload_lighttpd)
    subprocess.call(reload_lighttpd, shell=True)

  # Now, run awstats to parse log files.

  if len(argv) > 1 and argv[1] == '--after-logrotate':
    # Logs have just been rotated, so update "todays" log. We make a guess at
    # logrotate's date extension (which shouldn't be a guess, unless
    # logrotate's dateformat was modified).
    dateext = datetime.date.today().strftime(dateformat)
  else:
    dateext = ''

  for (d, subdomains) in sorted(domains.items()):
    for s in sorted(subdomains):
      subdomain_logfile = os.path.join(root_dir, d, logs_dir, s, log_file + dateext)

      # Call awstats. We explicitly pass in a LogFile, in case
      # --after-logrotate is given. The config parameter points to the middle
      # part of the configuration file name, awstats adds the root dir and
      # awstats.%s.conf part. We check if the file exists, since rotation
      # might not have happened (when the file was empty, for example)
      if os.path.exists(subdomain_logfile):
        subprocess.call([ 'sudo'
                        , '-u', awstats_user
                        , awstats
                        , '-config=%s.%s' % (s, d)
                        , '-update'
                        , '-LogFile=%s' % subdomain_logfile
                        ])


if __name__ == '__main__':
  main(sys.argv)

# vim: set sw=2 sts=2 expandtab autoindent: