#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Dinko Korunic 'kreator', 2005.
# ircustats.py
# script for ircu clients log analysis
# - statistics per last month: total clients, unique IPs, unique TLDs

"""This script can be used to generate statistics from a Undernet IRC daemon
logs. Specifically, statistics per last month: total clients, unique IPs and
unique TLDs.

Typical IRCU ircd log format:
-----------------------------
[2005-7-29 16:45:54] USER [TRACE]: 1122648277 77 beaugosse@adsl-20-143-192-81.adsl2.iam.net.ma 81.192.143.20 0 A8r32 mignonne :mignonne

Relevant IRCU daemon configuration:
-----------------------------------
F:LOG:USER:FILE:ircd-clients.log
F:LOG:USER:LEVEL:INFO

Example output follows:
-----------------------
Statistics for 2006/1:
 Total users: 2713866
 Total unique IPs: 606193
 Total TLDs: 177
 10 most popular TLDs: [('net', 374564), ('com', 211204), ('ca', 127452), ('ro', 53086), ('jp', 36279), ('it', 31919), ('mx', 26380), ('hr', 24842), ('org', 24455), ('de', 24026)]
"""

__copyright__ = """Copyright (C) 2005 Dinko Korunic, InfoMAR d.o.o.

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
"""

__version__ = '$Id: ircustats.py 195 2006-02-26 11:53:36Z kreator $'

import sys
import re
import time


def dictsortValue(dict):
    """Return the dictionary's items as (key, value) tuples, largest value first.

    Items with equal values are ordered by key, descending.  The parameter
    keeps its historical name (which shadows the builtin) so that existing
    keyword callers continue to work.

    >>> a = {'a': 1, 'b': 5, 'c': -10, 'd': 12, 'e': 30, 'f': 3}
    >>> dictsortValue(a)
    [('e', 30), ('d', 12), ('b', 5), ('f', 3), ('a', 1), ('c', -10)]
    >>> a = {1: 1, 2: -10, 3: 20}
    >>> dictsortValue(a)
    [(3, 20), (1, 1), (2, -10)]
    >>> dictsortValue({})
    []
    """
    # Sort on (value, key) pairs so the value is the primary sort criterion.
    by_value = sorted([(v, k) for k, v in dict.items()], reverse=True)
    return [(k, v) for v, k in by_value]


def _previous_month(now):
    """Return (year, month) of the calendar month preceding time tuple now."""
    year, month = now[0], now[1]
    if month == 1:
        # January rolls back to December of the previous year.
        return year - 1, 12
    return year, month - 1


def _build_line_regex(year, month):
    """Compile the pattern matching USER TRACE log lines of one month.

    Group 1 captures the host part of user@host, group 2 the IPv4 address.
    """
    # The '-' after the month is essential: without it month 1 would also
    # match months 10, 11 and 12.  A leading zero on the month is tolerated.
    ltime = r'^\[%d-0?%d-[^\]]+\] ' % (year, month)
    logrest = r'USER \[TRACE\]: \d+ \d+ '
    userhost = r'\S+@(\S+) '
    ipaddr = r'(\d+\.\d+\.\d+\.\d+)'
    return re.compile(ltime + logrest + userhost + ipaddr)


def irculog(iterable=None, now=None):
    """Print IRCU client statistics for the month preceding a reference time.

    Parameters:
      iterable -- iterable of log lines (e.g. an open file); when None the
                  function returns immediately without printing anything.
      now      -- optional time tuple (as returned by time.localtime());
                  only the year and month fields are used.  Defaults to the
                  current local time.

    The report (matched clients, unique IPs, unique TLDs and the ten most
    frequent TLDs) is written to sys.stdout.  Lines that do not match the
    USER TRACE format, or belong to another month, are skipped.  Returns
    None.

    >>> a = ('[2006-1-1 16:45:54] USER [TRACE]: 1122648277 77 '
    ...      'beaugosse@adsl-20-143-192-81.adsl2.iam.net.ma '
    ...      '81.192.143.20 0 A8r32 mignonne :mignonne')
    >>> irculog([a, a], now=(2006, 2, 15))
    Statistics for 2006/1:
     Total users: 2
     Total unique IPs: 1
     Total TLDs: 1
     10 most popular TLDs: [('ma', 2)]
    """
    # sanity check
    if iterable is None:
        return

    if now is None:
        now = time.localtime()
    year, month = _previous_month(now)
    regex = _build_line_regex(year, month)

    totalusers = 0
    domains = {}
    ipaddrs = {}

    for line in iterable:
        result = regex.match(line)
        if not result:
            continue

        # positive match
        totalusers += 1

        # The last dot-separated label is a TLD only if it is alphabetic;
        # otherwise the "host" is a bare IP address and is not counted.
        tld = result.group(1).split('.')[-1].lower()
        if tld.isalpha():
            domains[tld] = domains.get(tld, 0) + 1

        ip = result.group(2)
        ipaddrs[ip] = ipaddrs.get(ip, 0) + 1

    # A single write keeps the output identical on Python 2 and Python 3.
    report = [
        'Statistics for %s/%s:\n' % (year, month),
        ' Total users: %d\n' % totalusers,
        ' Total unique IPs: %d\n' % len(ipaddrs),
        ' Total TLDs: %d\n' % len(domains),
        ' 10 most popular TLDs: %s\n' % dictsortValue(domains)[0:10],
    ]
    sys.stdout.write(''.join(report))


def _test():
    """Run the doctests embedded in this module."""
    import doctest
    # Look the module up by its runtime name so the tests do not depend on
    # the file being called ircustats.py.
    doctest.testmod(sys.modules[__name__])


if __name__ == '__main__':
    # import Psyco if available
    try:
        import psyco
        psyco.full()
    except ImportError:
        pass

    # Call _test() here instead to run the embedded doctests.
    sys.exit(irculog(sys.stdin))