#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Dinko Korunic 'kreator', 2006.
# name_search.py
# script to return unique matches for a given regex

r"""This script can be used to return unique matches for a given (unicode)
regex.

Usage: name_search.py original_text catalog [regex]

Original_text is the text to search, catalog will contain the result.
Regex is optional: if not given, program will use default unicode regex
"(\S+)\s\d{4}".
"""

# History:
# 1.1 - Use local argv if possible, not only sys.argv
# 1.0 - Initial release

__copyright__ = """Copyright (C) 2005 Dinko Korunic, InfoMAR d.o.o.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
"""

__version__ = '1.1'

# `re` replaces the long-removed `sre` alias; `io.open` gives the same
# encoding-aware text files on both Python 2.6+ and Python 3.
import io
import re
import sys

# Pattern used when the caller does not supply one: capture the
# whitespace-free token that is followed by one whitespace character and
# four digits.
DEFAULT_REGEX = r'(\S+)\s\d{4}'


def usage():
    """Print the program usage string and exit with status 0."""
    # Raw string: the backslashes in the sample regex are literal text,
    # not escape sequences.
    usage_text = r"""Usage: %s original_text catalog [regex]

Original_text is the text to search, catalog will contain the result.
Regex is optional: if not given, program will use unicode regex
'(\S+)\s\d{4}'
"""
    print(usage_text % sys.argv[0])
    sys.exit(0)


def _format_match(match):
    """Return one ``findall`` result as a single line of text.

    ``findall`` yields plain strings for patterns with at most one group
    and tuples of strings for patterns with several groups; the groups of
    a tuple are joined with a tab so that every result fits on one line.
    """
    if isinstance(match, tuple):
        return u'\t'.join(match)
    return match


def index_file(source_filename, output_filename, regex_src=None,
               encoding='iso-8859-1'):
    """Collect the unique regex matches of a text file into a catalog file.

    Every line of ``source_filename`` is scanned with ``findall``; the
    distinct results are sorted and written to ``output_filename``, one
    per line.

    Parameters:
        source_filename -- path of the text file to search.
        output_filename -- path of the catalog file to (over)write.
        regex_src -- pattern source; ``None`` selects ``DEFAULT_REGEX``.
        encoding -- codec used to read the source and write the catalog.
            The default maps every byte to exactly one character, so any
            input decodes without error.

    Raises:
        SystemExit -- with status 1, after printing a message, when either
            file cannot be opened, read or written.
        re.error -- when ``regex_src`` is not a valid pattern (not caught).
    """
    if regex_src is None:
        regex_src = DEFAULT_REGEX
    regex = re.compile(regex_src, re.UNICODE)

    # A set keeps each distinct match exactly once.
    matches = set()
    try:
        # `with` closes the file even if reading fails part-way through.
        with io.open(source_filename, 'r', encoding=encoding) as sourcefile:
            for line in sourcefile:
                matches.update(regex.findall(line))
    except IOError as err:
        print('I/O error with %s(%s): %s' %
              (source_filename, err.errno, err.strerror))
        sys.exit(1)

    try:
        with io.open(output_filename, 'w', encoding=encoding) as outputfile:
            for entry in sorted(matches):
                outputfile.write(u'%s\n' % _format_match(entry))
    except IOError as err:
        print('I/O error with %s(%s): %s' %
              (output_filename, err.errno, err.strerror))
        sys.exit(1)


def main(argv=None):
    """Run the indexer from a command-line style argument list.

    ``argv`` defaults to ``sys.argv``.  It must hold the program name
    followed by the source path, the catalog path and, optionally, a
    regex; any other number of arguments prints the usage text and exits.
    """
    if argv is None:
        argv = sys.argv

    # Two or three user arguments map directly onto index_file's
    # positional parameters (source, catalog[, regex]).
    if len(argv) in (3, 4):
        index_file(*argv[1:])
    else:
        usage()


if __name__ == '__main__':
    # import Psyco if available
    try:
        import psyco
        psyco.full()
    except ImportError:
        pass
    sys.exit(main())