Hace un rato publiqué un post con un script para ver qué dominios de Apache consumen mayor transferencia. El script en BASH toma unos 7 segundos en procesar 750 archivos de uno de mis servidores. Ahora hice una versión en Python, para la que me dieron algunos consejos los chicos de PyAr, y toma aproximadamente 4 segundos en procesar los mismos archivos. Aquí les dejo el código:
""" Access Log Parser
Parses all the files in a directory
treating them as access_log files
and outputs the list of files sorted
by transfered megabytes. Useful for
identifying heavy users.
Usage:
./access_log_parser.py <base_dir>
base_dir = directory where the access_log files are
"
import sys
import os
import time
from operator import itemgetter
def main(args):
    """Report transferred megabytes per access_log file in a directory.

    Every non-empty regular file directly inside the directory is read
    line by line. Each line is split on single spaces; field 10 is added
    to the file's byte total when it is an integer, and field 4 (minus
    its first character) is taken as the request timestamp.
    NOTE(review): this assumes Apache common/combined log format, where
    field 4 looks like "[10/Mar/2010:02:48:33" -- confirm for your logs.

    One summary line per file is printed, sorted by ascending megabytes,
    followed by the elapsed time.

    Args:
        args: Script arguments in sys.argv form; args[1] is the base
            directory holding the access_log files.

    Returns:
        0 on success, 1 when the directory argument is missing, 2 when
        args[1] is not a directory.
    """
    # Parameter checking
    if len(args) < 2:
        print("Usage: %s <base_dir>" % args[0])
        return 1
    base_dir = args[1]
    if not os.path.isdir(base_dir):
        print("%s is not a directory" % base_dir)
        return 2

    # Init vars
    started = time.time()
    mbyte = 1048576.0
    domains = []

    # Start processing files
    for item in os.listdir(base_dir):
        path = os.path.join(base_dir, item)
        if not os.path.isfile(path) or os.path.getsize(path) == 0:
            continue

        total_bytes = 0
        init_date = None
        end_date = None

        # Process file lines; "with" guarantees the handle is closed.
        with open(path, 'r') as log_file:
            for line in log_file:
                data = line.split(" ")
                # Blank or truncated lines carry no timestamp field, so
                # they must not be used for the From/To dates.
                if len(data) > 3:
                    end_date = data[3][1:]
                    if init_date is None:
                        init_date = end_date
                try:
                    # int() covers arbitrarily large totals on both
                    # Python 2 (auto-promotes to long) and Python 3.
                    total_bytes += int(data[9])
                except (IndexError, ValueError):
                    # Size field missing, or "-" for bodiless responses.
                    pass

        domains.append({
            'domain': item,
            'mbytes': total_bytes / mbyte,
            'init_date': init_date,
            'end_date': end_date,
        })

    # Print out sorted information
    for domain in sorted(domains, key=itemgetter('mbytes')):
        print("%.2f MB | From: %s | To: %s | %s" % (
            domain['mbytes'],
            domain['init_date'],
            domain['end_date'],
            domain['domain'],
        ))
    print("Generated in %d seconds" % (time.time() - started))
    return 0
# Run main() only when executed as a script, and hand its return value
# to the shell as the process exit status.
if __name__ == "__main__":
    exit_code = main(sys.argv)
    sys.exit(exit_code)
Para ejecutarlo:
./access_log_parser.py /tmp/domain_logs
...
544.30 MB | From: 10/Mar/2010:02:48:33 | To: 10/Mar/2010:18:13:25 | dominio1.com.ar.log
602.34 MB | From: 10/Mar/2010:00:23:09 | To: 10/Mar/2010:23:39:45 | dominio2.com.ar.log
944.03 MB | From: 10/Mar/2010:00:49:35 | To: 10/Mar/2010:23:39:57 | dominio3.com.ar.log
Generated in 3 seconds
0 Comentarios.