Difference between revisions of "Development/swift/slogging cache log module"
Line 1: | Line 1: | ||
__NOTOC__ | __NOTOC__ | ||
+ | {| border="1" cellpadding="2" cellspacing="0" | ||
+ | |} | ||
+ | |||
import collections | import collections | ||
from urllib import unquote | from urllib import unquote | ||
Line 197: | Line 200: | ||
keylist_mapping['cache_'+verb+'_'+code+'_'+hit] = 'cache_'+verb+'_'+code+'_'+hit | keylist_mapping['cache_'+verb+'_'+code+'_'+hit] = 'cache_'+verb+'_'+code+'_'+hit | ||
return keylist_mapping | return keylist_mapping | ||
+ | |||
+ | {| border="1" cellpadding="2" cellspacing="0" |
Revision as of 02:25, 23 February 2012
# NOTE(review): the wiki rendering collapsed these statements onto shared
# lines; restored to one statement per line. Imports kept verbatim (this is
# Python 2 era code: urllib.unquote moved in py3). Several names (unquote,
# copy, time, split_path, LISTING_PARAMS) are unused in the visible chunk
# but may be used elsewhere in the file -- not removed.
import collections
import copy
import time
from urllib import unquote

from swift.common.utils import split_path, get_logger

# 1-based month-abbreviation lookup: month_map.index('Jan') == 1.
# Index 0 is a placeholder so indices line up with calendar months.
month_map = '_ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()
# Query parameters recognized on listing requests.
LISTING_PARAMS = set(
    'path limit format delimiter marker end_marker prefix'.split())
class CacheLogProcessor(object):
    """Transform proxy server cache logs into hourly per-account stats.

    NOTE(review): recovered from a wiki page that collapsed indentation and
    ate some markup (the ``__`` around ``__init__`` and the ``''`` default
    arguments to ``conf.get``); the structure below is the reconstruction.
    """

    def __init__(self, conf):
        """Read processor settings from a swift-style conf mapping.

        :param conf: mapping of config options (server_name, lb_private_ips,
                     service_ips, warn_percent, plus logger options).
        """
        self.server_name = conf.get('server_name', 'proxy-server')
        # BUG FIX: wiki markup stripped the '' default; without it
        # conf.get(...) returns None and .split(',') raises AttributeError.
        self.lb_private_ips = [x.strip() for x in
                               conf.get('lb_private_ips', '').split(',')
                               if x.strip()]
        self.service_ips = [x.strip() for x in
                            conf.get('service_ips', '').split(',')
                            if x.strip()]
        self.warn_percent = float(conf.get('warn_percent', '0.8'))
        self.logger = get_logger(conf, log_route='cache-processor')
        self.logger.info(_('init cache log processor'))

    def log_line_parser(self, raw_log):
        """given a raw access log line, return a dict of the good parts"""
        self.logger.info(_('cache process one line log'))
        d = {}
        try:
            # fields are ' - ' separated in this cache log format
            (client_ip,
             client_user,
             timestamp,
             request,
             upstream_status,
             status,
             byte_send,
             host,
             uri,
             referrer,
             user_agent,
             cache_status) = raw_log.split(' - ')
        except ValueError:
            self.logger.info(_('bad line 1'))
            self.logger.debug(_('Bad line data: %s') % repr(raw_log))
            return {}
        account = None
        container_name = None
        storage = None
        brighthost = None
        com = None
        try:
            # expected host shape: <account>.<container>.storage.brighthost.com
            (account, container_name, storage, brighthost, com) = \
                host.split('.')
        except ValueError as e:
            self.logger.info(_('bad line 2'))
            self.logger.info(_('bad line 2 %s') % host)
            self.logger.debug(_('Invalid host: %(error)s from data: %(log)s')
                              % {'error': e, 'log': repr(raw_log)})
            return {}
        if storage != "storage" or brighthost != "brighthost" or com != "com":
            # BUG FIX: original applied % only to the first value
            # ("... % storage, brighthost, com"), a runtime TypeError;
            # the three values must be passed as one tuple.
            self.logger.info(_('bad line 3 %s %s %s')
                             % (storage, brighthost, com))
            return {}
        object_name = None
        if uri is not None:
            if uri == '/':
                object_name = 'index.html'
            else:
                object_name = uri.split('/')[1]
        else:
            # NOTE(review): 'e' is unbound on this path (only set by the
            # host.split except clause above); kept as in the original.
            self.logger.info(_('bad line 4'))
            self.logger.debug(_('Invalid uri: %(error)s from data: %(log)s')
                              % {'error': e, 'log': repr(raw_log)})
            return {}
        method = None
        if request is not None:
            method = request.split(' ')[0].upper()
        else:
            # NOTE(review): same unbound-'e' caveat as the uri branch.
            self.logger.info(_('bad line 5'))
            self.logger.debug(_('Invalid method: %(error)s from data: '
                                '%(log)s') % {'error': e, 'log': repr(raw_log)})
            return {}
        # BUG FIX: identity comparison ("is") against string literals is
        # implementation-dependent; use equality.
        if cache_status is None or cache_status == "" or cache_status == "-":
            cache_status = 'NULL'
        d['client_ip'] = client_ip
        d['method'] = method
        d['request'] = request
        d['code'] = int(status)
        d['referrer'] = referrer
        d['user_agent'] = user_agent
        d['byte_send'] = int(byte_send)
        d['cache_status'] = cache_status
        # change localtime to UTC time
        self.logger.info(_('OLD time: %s') % timestamp)
        # normalize "dd/Mon/yyyy:HH:MM:SS ..." into six '/'-separated parts
        timestamp = timestamp.replace(':', '/')
        timestamp = timestamp.split(' ')[0]
        day, month, year, hour, minute, second = timestamp.split('/')
        # zero-pad the 1-based month index ('Jan' -> '01'); %02d replaces the
        # original "'%02s' % idx then replace(' ', '0')" dance, same output
        month = '%02d' % month_map.index(month)
        d['day'] = day
        d['month'] = month
        d['year'] = year
        d['hour'] = hour
        d['minute'] = minute
        d['second'] = second
        d['tz'] = '+0000'
        d['account'] = "AUTH_" + account
        d['container_name'] = container_name
        d['object_name'] = object_name
        d['cache_status'] = cache_status
        self.logger.info(_('finish one line'))
        return d

    def process(self, obj_stream, data_object_account, data_object_container,
                data_object_name):
        """generate hourly groupings of data from one access log file"""
        hourly_aggr_info = {}
        total_lines = 0
        bad_lines = 0
        for line in obj_stream:
            line_data = self.log_line_parser(line)
            total_lines += 1
            if not line_data:
                bad_lines += 1
                continue
            account = line_data['account']
            year = line_data['year']
            month = line_data['month']
            day = line_data['day']
            hour = line_data['hour']
            byte_send = int(line_data['byte_send'])
            method = line_data['method']
            code = int(line_data['code'])
            cache_status = line_data['cache_status']

            # one bucket per (account, hour)
            aggr_key = (account, year, month, day, hour)
            d = hourly_aggr_info.get(aggr_key, {})
            d['cache_byte_send'] = \
                d.setdefault('cache_byte_send', 0) + byte_send
            self.logger.info(_('cache byte send: %d') % d['cache_byte_send'])
            # BUG FIX (py3 compat): floor division so the bucket name stays
            # e.g. '2xx' rather than '2.0xx' under true division.
            code = '%dxx' % (code // 100)
            key = 'cache_' + method + '_' + code + '_' + cache_status
            d[key] = d.setdefault(key, 0) + 1
            hourly_aggr_info[aggr_key] = d
        # deliberately disabled in the original revision; kept for reference
        # since warn_percent is still configured in __init__
        #if bad_lines > (total_lines * self.warn_percent):
        #    name = '/'.join([data_object_account, data_object_container,
        #                     data_object_name])
        #    self.logger.warning(_('I found a bunch of bad lines in %(name)s '
        #                          '(%(bad)d bad, %(total)d total)') %
        #                        {'name': name, 'bad': bad_lines,
        #                         'total': total_lines})
        return hourly_aggr_info

    def keylist_mapping(self):
        """Return the identity mapping of every stat key process() can emit."""
        verb_keys = 'GET HEAD'.split()
        code_keys = '2xx 3xx 4xx 5xx'.split()
        hit_keys = 'MISS HIT EXPIRED UPDATING STALE NULL'.split()
        keylist_mapping = {}
        keylist_mapping['cache_byte_send'] = 'cache_byte_send'
        for verb in verb_keys:
            for code in code_keys:
                for hit in hit_keys:
                    key = 'cache_' + verb + '_' + code + '_' + hit
                    keylist_mapping[key] = key
        return keylist_mapping