"""slogging nginx cache log analysis module"""
import collections
import copy
import time

from urllib import unquote

from swift.common.utils import split_path, get_logger

# Index 1..12 -> abbreviated month name; index 0 is a placeholder so that
# month_map.index('Jan') == 1, matching nginx "%d/%b/%Y" timestamps.
month_map = '_ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()

# Query parameters recognised on account/container listing requests.
LISTING_PARAMS = set(
    'path limit format delimiter marker end_marker prefix'.split())
class CacheLogProcessor(object):
    """Transform nginx proxy-server cache access logs into hourly stats.

    Each log line is parsed into its fields by :meth:`log_line_parser`,
    and :meth:`process` aggregates a whole log stream into per
    (account, year, month, day, hour) counters.
    """

    def __init__(self, conf):
        """Read processor settings from ``conf`` and set up logging.

        :param conf: dict-like configuration section
        """
        self.server_name = conf.get('server_name', 'proxy-server')
        # Comma-separated IP lists; blank entries are dropped.
        self.lb_private_ips = [x.strip() for x in
                               conf.get('lb_private_ips', '').split(',')
                               if x.strip()]
        self.service_ips = [x.strip() for x in
                            conf.get('service_ips', '').split(',')
                            if x.strip()]
        self.warn_percent = float(conf.get('warn_percent', '0.8'))
        self.logger = get_logger(conf, log_route='cache-processor')
        self.logger.info(_('init cache log processor'))

    def log_line_parser(self, raw_log):
        """Given a raw access log line, return a dict of the good parts.

        The line is expected to be ' - '-separated into twelve fields;
        any line that cannot be fully parsed yields ``{}``.
        """
        self.logger.info(_('cache process one line log'))
        d = {}
        try:
            (client_ip,
             client_user,
             timestamp,
             request,
             upstream_status,
             status,
             byte_send,
             host,
             uri,
             referrer,
             user_agent,
             cache_status) = raw_log.split(' - ')
        except ValueError:
            self.logger.info(_('bad line 1'))
            self.logger.debug(_('Bad line data: %s') % repr(raw_log))
            return {}

        # Host must look like <account>.<container>.storage.brighthost.com
        try:
            (account, container_name, storage, brighthost, com) = \
                host.split('.')
        except ValueError as e:
            self.logger.info(_('bad line 2'))
            self.logger.info(_('bad line 2 %s') % host)
            self.logger.debug(_('Invalid host: %(error)s from data: %(log)s')
                              % {'error': e, 'log': repr(raw_log)})
            return {}

        if storage != "storage" or brighthost != "brighthost" or com != "com":
            # BUG FIX: original applied % to 'storage' alone and passed the
            # other two names as extra arguments to info(), raising TypeError.
            self.logger.info(_('bad line 3 %s %s %s')
                             % (storage, brighthost, com))
            return {}

        if uri is None:
            self.logger.info(_('bad line 4'))
            # BUG FIX: original interpolated 'e' here, which is unbound when
            # the host parse above succeeded (NameError).
            self.logger.debug(_('Invalid uri from data: %s') % repr(raw_log))
            return {}
        object_name = 'index.html' if uri == '/' else uri.split('/')[1]

        if request is None:
            self.logger.info(_('bad line 5'))
            # BUG FIX: same unbound-'e' problem as the uri branch above.
            self.logger.debug(_('Invalid method from data: %s')
                              % repr(raw_log))
            return {}
        method = request.split(' ')[0].upper()

        # BUG FIX: original used 'is' against string literals; identity of
        # literals is implementation-defined, equality is what is meant.
        if cache_status is None or cache_status == "" or cache_status == "-":
            cache_status = 'NULL'

        d['client_ip'] = client_ip
        d['method'] = method
        d['request'] = request
        d['code'] = int(status)
        d['referrer'] = referrer
        d['user_agent'] = user_agent
        d['byte_send'] = int(byte_send)
        d['cache_status'] = cache_status

        # Split an nginx timestamp such as "10/Oct/2011:13:55:36 +0800" into
        # date/time fields: ':' -> '/' makes every separator uniform, and the
        # numeric offset after the space is discarded.
        # NOTE(review): despite the '+0000' tz below, no actual conversion to
        # UTC happens here -- confirm upstream intent.
        self.logger.info(_('OLD time: %s') % timestamp)
        timestamp = timestamp.replace(':', '/')
        timestamp = timestamp.split(' ')[0]
        day, month, year, hour, minute, second = timestamp.split('/')
        # Zero-padded month number from its abbreviated name ('Oct' -> '10').
        # '%02d' replaces the original "'%02s' + replace(' ', '0')" hack.
        month = '%02d' % month_map.index(month)

        d['day'] = day
        d['month'] = month
        d['year'] = year
        d['hour'] = hour
        d['minute'] = minute
        d['second'] = second
        d['tz'] = '+0000'
        d['account'] = "AUTH_" + account
        d['container_name'] = container_name
        d['object_name'] = object_name

        self.logger.info(_('finish one line'))
        return d

    def process(self, obj_stream, data_object_account, data_object_container,
                data_object_name):
        """Generate hourly groupings of data from one access log file.

        :param obj_stream: iterable of raw log lines
        :param data_object_account: account the log object came from
        :param data_object_container: container the log object came from
        :param data_object_name: name of the log object
        :returns: dict keyed by (account, year, month, day, hour) with
                  'cache_byte_send' and per method/status/cache-hit counters
        """
        hourly_aggr_info = {}
        total_lines = 0
        bad_lines = 0
        for line in obj_stream:
            line_data = self.log_line_parser(line)
            total_lines += 1
            if not line_data:
                bad_lines += 1
                continue

            aggr_key = (line_data['account'], line_data['year'],
                        line_data['month'], line_data['day'],
                        line_data['hour'])
            d = hourly_aggr_info.get(aggr_key, {})

            byte_send = int(line_data['byte_send'])
            d['cache_byte_send'] = \
                d.setdefault('cache_byte_send', 0) + byte_send
            self.logger.info(_('cache byte send: %d') % d['cache_byte_send'])

            # e.g. 'cache_GET_2xx_HIT'; '//' keeps the bucket an int on both
            # Python 2 and Python 3.
            code = '%dxx' % (int(line_data['code']) // 100)
            key = 'cache_' + line_data['method'] + '_' + code + '_' + \
                line_data['cache_status']
            d[key] = d.setdefault(key, 0) + 1

            hourly_aggr_info[aggr_key] = d

        # Warn when the file was mostly unparseable; without this the
        # bad/total counters and warn_percent were dead state.
        if bad_lines > (total_lines * self.warn_percent):
            name = '/'.join([data_object_account, data_object_container,
                             data_object_name])
            self.logger.warning(_('I found a bunch of bad lines in %(name)s '
                                  '(%(bad)d bad, %(total)d total)') %
                                {'name': name, 'bad': bad_lines,
                                 'total': total_lines})

        return hourly_aggr_info

    def keylist_mapping(self):
        """Map every aggregation key this processor can emit to itself."""
        verb_keys = 'GET HEAD'.split()
        code_keys = '2xx 3xx 4xx 5xx'.split()
        hit_keys = 'MISS HIT EXPIRED UPDATING STALE NULL'.split()

        keylist_mapping = {'cache_byte_send': 'cache_byte_send'}
        for verb in verb_keys:
            for code in code_keys:
                for hit in hit_keys:
                    key = 'cache_%s_%s_%s' % (verb, code, hit)
                    keylist_mapping[key] = key
        return keylist_mapping