Jump to: navigation, search

Difference between revisions of "Development/swift/slogging cache log module"

 
m (Text replace - "__NOTOC__" to "")
 
(3 intermediate revisions by one other user not shown)
Line 1: Line 1:
__NOTOC__
 
import collections
 
from urllib import unquote
 
import copy
 
import time
 
  
from swift.common.utils import split_path, get_logger
+
slogging nginx cache log analysis module
  
# Maps 1-based month numbers to their access-log abbreviations;
# index 0 is a filler so that month_map.index('Jan') == 1.
month_map = ['_', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
             'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+
download here http://wiki.openstack.org/development/swift/slogging_cache_log_module?action=AttachFile&do=get&target=sloggingcacheloganalysis.pdf
# Query parameters recognized on account/container listing requests.
LISTING_PARAMS = {'path', 'limit', 'format', 'delimiter',
                  'marker', 'end_marker', 'prefix'}
 
 
 
class CacheLogProcessor(object):
    """Transform nginx proxy cache access logs into hourly aggregates.

    Each raw log line is a ' - '-separated record of twelve fields
    (client ip, user, timestamp, request, upstream status, status,
    bytes sent, host, uri, referrer, user agent, cache status).
    log_line_parser() turns one line into a dict; process() folds a
    stream of lines into per-(account, year, month, day, hour) stats.
    """

    def __init__(self, conf):
        """Read configuration.

        :param conf: dict-like config; recognized keys are server_name,
                     lb_private_ips / service_ips (comma-separated lists)
                     and warn_percent (bad-line ratio that triggers a
                     warning in process()).
        """
        self.server_name = conf.get('server_name', 'proxy-server')
        self.lb_private_ips = [x.strip() for x in
                               conf.get('lb_private_ips', '').split(',')
                               if x.strip()]
        self.service_ips = [x.strip() for x in
                            conf.get('service_ips', '').split(',')
                            if x.strip()]
        self.warn_percent = float(conf.get('warn_percent', '0.8'))
        self.logger = get_logger(conf, log_route='cache-processor')
        self.logger.info(_('init cache log processor'))

    def log_line_parser(self, raw_log):
        '''given a raw access log line, return a dict of the good parts'''
        self.logger.info(_('cache process one line log'))
        d = {}
        try:
            (client_ip,
             client_user,
             timestamp,
             request,
             upstream_status,
             status,
             byte_send,
             host,
             uri,
             referrer,
             user_agent,
             cache_status) = raw_log.split(' - ')
        except ValueError:
            # Wrong number of ' - '-separated fields.
            self.logger.info(_('bad line 1'))
            self.logger.debug(_('Bad line data: %s') % repr(raw_log))
            return {}

        # Host must look like <account>.<container>.storage.brighthost.com
        try:
            (account, container_name, storage,
             brighthost, com) = host.split('.')
        except ValueError as e:
            self.logger.info(_('bad line 2'))
            self.logger.info(_('bad line 2 %s') % host)
            self.logger.debug(_('Invalid host: %(error)s from data: %(log)s') %
                              {'error': e, 'log': repr(raw_log)})
            return {}

        if storage != "storage" or brighthost != "brighthost" or com != "com":
            # BUG FIX: original applied % to the first value only
            # ("% storage, brighthost, com"), a TypeError at runtime.
            self.logger.info(_('bad line 3 %s %s %s') %
                             (storage, brighthost, com))
            return {}

        # Derive the object name from the request uri; '/' means the
        # implicit index document.
        if uri == '/':
            object_name = 'index.html'
        elif uri:
            object_name = uri.split('/')[1]
        else:
            self.logger.info(_('bad line 4'))
            self.logger.debug(_('Invalid uri from data: %s') % repr(raw_log))
            return {}

        if request:
            # "GET /path HTTP/1.1" -> "GET"
            method = request.split(' ')[0].upper()
        else:
            self.logger.info(_('bad line 5'))
            self.logger.debug(_('Invalid method from data: %s') %
                              repr(raw_log))
            return {}

        # BUG FIX: original used "is" for string comparison, so '-' and ''
        # were (almost) never normalized to 'NULL'.
        if cache_status in (None, '', '-'):
            cache_status = 'NULL'

        # Numeric fields may be '-' on aborted requests; treat as bad line
        # instead of letting ValueError kill the whole stream.
        try:
            status_code = int(status)
            bytes_sent = int(byte_send)
        except ValueError:
            self.logger.info(_('bad line 6'))
            self.logger.debug(_('Invalid numbers from data: %s') %
                              repr(raw_log))
            return {}

        # Normalize "18/Jul/2011:10:02:04 +0800" into six '/'-separated
        # parts and map the month name to a zero-padded number.
        # NOTE(review): the timestamp is NOT converted to UTC (conversion
        # code was commented out upstream) yet tz is reported as '+0000'
        # below — confirm downstream consumers expect local time.
        self.logger.info(_('OLD time: %s') % timestamp)
        timestamp = timestamp.replace(':', '/').split(' ')[0]
        try:
            day, month, year, hour, minute, second = timestamp.split('/')
            month = '%02d' % month_map.index(month)
        except ValueError:
            self.logger.info(_('bad line 7'))
            self.logger.debug(_('Invalid timestamp from data: %s') %
                              repr(raw_log))
            return {}

        d['client_ip'] = client_ip
        d['method'] = method
        d['request'] = request
        d['code'] = status_code
        d['referrer'] = referrer
        d['user_agent'] = user_agent
        d['byte_send'] = bytes_sent
        d['cache_status'] = cache_status
        d['day'] = day
        d['month'] = month
        d['year'] = year
        d['hour'] = hour
        d['minute'] = minute
        d['second'] = second
        d['tz'] = '+0000'
        d['account'] = "AUTH_" + account
        d['container_name'] = container_name
        d['object_name'] = object_name

        self.logger.info(_('finish one line'))
        return d

    def process(self, obj_stream, data_object_account, data_object_container,
                data_object_name):
        '''generate hourly groupings of data from one access log file'''
        hourly_aggr_info = {}
        total_lines = 0
        bad_lines = 0
        for line in obj_stream:
            line_data = self.log_line_parser(line)
            total_lines += 1
            if not line_data:
                bad_lines += 1
                continue

            # One bucket per account-hour.
            aggr_key = (line_data['account'], line_data['year'],
                        line_data['month'], line_data['day'],
                        line_data['hour'])
            d = hourly_aggr_info.setdefault(aggr_key, {})

            d['cache_byte_send'] = (d.get('cache_byte_send', 0) +
                                    int(line_data['byte_send']))
            self.logger.info(_('cache byte send: %d') % d['cache_byte_send'])

            # Count hits per method / status-class / cache-status,
            # e.g. 'cache_GET_2xx_HIT'.  // keeps int division on Py3.
            code = '%dxx' % (int(line_data['code']) // 100)
            key = 'cache_%s_%s_%s' % (line_data['method'], code,
                                      line_data['cache_status'])
            d[key] = d.get(key, 0) + 1

        # Re-enabled: warn_percent is configured in __init__ solely for
        # this check, which was left commented out.
        if bad_lines > (total_lines * self.warn_percent):
            name = '/'.join([data_object_account, data_object_container,
                             data_object_name])
            self.logger.warning(_('I found a bunch of bad lines in %(name)s '
                                  '(%(bad)d bad, %(total)d total)') %
                                {'name': name, 'bad': bad_lines,
                                 'total': total_lines})

        return hourly_aggr_info

    def keylist_mapping(self):
        """Return the identity mapping of every stat key this processor
        can emit (byte counter plus verb x status-class x cache-status)."""
        verb_keys = 'GET HEAD'.split()
        code_keys = '2xx 3xx 4xx 5xx'.split()
        hit_keys = 'MISS HIT EXPIRED UPDATING STALE NULL'.split()

        keylist_mapping = {'cache_byte_send': 'cache_byte_send'}
        for verb in verb_keys:
            for code in code_keys:
                for hit in hit_keys:
                    key = 'cache_%s_%s_%s' % (verb, code, hit)
                    keylist_mapping[key] = key
        return keylist_mapping
 

Latest revision as of 23:29, 17 February 2013

slogging nginx cache log analysis module

download here http://wiki.openstack.org/development/swift/slogging_cache_log_module?action=AttachFile&do=get&target=sloggingcacheloganalysis.pdf