日志分析-mime统计

提取日志中未落入标准字段的 mime,按 adx、adtype 分组,统计 mime 的数量以及包含 js 的 mime 所占比例。

require 'date'
require 'net/http'
require 'uri'
require 'json'

# Tallies the MIME types advertised by the first impression of a request body.
#
# Only the top-level "imp" entry of +bodyobj+ is inspected, and only its first
# element. That element's "banner" and "video" objects may each carry a
# "mimes" list; every listed MIME type is counted.
#
# Malformed input (non-Hash body, missing/empty/non-Array "imp", non-Hash
# impression) is ignored instead of raising.
#
# @param adx [#to_s] exchange identifier, used as the prefix of each tally key
# @param bodyobj [Object] parsed request body; ignored unless it is a Hash
# @param totalmimes [Hash] counters keyed "<adx>_<mime>", incremented in place;
#   missing keys must default to 0 (e.g. Hash.new(0))
# @param statics [Hash] counters incremented in place (same default-0
#   requirement): 'includmime' once per banner/video object that has mimes,
#   'includejs' once per MIME type containing "javascript"
# @return [nil]
def getmimes(adx, bodyobj, totalmimes, statics)
  return unless bodyobj.is_a?(Hash)

  imps = bodyobj['imp']
  return unless imps.is_a?(Array)

  # Only the first impression is examined.
  first_imp = imps.first
  return unless first_imp.is_a?(Hash)

  mimes = []
  %w[banner video].each do |slot|
    section = first_imp[slot]
    next unless section.is_a?(Hash) && !section['mimes'].nil?

    statics['includmime'] += 1
    # Array() tolerates a single scalar where a list was expected.
    mimes.concat(Array(section['mimes']))
  end

  mimes.each do |mime|
    name = mime.to_s
    statics['includejs'] += 1 if name.include?('javascript')
    totalmimes["#{adx}_#{name}"] += 1
  end
  nil
end


# Driver: scan one request log and tally the MIME types found in each
# request body, then print the tallies.
#
# The log path may be passed as the first command-line argument; without one
# the original hard-coded path is used.
filepath = ARGV[0] || "/data/mvdsp/log/request.log.2017-11-30-12"
puts filepath
totalmimes = Hash.new(0)
statics = Hash.new(0)
begin
  # File.foreach closes the file once iteration finishes (or raises).
  File.foreach(filepath) do |line|
    statics['total'] += 1
    # Lines shorter than 1000 characters are counted as invalid and skipped.
    if line.length < 1000
      statics['invalidbody'] += 1
      next
    end
    # Replace undecodable bytes so split/JSON.parse do not raise on them.
    # (The earlier version also substituted "dr" with "med" here, which
    # altered payload text; that substitution is intentionally dropped.)
    line = line.scrub('?') unless line.valid_encoding?

    fields = line.split("\t")
    next if fields.length < 10

    # Column 3 is used as the exchange id and column 6 as the JSON body.
    # NOTE(review): the earlier version also read column 45 looking for a
    # 'reqtype' (adtype), but a split field is always a String so it was never
    # used; grouping by adtype would need that column parsed first.
    adx = fields[3]
    bodyobj =
      begin
        JSON.parse(fields[6])
      rescue JSON::ParserError
        # Report unparsable bodies instead of counting them silently.
        statics['invalidjson'] += 1
        {}
      end
    getmimes(adx, bodyobj, totalmimes, statics)
  end
rescue SystemCallError => e
  # Covers a missing file as well as other OS-level failures (e.g. permissions).
  puts "warn:: cannot read #{filepath}: #{e.message}"
end

puts "-----------totalmimes---------------------"
puts totalmimes
# Ascending by count, one "key<TAB>count" pair per line.
totalmimes.sort_by { |_key, count| count }.each do |key, count|
  puts "#{key}\t#{count}"
end

puts "-----------statics--------------"
puts statics
puts "--------------------------------"

 ruby hash sort by value 

# Example: three ways to order a hash by its values (ascending).
hsh ={"a" => 1000, "b" => 10, "c" => 200000}
# sort_by yields an array of [key, value] pairs; Hash[] builds a Hash from them.
Hash[hsh.sort_by{|k,v| v}] 
# or: the same conversion using to_h on the sorted pairs
hsh.sort_by{|k,v| v}.to_h
# or: shorthand for the sort alone; the result stays an array of [key, value] pairs
hsh.sort_by(&:last)

 

posted on 2017-12-01 11:05  iokde.com  阅读(176)  评论(0)  编辑  收藏  举报

导航