#!/usr/bin/env ruby

# Optional stats window read from the environment; an unset variable reads as 0.0.
$stats_begin_time = ENV.fetch('stats_part_begin', nil).to_f
$stats_end_time = ENV.fetch('stats_part_end', nil).to_f

# Totals over every counted interval, and the number of samples behind each total.
$results_sum = Hash.new(0)
$results_nr = Hash.new(0)

# The same two tables for the interval currently being read, plus the
# metric values collected for that interval.
$interval_results_sum = Hash.new(0)
$interval_results_nr = Hash.new(0)
$interval_metrics_val = Hash.new(0)

# Number of intervals merged into the totals, and the time span they cover.
$interval_nr = 0
$run_time = 0

# Last latency printed per output key; reused when an _INSERTS count is zero.
$latency_prev = Hash.new(0)

# Prints derived ("add-on") metrics computed from raw counter sums, one
# "key: value" line per metric on stdout.
#
# results_sum - Hash of counter name => summed value. Lookups of absent names
#               must yield 0 (the callers in this file pass Hash.new(0)).
# results_nr  - Hash of counter name => number of samples behind each sum.
# interval    - length of the period the sums cover; used as the divisor of
#               the *_throughput_MBps lines (presumably seconds).
# interval_nr - number of intervals merged into the sums.
# prefix      - String prepended to every emitted key (e.g. 'i.' or 'overall.').
#
# Side effect: the latest *_latency_ns value per key is stored in
# $latency_prev so it can be repeated when an _INSERTS count is zero.
def calc_addon_keys(results_sum, results_nr, interval, interval_nr, prefix)
  results_sum.each do |key, value|
    case key
    when /(.*)-misses/
      stem = Regexp.last_match(1)
      # The first companion counter with a non-zero sum becomes the
      # denominator: "<stem>s", "<stem>-references" or "<stem>-instructions".
      %w[s -references -instructions].each do |suffix|
        companion = results_sum["#{stem}#{suffix}"]
        next if companion.zero?

        # Only for the "<stem>s" form are the misses added to the denominator.
        total = suffix == 's' ? companion + value : companion
        puts "#{prefix}#{stem}-miss-rate%: #{value * 100.0 / total}"
        break
      end
      # The remaining ratios divide by the miss count.
      next if value.zero?

      if key =~ /(.*)iTLB-load-misses/
        stem = Regexp.last_match(1)
        instructions = results_sum["#{stem}instructions"]
        puts "#{prefix}#{stem}instructions-per-iTLB-miss: #{instructions / value}" unless instructions.zero?
      elsif key =~ /(.*)cache-misses/
        stem = Regexp.last_match(1)
        cycles = results_sum["#{stem}cpu-cycles"]
        puts "#{prefix}#{stem}cycles-between-cache-misses: #{cycles / value}" unless cycles.zero?
      end
    when /(.*)cache-references/
      stem = Regexp.last_match(1)
      instructions = results_sum["#{stem}instructions"]
      # NOTE(review): this is derived from cache-references, not cache-misses;
      # confirm that is what consumers of the MPKI key expect.
      puts "#{prefix}#{stem}MPKI: #{value / instructions * 1000}" unless instructions.zero?
    when /(.*)UNC_M_(.*)_INSERTS/
      stem1 = Regexp.last_match(1)
      stem2 = Regexp.last_match(2)
      stem = "#{stem1}UNC_M_#{stem2}"
      occupancy_key = "#{stem}_OCCUPANCY"
      occupancy = results_sum[occupancy_key]
      ticks_key = "#{stem1}UNC_M_CLOCKTICKS"
      ticks_sum = results_sum[ticks_key]
      ticks_nr = results_nr[ticks_key].to_f
      if results_sum.key?(occupancy_key) && ticks_sum != 0
        latency_key = "#{prefix}#{stem}_latency_ns"
        # With _INSERTS == 0 the ratio is undefined, so repeat the last value
        # printed for this key instead.
        latency = if value != 0
                    occupancy / value / (ticks_sum / (ticks_nr / interval_nr)) * 1e+9 * interval
                  else
                    $latency_prev[latency_key]
                  end
        $latency_prev[latency_key] = latency
        puts "#{latency_key}: #{latency}"
      end
      # Each insert is counted as 64 bytes.
      puts "#{prefix}#{stem}_throughput_MBps: #{value.to_f * 64 / 1024 / 1024 / interval}"
    when /(.*)cpu-cycles/
      stem = Regexp.last_match(1)
      instructions = results_sum["#{stem}instructions"]
      # Both ratios need both counters: a zero cycle count would otherwise be
      # reported as an Infinity ipc next to a meaningless 0.0 cpi.
      next if instructions.zero? || value.zero?

      puts "#{prefix}#{stem}ipc: #{instructions / value}"
      puts "#{prefix}#{stem}cpi: #{value / instructions}"
    end
  end
end

# Prints the interval that ran from prev_time to time, merges it into the
# overall totals when it lies inside the stats window, and then empties the
# per-interval tables. Does nothing when no counter was collected.
def output_interval(prev_time, time)
  return if $interval_results_sum.empty?

  elapsed = time - prev_time
  puts "time: #{time}"
  $interval_results_sum.each_pair { |name, total| puts "i.#{name}: #{total / elapsed}" }
  calc_addon_keys($interval_results_sum, $interval_results_nr, elapsed, 1, 'i.')
  $interval_metrics_val.each_pair { |name, metric| puts "i.#{name}: #{metric}" }

  # An interval ending at or before the window start is not counted; the
  # first counted interval moves the window start back to its own begin time.
  before_window = time <= $stats_begin_time
  $stats_begin_time = prev_time if !before_window && prev_time <= $stats_begin_time

  # With an end time configured, an interval ending after it is not counted;
  # the interval that straddles the end pulls the end back to its begin time.
  past_window = !before_window && !$stats_end_time.zero? && time > $stats_end_time
  $stats_end_time = prev_time if past_window && prev_time <= $stats_end_time

  if !before_window && !past_window
    $interval_results_sum.each_pair do |name, total|
      $results_sum[name] += total
      $results_nr[name] += $interval_results_nr[name]
    end
    $interval_nr += 1
  end

  [$interval_results_sum, $interval_results_nr].each(&:clear)
  $interval_metrics_val.clear unless $interval_metrics_val.empty?
end

# Example input lines (rows with a socket column first, then rows without one)
#     1.000679357,S0,12,170516599,,l1d_pend_miss.pending,8004582139,66.65,80.5,Load_Miss_Real_Latency
#     1.000679357,S0,12,1202404,,mem_load_retired.l1_miss,8004582139,66.65,,
#     1.000679357,S0,12,915585,,mem_load_retired.fb_hit,8004582139,66.65,,
#     1.000679357,S0,12,66085859,,inst_retired.any,8009729165,66.69,7.3,CPI
#     1.000679357,S0,12,481531770,,cycles,8009729165,66.69,,
#     1.000679357,S0,12,478629457,,cpu-cycles,8005866225,66.66,,
#     1.000679357,S0,12,65320043,,instructions,8005866225,66.66,0.14,insn per cycle

#     5.000739365,163913433,,l1d_pend_miss.pending,8001787549,66.67,81.0,Load_Miss_Real_Latency
#     5.000739365,1197996,,mem_load_retired.l1_miss,8001787549,66.67,,
#     5.000739365,862693,,mem_load_retired.fb_hit,8001787549,66.67,,
#     5.000739365,66368209,,inst_retired.any,8000697756,66.66,7.3,CPI
#     5.000739365,478274162,,cycles,8000697756,66.66,,
#     5.000739365,478535457,,cpu-cycles,8001495219,66.67,,
#     5.000739365,69598766,,instructions,8001495219,66.67,0.14,insn per cycle

# Reads counter lines from $stdin and adds them to the $interval_* tables,
# calling output_interval each time the timestamp advances by more than 10ms.
# Both comma-separated and whitespace-separated lines are accepted.
#
# On return $run_time holds the time span covered by the intervals that were
# handed to output_interval. The final interval is never flushed (it may be
# incomplete), so it is excluded from both the sums and the run time.
def parse
  prev_prev_time = 0
  prev_time = 0
  time = 0

  $stdin.each_line do |line|
    next unless line =~ /^\s*\d+\.\d+\W+/

    csv = line.include?(',')
    if csv
      stime, *fields = line.split(',')
      # 139.478674756,146633.37,msec,task-clock,146633365406,100.00,146.633,CPUs utilized
      # 139.478674756,
      next if fields.size < 7
    else
      stime, *fields = line.split
      # A timestamp without at least a value and an event name has nothing to
      # record, and would raise on nil further down.
      next if fields.size < 2
    end

    prev_time = time
    time = stime.to_f
    # A jump of more than 10ms means a new interval started: flush the old one.
    if time - prev_time > 0.01
      output_interval(prev_prev_time, prev_time)
      prev_prev_time = prev_time
    end

    # per-socket mode: drop the socket name and the column that follows it
    # S0,12,1202404,,mem_load_retired.l1_miss,8004582139,66.65,,
    socket = nil
    if fields[0][0] == 'S'
      socket = fields[0]
      fields.shift(2)
    end

    # for unit
    # 7928832,Bytes,llc_misses.mem_read,8005538025,100.00,,
    unit = fields[1] == 'Bytes' ? fields[1] : nil

    # CSV rows carry a unit column that the whitespace format does not have
    # 10.072447351 43181  context-switches 16105112423 100.00 0.003 M/sec
    # 1.017184940,8778,,context-switches,16689586072,100.00,0.526,K/sec
    fields.delete_at 1 if csv

    # for metrics
    # 1.000679357,S0,12,170516599,,l1d_pend_miss.pending,8004582139,66.65,80.5,Load_Miss_Real_Latency
    # A metric name starts with an uppercase letter and contains no space;
    # other trailing text is ignored:
    # 1.000679357,S0,12,65320043,,instructions,8005866225,66.66,0.14,insn per cycle
    metric_key = nil
    metric_val = nil
    if !fields[5].nil? && fields[5] =~ /^[A-Z][^ ]+$/
      metric_key = "metric.#{fields[5].chomp}"
      metric_key = "#{socket}.#{metric_key}" if socket
      metric_val = fields[4].to_f
    end

    # 777209 mem_load_uops_l3_miss_retired_remote_dram 89128748143 100.00
    value, key = fields
    value = value.to_f

    # Cut the name at "_IMC" so such counters are summed under one key.
    i_imc = key.index '_IMC'
    key = key[0, i_imc] if i_imc

    key = "#{key}_#{unit}" if unit

    $interval_results_sum[key] += value
    $interval_results_nr[key] += 1
    if socket
      socket_key = "#{socket}.#{key}"
      $interval_results_sum[socket_key] += value
      $interval_results_nr[socket_key] += 1
    end
    $interval_metrics_val[metric_key] += metric_val if metric_key
  end

  # Total time. The last record is skipped because its interval hasn't run
  # out, so the covered span ends at the last flushed interval
  # (prev_prev_time), or at the configured end time if that came earlier.
  end_time = if $stats_end_time.zero?
               prev_prev_time
             else
               [$stats_end_time, prev_prev_time].min
             end
  $run_time = end_time - $stats_begin_time
end

parse

# Per-second rate of every accumulated counter.
$results_sum.each_pair { |name, total| puts "ps.#{name}: #{total / $run_time}" }

# Raw instruction total, used to calc path-length.
total_instructions = $results_sum['instructions']
puts "total.instructions: #{total_instructions}" unless total_instructions.zero?

# Derived metrics over the whole counted run.
calc_addon_keys($results_sum, $results_nr, $run_time, $interval_nr, 'overall.')
