#!/usr/bin/python
# -*- coding: UTF-8 -*-

import os
import pandas as pd


def get_data_list(data):
    """Extract memory/CPU samples and timestamps from captured ``top`` output.

    Args:
        data: pandas DataFrame whose column ``0`` holds one raw text line per
            row, with rows labelled ``0 .. len(data) - 1``.

    Returns:
        A ``(data_list, time_list)`` tuple.

        ``data_list`` gets one entry per ``200%cpu`` line. Each entry holds
        everything collected since the previous ``200%cpu`` line: the
        used-memory percentage from a ``Mem:`` line (a string truncated to
        six characters plus ``"%"``), followed by the user and sys fields
        with their labels stripped, and the busy CPU share computed as
        ``200 - idle`` with ``"%"`` appended.

        ``time_list`` holds the seventh token of every line whose second
        token is ``"started"``.
    """
    data_list = []
    sub_list = []
    time_list = []
    for i in range(len(data)):
        line = data.loc[i, 0]
        if not isinstance(line, str):
            # Missing cells (e.g. NaN) carry no text to parse.
            continue
        # Split on single spaces and drop the empty strings produced by
        # runs of spaces used for column alignment.
        tokens = [tok for tok in line.split(" ") if tok]
        if not tokens:
            # Whitespace-only line: nothing to inspect, and indexing
            # tokens[0] below would raise IndexError.
            continue
        # Timestamp line: only trust it when the seventh token exists.
        if len(tokens) > 6 and tokens[1] == "started":
            time_list.append(tokens[6])
        head = tokens[0]
        if head == "Mem:":
            # tokens[1] is the total and tokens[3] the used amount, both
            # carrying a trailing "k".
            mem_used_percent = float(tokens[3].strip("k")) / float(tokens[1].strip("k"))
            sub_list.append(str(mem_used_percent * 100)[0:6] + "%")
        if head == "200%cpu":
            # Keep user and sys as reported, then derive the busy share
            # from the idle figure (the total is 200%).
            sub_list.append(tokens[1].strip("user"))
            sub_list.append(tokens[3].strip("sys"))
            used = 200 - int(tokens[4].strip("%idle"))
            sub_list.append(str(used) + "%")
            # The CPU line closes one sample; start a fresh one.
            data_list.append(sub_list)
            sub_list = []
    return data_list, time_list


def get_mem_and_cpu_mean_data(data_list):
    """Average the four percentage columns of the collected samples.

    Args:
        data_list: list of rows; the first four items of each row are
            strings holding a number optionally wrapped in ``"%"``.

    Returns:
        A list of four strings. Each is the column mean converted with
        ``str``, truncated to at most six characters, with ``"%"`` appended.
        When ``data_list`` is empty there is nothing to average, so four
        ``"N/A"`` placeholders are returned instead of dividing by zero.
    """
    column_count = 4
    if not data_list:
        # Keep the four-column shape so callers that lay results out
        # positionally stay aligned.
        return ["N/A"] * column_count

    column_sums = [0.0] * column_count
    for row in data_list:
        for j in range(column_count):
            column_sums[j] += float(row[j].strip("%"))

    sample_count = len(data_list)
    return [str(total / sample_count)[0:6] + "%" for total in column_sums]


def write_mem_and_cpu(dir, workbook):
    """Create the "TOP" sheet and fill it with memory/CPU samples.

    Every entry returned by ``os.listdir(dir)`` is read as a ``top`` log and
    receives a four-column band on the sheet, starting at column 2:
    row 1 carries the file name up to its first ".", row 2 the "MEM"/"CPU"
    group titles, row 3 the column labels, and rows 4 onward the samples.
    Column 1 receives the timestamps of the last file processed. The
    per-file means are written on one row, four rows below the last used
    row, under an "Avg." label.

    Args:
        dir: directory containing the log files.
        workbook: workbook object providing ``create_sheet``.
    """
    sheet = workbook.create_sheet("TOP", 2)

    header_labels = ["used", "user", "sys", "used"]
    band_width = 4       # one MEM column plus three CPU columns per file
    first_data_row = 4   # rows 1-3 are headers
    all_means = []
    time_list = []

    for band_index, file in enumerate(os.listdir(dir)):
        col = 2 + band_index * band_width

        # Row 1: file name spanning the whole band.
        sheet.merge_cells(start_row=1, start_column=col, end_row=1, end_column=col + 3)
        sheet.cell(row=1, column=col, value=file.split(".")[0])

        # Row 2: "MEM" over one column, "CPU" over the remaining three.
        sheet.cell(2, col, "MEM")
        sheet.merge_cells(start_row=2, start_column=col + 1, end_row=2, end_column=col + 3)
        sheet.cell(2, col + 1, "CPU")

        # Row 3: individual column labels.
        for offset, label in enumerate(header_labels):
            sheet.cell(3, col + offset, label)

        # Parse the log and remember this file's means for the summary row.
        data = pd.read_table(os.path.join(dir, file), header=None)
        data_list, time_list = get_data_list(data)
        all_means.extend(get_mem_and_cpu_mean_data(data_list))

        # The width of the first sample decides how many columns are written.
        sample_width = len(data_list[0]) if data_list else 0
        for row_number, sample in enumerate(data_list, start=first_data_row):
            for offset in range(sample_width):
                sheet.cell(row_number, col + offset, sample[offset])

    # Timestamps go into column 1, alongside the sample rows.
    for row_number, stamp in enumerate(time_list, start=first_data_row):
        sheet.cell(row_number, 1, stamp)

    # Summary: label three rows below the last used row, values one row lower.
    avg_row = sheet.max_row + 3
    sheet.cell(avg_row, 2, "Avg.")
    for column, mean_value in enumerate(all_means, start=2):
        sheet.cell(avg_row + 1, column, mean_value)


def merge_cells_and_fill_value(sheet, start_row, start_column, end_row, end_column, value):
    """Merge a rectangular cell range on ``sheet`` and store ``value`` in its top-left cell."""
    bounds = {
        "start_row": start_row,
        "start_column": start_column,
        "end_row": end_row,
        "end_column": end_column,
    }
    sheet.merge_cells(**bounds)
    sheet.cell(row=start_row, column=start_column, value=value)


def get_ps_data(data, ps_count):
    """Collect per-process rows and timestamps from captured ``top`` output.

    A line whose second token is ``"started"`` opens a new sample: its
    seventh token is stored as the timestamp and recording is switched on.
    While recording, every line that starts with an all-digit token and has
    at least four tokens is taken as a process row, until ``ps_count`` rows
    have been stored for the current sample. Nothing is recorded before the
    first timestamp line.

    Args:
        data: pandas DataFrame whose column ``0`` holds one raw text line per
            row, with rows labelled ``0 .. len(data) - 1``.
        ps_count: maximum number of process rows kept per sample.

    Returns:
        A ``(ps_data_list, time_list)`` tuple. ``ps_data_list`` is a flat
        list of ``[pid, cpu, ps_name]`` string triples in file order, where
        ``pid`` is the first token, ``cpu`` the second, and ``ps_name`` the
        fourth and all following tokens joined by single spaces.
        ``time_list`` is the list of sample timestamps.
    """
    # First tokens that identify the header part of a top snapshot.
    header_tokens = ("Tasks:", "Mem:", "Swap:", "200%cpu", "PID", "#")

    ps_data_list = []
    time_list = []
    count = 0          # process rows stored for the current sample
    recording = False  # whether the next process row should still be stored
    for i in range(len(data)):
        line = data.loc[i, 0]
        if not isinstance(line, str):
            # Missing cells (e.g. NaN) carry no text to parse.
            continue
        # Split on single spaces and drop the empty strings produced by
        # runs of spaces used for column alignment.
        tokens = [tok for tok in line.split(" ") if tok]
        if not tokens:
            # Whitespace-only line; indexing tokens[0] would raise.
            continue
        # Timestamp line: only trust it when the seventh token exists.
        if len(tokens) > 6 and tokens[1] == "started":
            time_list.append(tokens[6])
            count = 0
            recording = True
        if tokens[0] in header_tokens:
            continue
        # Garbage filter: a process row must start with a numeric PID and
        # be long enough to contain the name column.
        if recording and len(tokens) > 3 and tokens[0].isdigit():
            ps_data_list.append([tokens[0], tokens[1], " ".join(tokens[3:])])
            count += 1

        if count == ps_count:
            # Quota for this sample reached; ignore rows until the next
            # timestamp line.
            recording = False
    return ps_data_list, time_list


def write_ps_data_list(sheet, ps_data_list, row, col):
    """Write a list of records into ``sheet`` as a block of cells.

    Record ``k`` goes to row ``row + k``; its elements fill consecutive
    columns starting at ``col``.
    """
    for row_number, record in enumerate(ps_data_list, start=row):
        for column, element in enumerate(record, start=col):
            sheet.cell(row_number, column, element)


def get_ps_mean_data(ps_data_list, ps_count):
    """Return the ``ps_count`` processes with the highest mean CPU value.

    Records are grouped by PID. Each group keeps the process name from its
    first record, and its CPU values are averaged.

    Args:
        ps_data_list: list of ``[pid, cpu, ps_name]`` records.
        ps_count: maximum number of entries to return.

    Returns:
        A list of ``[pid, mean_cpu, ps_name]`` entries sorted by
        ``mean_cpu`` in descending order, cut to ``ps_count`` entries.
    """
    # pid -> [cpu total, name from the first record, number of records]
    totals = {}
    for record in ps_data_list:
        pid = record[0]
        cpu = float(record[1])
        stats = totals.get(pid)
        if stats is None:
            totals[pid] = [cpu, record[2], 1]
        else:
            stats[0] += cpu
            stats[2] += 1

    averaged = [[pid, total / seen, name] for pid, (total, name, seen) in totals.items()]
    averaged.sort(key=lambda entry: entry[1], reverse=True)
    return averaged[:ps_count]


def write_ps(dir, workbook):
    """Create the "PS" and "PS_MEAN" sheets from the ``top`` logs in ``dir``.

    For every entry returned by ``os.listdir(dir)``:

    * "PS" receives the raw process rows (up to 15 per sample) in a
      three-column band; column 1 of that sheet holds the sample timestamps.
    * "PS_MEAN" receives the 15 processes with the highest mean CPU value
      for that file, also in a three-column band.

    After all files are processed, a final "MEAN" band is appended to
    "PS_MEAN". It averages the per-file means by process name and lists the
    15 highest entries.

    Args:
        dir: directory containing the log files.
        workbook: workbook object providing ``create_sheet``.
    """
    ps_sheet = workbook.create_sheet("PS")
    ps_mean_sheet = workbook.create_sheet("PS_MEAN")
    # Number of processes kept per sample and per ranking.
    ps_count = 15

    col_names = ["PID", "%CPU", "PS_NAME"]

    # Rows 1-2 hold headers on both sheets, so data starts on row 3. This is
    # set before the loop because the summary band after the loop needs it
    # even when the directory yields no files.
    row_offset = 3

    files = os.listdir(dir)
    # process name -> [sum of per-file mean CPU values, number of files seen in]
    all_mean_ps_data_sum_dict = {}
    # Band start column; on "PS" the band is shifted right by one because
    # column 1 is reserved for timestamps.
    col = 1
    for file in files:
        container_name = file.split(".")[0]
        # Row 1: file name merged across the band on both sheets.
        merge_cells_and_fill_value(ps_sheet, 1, col + 1, 1, col + len(col_names), container_name)
        merge_cells_and_fill_value(ps_mean_sheet, 1, col, 1, col + len(col_names) - 1, container_name)

        # Row 2: column labels.
        for i, col_name in enumerate(col_names):
            ps_sheet.cell(2, i + col + 1, col_name)
            ps_mean_sheet.cell(2, i + col, col_name)

        data = pd.read_table(os.path.join(dir, file), header=None)
        ps_data_list, time_list = get_ps_data(data, ps_count)

        # Timestamps go into column 1 of "PS", one every ps_count rows.
        # NOTE: this lines up with the process rows only when every sample
        # contributed exactly ps_count rows.
        time_row = row_offset
        for stamp in time_list:
            ps_sheet.cell(time_row, 1, stamp)
            time_row += ps_count

        # Raw process rows for this file.
        write_ps_data_list(ps_sheet, ps_data_list, row_offset, col + 1)

        # Highest mean-CPU processes for this file.
        ps_mean_data_list = get_ps_mean_data(ps_data_list, ps_count)
        write_ps_data_list(ps_mean_sheet, ps_mean_data_list, row_offset, col)

        # Fold this file's means into the cross-file totals, keyed by
        # process name (each entry is [pid, mean_cpu, ps_name]).
        for ps_mean_data in ps_mean_data_list:
            ps_name = ps_mean_data[2]
            mean_cpu = float(ps_mean_data[1])
            if ps_name in all_mean_ps_data_sum_dict:
                all_mean_ps_data_sum_dict[ps_name][0] += mean_cpu
                all_mean_ps_data_sum_dict[ps_name][1] += 1
            else:
                all_mean_ps_data_sum_dict[ps_name] = [mean_cpu, 1]

        col += 3

    # Average of the per-file means for every process name.
    all_mean_ps_data_list = [
        [ps_name, total / seen]
        for ps_name, (total, seen) in all_mean_ps_data_sum_dict.items()
    ]
    # Highest averages first, limited to ps_count entries.
    all_mean_ps_data_list = sorted(all_mean_ps_data_list, key=(lambda x: x[1]), reverse=True)[0:ps_count]

    # Append the summary band right after the last used column of "PS_MEAN".
    col = ps_mean_sheet.max_column + 1
    merge_cells_and_fill_value(ps_mean_sheet, 1, col, 1, col + 1, "MEAN")
    ps_mean_sheet.cell(2, col, "PS_NAME")
    ps_mean_sheet.cell(2, col + 1, "%CPU")
    write_ps_data_list(ps_mean_sheet, all_mean_ps_data_list, row_offset, col)
