master

分支 (1)

标签 (13)

管理

管理

master

aocache-0.6.0

aocache-0.5.0

aocache-0.4.5

aocache-0.4.4

aocache-0.4.3

aocache-0.4.2

aocache-0.4.1

aocache-0.4.0

aocache-0.3.2

aocache-0.3.1

aocache-0.3.0

aocache-0.2.0

aocache-0.1.0

aocache
/
aopanalyzer.py

import argparse
import pandas as pd
from datetime import datetime
import os
import sys
from openpyxl import Workbook
from openpyxl.styles import PatternFill
from openpyxl.styles import Alignment
from openpyxl.styles import Font
from openpyxl.utils.exceptions import IllegalCharacterError
from typing import List, Dict, Optional

# Analyze the CSV profiler log generated by aocache and export the result as csv, xlsx or html

def parse_args():
    """Parse the command line and return the resulting namespace.

    Recognised arguments:
      * ``input`` (optional positional): a CSV file path or a yyyyMMdd date.
      * ``--output``: an output file name or a bare extension.
    """
    cli = argparse.ArgumentParser()
    # (flag, keyword options) pairs, registered in declaration order.
    argument_specs = (
        ("input", {"nargs": "?", "help": "Input CSV file or date(yyyyMMdd)"}),
        ("--output", {"help": "Output file name or extension(csv/xlsx/html)"}),
    )
    for flag, options in argument_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()

def get_input_path(input_arg):
    """Resolve the ``input`` command-line value to a CSV file path.

    * ``None``            -> today's profiler log under ``~/.aocache/profiler``
    * 8-digit string      -> the profiler log of that date (yyyyMMdd)
    * anything else       -> returned unchanged and treated as a file path
    """
    if input_arg is None:
        # No argument given: fall back to the log of the current day.
        stamp = datetime.now().strftime("%Y%m%d")
    elif input_arg.isdigit() and len(input_arg) == 8:
        stamp = input_arg
    else:
        return input_arg

    return os.path.expanduser(f"~/.aocache/profiler/aocache-perf-{stamp}-log.csv")

def analyze_data(df):
    """Aggregate raw profiler records into one row of metrics per join point.

    ``df`` must provide the columns ``JoinPoint``, ``Cost`` and
    ``ComputeCost``. Records with ``ComputeCost > 0`` count as computed
    executions, records with ``ComputeCost == 0`` as cache hits. Time values
    are divided by 1e6 to obtain milliseconds (presumably the raw values are
    nanoseconds -- confirm against the profiler that writes the CSV).

    Returns a DataFrame sorted ascending by the rounded PR value.
    """
    # Inclusive upper PR bound -> star rating, checked in ascending order.
    star_bands = (
        (10, "★★★★★"),
        (20, "★★★★"),
        (25, "★★★"),
        (33.33, "★★"),
        (50, "★"),
    )

    def grade(ratio):
        """Map an (unrounded) PR percentage to its level symbol."""
        for upper, stars in star_bands:
            if ratio <= upper:
                return stars
        if ratio < 100:
            return "⭕️"
        return "⛔️" if ratio == 100 else "⚠️"

    records = []
    for join_point, samples in df.groupby("JoinPoint"):
        computed_mask = samples["ComputeCost"] > 0
        cached_mask = samples["ComputeCost"] == 0
        run_count = computed_mask.sum()   # RC: computed executions
        hit_count = cached_mask.sum()     # CC: cache hits

        # Averages default to 0 so an empty subset never divides by zero.
        run_avg = pure_avg = hit_avg = 0
        if run_count > 0:
            computed = samples[computed_mask]
            run_avg = computed["Cost"].mean() / 1e6          # RA
            pure_avg = computed["ComputeCost"].mean() / 1e6  # PA
        if hit_count > 0:
            hit_avg = samples[cached_mask]["Cost"].mean() / 1e6  # CA

        # CL: share of a computed execution that is spent outside the pure computation.
        loss_rate = 0
        if run_avg > pure_avg:
            loss_rate = ((run_avg - pure_avg) / run_avg) * 100

        total_ms = samples["Cost"].sum() / 1e6   # JT
        total_count = run_count + hit_count      # JC

        hit_ratio = (hit_count / total_count * 100) if total_count > 0 else 0
        efficiency = pure_avg / hit_avg if hit_avg > 0 else 0
        if pure_avg > 0 and total_count > 0:
            perf = total_ms / (pure_avg * total_count) * 100
        else:
            perf = 0

        records.append({
            "综合性能等级(LEVEL)": grade(perf),
            "切入点(JoinPoint)": join_point,
            "计算执行平均时间[毫秒](RA)": round(run_avg, 4),
            "纯计算平均时间[毫秒](PA)": round(pure_avg, 4),
            "计算执行损耗率(CL)": round(loss_rate, 4),
            "缓存命中平均时间[毫秒](CA)": round(hit_avg, 4),
            "计算执行次数(RC)": run_count,
            "缓存命中次数(CC)": hit_count,
            "总执行时间(JT)ms": round(total_ms, 4),
            "总执行次数(JC)": total_count,
            "缓存命中比例(CR)%": round(hit_ratio, 4),
            "缓存效率倍数(CEM)": round(efficiency, 4),
            "综合性能(PR)%": round(perf, 4),
        })

    # Best (lowest PR) join points first; sorted() is stable like list.sort().
    ordered = sorted(records, key=lambda entry: entry['综合性能(PR)%'])
    return pd.DataFrame(ordered)

def export_colorized_excel(
    data: pd.DataFrame,
    output_path: str,
    headers: Optional[List[str]] = None
) -> None:
    """
    Export the analysis result to an Excel workbook, colour-coded by PR value.

    The sheet holds a merged title row, a header row, one row per record and
    finally a legend describing every column. The PR cell of each record is
    filled with a colour chosen from its PR value and the LEVEL cell gets a
    matching font colour. Rows containing characters that openpyxl rejects
    are reported on stdout and skipped.

    :param data: analysis result; must contain the PR and LEVEL columns
    :param output_path: path of the workbook to write (e.g. 'result.xlsx')
    :param headers: columns to export, in order (defaults to all columns of
        ``data``); must include the PR and LEVEL columns
    :raises ValueError: if ``data`` is empty, or ``headers`` lacks the PR or
        LEVEL column
    :raises KeyError: if ``data`` lacks the PR or LEVEL column
    """
    pr_field: str = "综合性能(PR)%"
    level_field: str = "综合性能等级(LEVEL)"
    sheet_name: str = "Sheet1"

    # Validate first so that bad input fails before any workbook is built.
    if data.empty:
        raise ValueError("DataFrame数据不能为空")
    if pr_field not in data.columns:
        raise KeyError(f"DataFrame中缺少字段: {pr_field}")
    if level_field not in data.columns:
        raise KeyError(f"DataFrame中缺少字段: {level_field}")

    # Default to every column of the frame when no explicit header list is given.
    if headers is None:
        headers = data.columns.tolist()
    pr_col_idx = headers.index(pr_field) + 1        # 1-based worksheet column
    level_col_idx = headers.index(level_field) + 1  # 1-based worksheet column

    def solid(rgb):
        """Build a solid single-colour fill."""
        return PatternFill(start_color=rgb, end_color=rgb, fill_type="solid")

    # Inclusive upper PR bound -> fill, checked in ascending order.
    green_bands = (
        (10, solid("00FF00")),
        (20, solid("33FF33")),
        (25, solid("66FF66")),
        (33.33, solid("99FF99")),
        (50, solid("CCFFCC")),
    )
    white = solid("FFFFFF")
    gray = solid("A0A0A0")
    red = solid("FF0000")

    def pick_fill(pr):
        """Return the fill for a PR value, or None when PR is not comparable (NaN)."""
        for upper, band in green_bands:
            if pr <= upper:
                return band
        if pr < 100:
            return white
        if pr == 100:
            return gray
        if pr > 100:
            return red
        return None

    # Initialise the workbook.
    wb = Workbook()
    ws = wb.active
    ws.title = sheet_name

    # Title row spanning exactly the exported columns, centred.
    title = "AOCache 性能分析结果"
    ws.merge_cells(start_row=1, start_column=1, end_row=1, end_column=len(headers))
    title_cell = ws.cell(row=1, column=1, value=title)
    title_cell.alignment = Alignment(horizontal='center', vertical='center')

    ws.append(headers)

    # Row index the next record will occupy; tracked manually because
    # ws.max_row scans every cell on each access.
    row_num = ws.max_row + 1
    for _, row in data.iterrows():
        row_values = [row.get(key, "") for key in headers]
        try:
            ws.append(row_values)
        except IllegalCharacterError as e:
            print(f"跳过包含非法字符的行: {row}，错误: {str(e)}")
            # append() may already have created the cells preceding the
            # offending value; remove that partial row (and only that row)
            # so the next record starts on a clean line.
            if ws.max_row >= row_num:
                ws.delete_rows(row_num)
            continue

        pr = row[pr_field]
        fill = pick_fill(pr)
        # Font colour of the LEVEL cell follows the PR band.
        if pr <= 50:
            font_color = fill.start_color.index
        elif 50 < pr < 100:
            font_color = "FFA500"
        else:
            font_color = "FF0000"

        for cell in ws[row_num]:
            if cell.column == pr_col_idx and fill is not None:
                cell.fill = fill
            # Right-align numeric cells.
            if isinstance(cell.value, (int, float)):
                cell.alignment = Alignment(horizontal='right')
            if cell.column == level_col_idx:
                cell.font = Font(color=font_color)
        row_num += 1

    # Legend: a blank separator row followed by one explanation per line.
    legend = [
        "字段说明",
        "综合性能等级(LEVEL)：根据综合性能(PR)的值划分的等级，具体规则为：PR <= 10 为★★★★★，PR <= 20 为★★★★，PR <= 25 为★★★，PR <= 33.33 为★★，PR <= 50 为★，PR < 100 为⭕️，PR == 100 为⛔️，PR > 100 为⚠️",
        "切入点(JoinPoint)：代码中的切入点名称",
        "计算执行平均时间[毫秒](RA)：ComputeCost大于0时的平均执行时间（毫秒），计算公式为：group[group['ComputeCost'] > 0]['Cost'].mean() / 1e6",
        "纯计算平均时间[毫秒](PA)：不包含缓存执行损耗平均计算执行时间（毫秒），计算公式为：group[group['ComputeCost'] > 0]['ComputeCost'].mean() / 1e6（计算执行次数 > 0），否则为 0",
        "计算执行损耗率(CL)：计算执行时，花在缓存执行逻辑上的时间与纯计算执行时间的比例（%），计算公式为：((RA - PA) / RA) * 100（RA > PA），否则为 0",
        "缓存命中平均时间[毫秒](CA)：缓存命中时的平均执行时间（毫秒），计算公式为：group[group['ComputeCost'] == 0]['Cost'].mean() / 1e6（缓存命中次数 > 0），否则为 0",
        "计算执行次数(RC)：ComputeCost大于0 的次数，计算公式为：(group['ComputeCost'] > 0).sum()",
        "缓存命中次数(CC)：缓存命中次数，计算公式为：(group['ComputeCost'] == 0).sum()",
        "总执行时间(JT)ms：总执行时间（毫秒），计算公式为：group['Cost'].sum() / 1e6",
        "总执行次数(JC)：总执行次数，计算公式为：RC + CC",
        "缓存命中比例(CR)%：缓存命中次数占总执行次数的比例（%），计算公式为：(CC / JC * 100)（JC > 0），否则为 0",
        "缓存效率倍数(CEM)：计算执行平均时间与缓存命中平均时间的倍数关系，计算公式为：PA / CA（CA > 0），否则为 0",
        "综合性能(PR)%：综合性能指标（%），计算公式为：JT / (PA * JC) * 100（PA > 0 且 JC > 0），否则为 0",
        "计算执行：指代码在没有使用缓存的情况下进行实际计算的过程。",
        "缓存命中：指代码在执行时，所需的数据可以从缓存中获取，无需进行实际计算。",
    ]
    ws.append([])
    for line in legend:
        ws.append([line])

    # Save the workbook.
    wb.save(output_path)

def export_colorized_html(
    data: pd.DataFrame,
    output_path: str,
    headers: Optional[List[str]] = None
) -> None:
    """
    Export the analysis result to a standalone HTML page, colour-coded by PR value.

    The page contains one sortable table (DataTables, loaded from a CDN)
    followed by a legend describing every column. The PR cell of each record
    gets a background colour chosen from its PR value and the LEVEL cell a
    matching font colour. All header and cell text is HTML-escaped.

    :param data: analysis result; must contain the PR and LEVEL columns
    :param output_path: path of the HTML file to write (e.g. 'result.html')
    :param headers: columns to export, in order (defaults to all columns of
        ``data``); must include the PR and LEVEL columns
    :raises ValueError: if ``data`` is empty, or ``headers`` lacks the PR or
        LEVEL column
    :raises KeyError: if ``data`` lacks the PR or LEVEL column
    """
    # Local import keeps this function self-contained.
    from html import escape

    pr_field: str = "综合性能(PR)%"
    level_field: str = "综合性能等级(LEVEL)"
    default_colors = {
        "green": "#00FF00",
        "green20": "#33FF33",
        "green25": "#66FF66",
        "green33": "#99FF99",
        "green50": "#CCFFCC",
        "orange": "#FFA500",
        "white": "#FFFFFF",
        "gray": "#A0A0A0",
        "red": "#FF0000",
    }
    # Inclusive upper PR bound -> colour key, checked in ascending order.
    green_bands = (
        (10, "green"),
        (20, "green20"),
        (25, "green25"),
        (33.33, "green33"),
        (50, "green50"),
    )

    def pick_fill(pr):
        """Return the background colour for a PR value, or None when PR is NaN."""
        for upper, key in green_bands:
            if pr <= upper:
                return default_colors[key]
        if pr < 100:
            return default_colors["white"]
        if pr == 100:
            return default_colors["gray"]
        if pr > 100:
            return default_colors["red"]
        return None

    # Validate the input.
    if data.empty:
        raise ValueError("DataFrame数据不能为空")
    if pr_field not in data.columns:
        raise KeyError(f"DataFrame中缺少字段: {pr_field}")
    if level_field not in data.columns:
        raise KeyError(f"DataFrame中缺少字段: {level_field}")

    # Default to every column of the frame when no explicit header list is given.
    if headers is None:
        headers = data.columns.tolist()

    pr_col_idx = headers.index(pr_field)        # 0-based table column
    level_col_idx = headers.index(level_field)  # 0-based table column

    # Collect fragments and join once at the end instead of repeated "+=".
    parts = ["""
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="utf-8">
        <title>AOCache 性能分析结果</title>
        <link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.13.6/css/jquery.dataTables.min.css">
        <style>
            .dataTables_wrapper {
                margin: 20px;
            }
            th { background-color: #f0f0f0; }
        </style>
    </head>
    <body>
    """]
    title = "AOCache 性能分析结果"
    parts.append("<table id='resultTable' class='display' style='width:100%'>")
    parts.append(f"<thead><tr><th colspan='{len(headers)}' style='text-align: center;'>{title}</th></tr><tr>")
    parts.extend(f"<th>{escape(str(header))}</th>" for header in headers)
    parts.append("</tr></thead><tbody>")

    # One table row per record.
    for _, row in data.iterrows():
        pr = row[pr_field]
        fill = pick_fill(pr)
        background = f"background-color: {fill};" if fill is not None else ""
        # Font colour of the LEVEL cell follows the PR band.
        if pr <= 50:
            font_color = fill
        elif 50 < pr < 100:
            font_color = default_colors["orange"]
        else:
            font_color = default_colors["red"]

        parts.append("<tr>")
        for i, key in enumerate(headers):
            value = row.get(key, "")
            style = ""
            if isinstance(value, (int, float)):
                style = "text-align: right;"
            if isinstance(value, float):
                value = "{:.4f}".format(value)  # show 4 decimal places
            # Join point names may contain '<', '>' or '&' (e.g. generic
            # signatures); escape so they render as text, not markup.
            text = escape(str(value))
            if i == pr_col_idx:
                parts.append(f"<td style='{background}{style}'>{text}</td>")
            elif i == level_col_idx:
                parts.append(f"<td style='color: {font_color};{style}'>{text}</td>")
            else:
                parts.append(f"<td style='{style}'>{text}</td>")
        parts.append("</tr>")

    parts.append("</tbody></table>")

    # DataTables initialisation; the default sort column is the PR column,
    # wherever it sits in ``headers``.
    parts.append("""
    <script src="https://code.jquery.com/jquery-3.7.0.min.js"></script>
    <script src="https://cdn.datatables.net/1.13.6/js/jquery.dataTables.min.js"></script>
    <script>
        $(document).ready(function() {
            $('#resultTable').DataTable({
                order: [[__PR_COL__, 'asc']], // 默认按综合性能(PR)%列升序排序
                columnDefs: [
                    { targets: '_all', type: 'num' },  // 所有列默认数值类型
                    { targets: [0,1], orderable: false }  // 禁用前两列排序
                ],
                paging: false,
                autoWidth: false,
                info: false
            });
        });
    </script>
    """.replace("__PR_COL__", str(pr_col_idx)))

    # Legend, then close the document.
    parts.append("""
    <div style="margin-top: 20px; font-size: 0.9em;">
        <h3>字段说明：</h3>
        <p><strong>综合性能等级(LEVEL)</strong>：根据综合性能(PR)的值划分的等级，具体规则为：PR <= 10 为★★★★★，PR <= 20 为★★★★，PR <= 25 为★★★，PR <= 33.33 为★★，PR <= 50 为★，PR < 100 为⭕️，PR == 100 为⛔️，PR > 100 为⚠️</p>
        <p><strong>切入点(JoinPoint)</strong>：代码中的切入点名称</p>
        <p><strong>计算执行平均时间[毫秒](RA)</strong>：ComputeCost大于0时的平均执行时间（毫秒），计算公式为：group[group['ComputeCost'] > 0]['Cost'].mean() / 1e6</p>
        <p><strong>纯计算平均时间[毫秒](PA)</strong>:不包含缓存执行损耗平均计算执行时间（毫秒），计算公式为：group[group['ComputeCost'] > 0]['ComputeCost'].mean() / 1e6（计算执行次数 > 0），否则为 0</p>
        <p><strong>计算执行损耗率(CL)</strong>：计算执行时，花在缓存执行逻辑上的时间与纯计算执行时间的比例（%），计算公式为：((RA - PA) / RA) * 100（RA > PA），否则为 0</p>
        <p><strong>缓存命中平均时间[毫秒](CA)</strong>:缓存命中时的平均执行时间（毫秒），计算公式为：group[group['ComputeCost'] == 0]['Cost'].mean() / 1e6（缓存命中次数 > 0），否则为 0</p>
        <p><strong>计算执行次数(RC)</strong>:ComputeCost大于0 的次数，计算公式为：(group['ComputeCost'] > 0).sum()</p>
        <p><strong>缓存命中次数(CC)</strong>：缓存命中次数，计算公式为：(group['ComputeCost'] == 0).sum()</p>
        <p><strong>总执行时间(JT)ms</strong>:总执行时间（毫秒），计算公式为：group['Cost'].sum() / 1e6</p>
        <p><strong>总执行次数(JC)</strong>:总执行次数，计算公式为：RC + CC</p>
        <p><strong>缓存命中比例(CR)%</strong>：缓存命中次数占总执行次数的比例（%），计算公式为：(CC / JC * 100)（JC > 0），否则为 0</p>
        <p><strong>缓存效率倍数(CEM)</strong>:计算执行平均时间与缓存命中平均时间的倍数关系，计算公式为：PA / CA（CA > 0），否则为 0</p>
        <p><strong>综合性能(PR)%</strong>：综合性能指标（%），计算公式为：JT / (PA * JC) * 100（PA > 0 且 JC > 0），否则为 0</p>
        <p><strong>计算执行</strong>：指代码在没有使用缓存的情况下进行实际计算的过程。</p>
        <p><strong>缓存命中</strong>：指代码在执行时，所需的数据可以从缓存中获取，无需进行实际计算。</p>
    </div>
    </body>
    </html>
    """)

    # Write the page as UTF-8 (matches the <meta charset> declared above).
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write("".join(parts))

def _resolve_output_path(output_arg, input_path):
    """Turn the ``--output`` value into a concrete output file path.

    :param output_arg: ``None``/empty (default to HTML), a bare extension
        (``csv``/``xlsx``/``html``) or a file name ending in one of them
    :param input_path: input CSV path; its base name seeds generated names
    :raises ValueError: if the requested format is not csv, xlsx or html
    """
    base_name = os.path.splitext(os.path.basename(input_path))[0]
    if not output_arg:
        # Default output format is HTML.
        return f"{base_name}_analysis.html"
    if "." in output_arg:
        if not output_arg.endswith((".csv", ".xlsx", ".html")):
            raise ValueError("Error: Output file must be .csv, .xlsx or .html")
        return output_arg
    # A bare extension: reject unknown ones instead of silently writing CSV
    # data into a file with a misleading suffix.
    if output_arg not in ("csv", "xlsx", "html"):
        raise ValueError("Error: Output extension must be csv, xlsx or html")
    return f"{base_name}_analysis.{output_arg}"


def main():
    """Command-line entry point: read the profiler CSV, analyse it, export the result.

    Exits with an error message when the input file does not exist or the
    requested output format is unsupported.
    """
    args = parse_args()
    input_path = get_input_path(args.input)

    print(f"Input file: {input_path}")
    if not os.path.exists(input_path):
        sys.exit(f"Error: Input file {input_path} not found")

    # Resolve the output before the (potentially slow) read so that a bad
    # --output value fails fast.
    try:
        output_path = _resolve_output_path(args.output, input_path)
    except ValueError as err:
        sys.exit(str(err))

    df = pd.read_csv(input_path, quotechar='"', escapechar='\\')

    # Compute the per-join-point metrics.
    result_df = analyze_data(df)

    # Dispatch on the output extension.
    if output_path.endswith(".xlsx"):
        export_colorized_excel(result_df, output_path)
    elif output_path.endswith(".html"):
        export_colorized_html(result_df, output_path)
    else:
        result_df.to_csv(output_path, index=False)

    print(f"Output file: {output_path}")

# Run the analysis only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()