/**
 * @file instanta_layer2vi_example.c
 * @brief Test layer2 performance.
 * @copyright Copyright (c) 2023 YUSUR Technology Co., Ltd. All Rights Reserved. Learn more at www.yusur.tech.
 * @author matianhao (math@yusur.tech)
 * @date 2023-06-15 15:12:46
 * @last_author: Kaihua Guo
 * @last_edit_time: 2023-09-04 19:16:48
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
#include <sys/time.h>
#include <sys/mman.h>
#include <arpa/inet.h>
#include <errno.h>
#include <getopt.h>

#include "instanta_layer2vi.h"

// Size in bytes of the fixed buffers in main() that hold the interface name and the buffer name.
#define USER_INPUT_MAX_LENGTH 32
// Size in bytes of the TX/RX frame buffers; main() also rejects a -s value larger than this.
#define PACKET_MAX_LENGTH     10000

// Initial value of cfg_packet_len, i.e. the frame length used when -s is not given.
// NOTE(review): "PAYCKET" looks like a typo for "PACKET"; renaming means updating every use.
#define DEFAULT_PAYCKET_LEN 64

// Packed to 1-byte alignment so the struct can be copied byte-for-byte into a frame buffer
// (14 + 20 + 20 = 54 bytes with no padding).
// NOTE(review): the field offsets look like Ethernet + option-less IPv4 + a 20-byte
// transport header - confirm against the intended wire format.
#pragma pack(1)
typedef struct
{
    // MAC (Ethernet) header
    uint8_t  dst_mac[6];
    uint8_t  src_mac[6];
    uint16_t ethertype;

    // IP header
    uint8_t  unused_info_ip_part1[6];
    uint16_t frag_and_offset;
    uint8_t  unused_info_ip_part2;
    uint8_t  protocol;
    uint8_t  unused_info_ip_part3[2];
    uint32_t src_ip;
    uint32_t dst_ip;

    // Transport-layer header
    uint16_t src_port;
    uint16_t dst_port;
    uint8_t  unused_info_trans[16];
} common_packet_header_t;
#pragma pack()

// Number of timed round trips (-n); also the number of entries in `timings`.
static int       cfg_iter       = 100000;
// Number of untimed round trips performed before measuring (-w).
static int       cfg_warmups    = 10000;
// Length in bytes passed to layer2vi_transmit_frame() for every frame (-s).
static int       cfg_packet_len = DEFAULT_PAYCKET_LEN;
// Mean round-trip time in microseconds; written by output_results(), printed by layer2vi_test().
static double    last_mean_latency_usec;
// Per-iteration round-trip durations in counter ticks; mapped by main() in ping mode only.
static uint64_t *timings;

#if defined(__amd64__)
// Forward declaration: the counter-read helper is defined after the architecture-specific section.
static inline uint64_t layer2vi_frc64_get(void);
/**
 * @brief Estimate the tick-counter frequency by comparing it against gettimeofday().
 *
 * Takes a start sample (counter value + wall-clock time), waits until at least
 * interval_usec microseconds of wall-clock time have passed, takes an end sample and
 * divides the counter delta by the elapsed time.
 *
 * @param cpu_khz_out Receives the estimated frequency in kHz on success.
 * @return 1 on success, 0 if the counter or the wall clock was seen to go backwards
 *         (in which case *cpu_khz_out is left untouched).
 */
static inline int      try_get_hz(unsigned int *cpu_khz_out)
{
    int            interval_usec = 100000;
    struct timeval tv_s, tv_e;
    uint64_t       tsc_s, tsc_e, tsc_e2;
    uint64_t       tsc_gtod, min_tsc_gtod, usec = 0;
    int            n, skew = 0;

    // Seed min_tsc_gtod with the tick cost of one gettimeofday() call.
    tsc_s = layer2vi_frc64_get();
    gettimeofday(&tv_s, NULL);
    tsc_e2       = layer2vi_frc64_get();
    min_tsc_gtod = tsc_e2 - tsc_s;
    n            = 0;
    // Start sample: repeat at least 20 times to find the cheapest gettimeofday() call, then
    // keep going (up to 100 iterations in total) while the latest call cost more than twice
    // that minimum. tsc_s/tv_s end up holding the last sample taken.
    do
    {
        tsc_s = layer2vi_frc64_get();
        gettimeofday(&tv_s, NULL);
        tsc_e2   = layer2vi_frc64_get();
        tsc_gtod = tsc_e2 - tsc_s;
        if (tsc_gtod < min_tsc_gtod)
            min_tsc_gtod = tsc_gtod;
    } while (++n < 20 || (tsc_gtod > min_tsc_gtod * 2 && n < 100));

    // End sample: poll until interval_usec has elapsed AND the latest gettimeofday() call
    // was cheap (at most twice the minimum), so the end timestamp is as tight as the start.
    do
    {
        tsc_e = layer2vi_frc64_get();
        gettimeofday(&tv_e, NULL);
        tsc_e2 = layer2vi_frc64_get();
        // A counter or wall clock that runs backwards makes the measurement meaningless.
        if (tsc_e2 < tsc_e || timercmp(&tv_e, &tv_s, <))
        {
            skew = 1;
            break;
        }
        tsc_gtod = tsc_e2 - tsc_e;
        // Elapsed wall-clock time in microseconds; the tv_usec difference may be negative,
        // which the unsigned addition below absorbs via wrap-around.
        usec     = (tv_e.tv_sec - tv_s.tv_sec) * (uint64_t)1000000;
        usec += tv_e.tv_usec - tv_s.tv_usec;
    } while (usec < interval_usec || tsc_gtod > min_tsc_gtod * 2);

    if (skew)
        return 0;

    // ticks per microsecond * 1000 = ticks per millisecond = kHz.
    *cpu_khz_out = (tsc_e - tsc_s) * 1000 / usec;
    return 1;
}

/**
 * @brief Measure the tick-counter frequency, retrying until a measurement succeeds.
 *
 * @param cpu_khz_out Receives the frequency in kHz.
 * @return Always 0.
 */
int get_cpu_khz(unsigned int *cpu_khz_out)
{
    // try_get_hz() yields 0 when it had to abandon a measurement; keep asking until it succeeds.
    for (;;)
    {
        if (try_get_hz(cpu_khz_out) != 0)
            break;
    }

    return 0;
}

/**
 * @brief Read the x86-64 time-stamp counter.
 *
 * @param pval Receives the 64-bit counter value.
 */
static inline void layer2vi_frc64(uint64_t *pval)
{
    uint32_t tsc_lo, tsc_hi;

    // rdtsc delivers the counter split across EDX:EAX.
    __asm__ __volatile__("rdtsc" : "=a"(tsc_lo), "=d"(tsc_hi));
    *pval = ((uint64_t)tsc_hi << 32) | tsc_lo;
}

#elif defined(__aarch64__)
/**
 * @brief Report the tick-counter frequency read from the cntfrq_el0 system register.
 *
 * @param cpu_khz_out Receives the register value divided by 1000.
 * @return Always 0.
 */
int get_cpu_khz(unsigned int *cpu_khz_out)
{
    uint64_t counter_freq;

    __asm__ __volatile__("mrs %0, cntfrq_el0" : "=r"(counter_freq));

    // Scale down by 1000 so the result matches the kHz unit the caller expects.
    *cpu_khz_out = counter_freq / 1e3;

    return 0;
}

/**
 * @brief Read the cntvct_el0 counter register.
 *
 * @param pval Receives the 64-bit counter value.
 */
static inline void layer2vi_frc64(uint64_t *pval)
{
    uint64_t ticks;

    __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(ticks));
    *pval = ticks;
}

#endif

/**
 * @brief Return the current tick-counter value.
 *
 * Value-returning convenience wrapper around layer2vi_frc64().
 *
 * @return The counter value read by layer2vi_frc64().
 */
static inline uint64_t layer2vi_frc64_get(void)
{
    uint64_t ticks;

    layer2vi_frc64(&ticks);
    return ticks;
}

/**
 * @brief qsort() comparator that orders uint64_t values ascending.
 *
 * @param ap Pointer to the first uint64_t.
 * @param bp Pointer to the second uint64_t.
 * @return -1 if *ap < *bp, 1 if *ap > *bp, 0 if they are equal.
 */
static int cmp_u64(const void *ap, const void *bp)
{
    const uint64_t lhs = *(const uint64_t *)ap;
    const uint64_t rhs = *(const uint64_t *)bp;

    // Three-way compare built from two comparisons; a subtraction could not represent
    // the full 64-bit difference in an int.
    return (lhs > rhs) - (lhs < rhs);
}

/**
 * @brief Index of the sorted sample that sits count / tail_divisor entries below the top.
 *
 * For small sample counts count / tail_divisor is 0, which would yield an index one past
 * the end of the array; the result is therefore clamped to the last valid index.
 *
 * @param count        Number of samples (must be > 0).
 * @param tail_divisor 20 for the 95th percentile, 100 for the 99th.
 * @return An index in the range [0, count - 1].
 */
static int tail_sample_index(int count, int tail_divisor)
{
    int idx = count - count / tail_divisor;

    return idx < count ? idx : count - 1;
}

/**
 * @brief Sort the recorded round-trip times and print one result row.
 *
 * The row contains, tab-separated: frame length, mean, minimum, median, 95th percentile,
 * 99th percentile and maximum, all latencies in microseconds. The mean is also stored in
 * last_mean_latency_usec. The `timings` array is sorted in place.
 *
 * @param total_time Sum of all recorded round-trip times, in counter ticks.
 */
static void output_results(uint64_t total_time)
{
    unsigned int freq = 0;
    double       ticks_per_usec;
    double       mean_usec;

    // Without samples every index below would be out of bounds.
    if (cfg_iter <= 0)
    {
        last_mean_latency_usec = 0.0;
        return;
    }

    get_cpu_khz(&freq);
    // kHz / 1000 = MHz = counter ticks per microsecond.
    ticks_per_usec = freq / 1e3;
    mean_usec      = total_time / ticks_per_usec / cfg_iter;

    qsort(timings, (size_t)cfg_iter, sizeof(timings[0]), cmp_u64);
    printf(
        "%d\t%0.3lf\t%0.3lf\t%0.3lf\t%0.3lf\t%0.3lf\t%0.3lf\n",
        cfg_packet_len,
        mean_usec,
        timings[0] / ticks_per_usec,
        timings[cfg_iter / 2] / ticks_per_usec,
        timings[tail_sample_index(cfg_iter, 20)] / ticks_per_usec,
        timings[tail_sample_index(cfg_iter, 100)] / ticks_per_usec,
        timings[cfg_iter - 1] / ticks_per_usec);
    last_mean_latency_usec = mean_usec;
}

/**
 * @brief Write the test frame's header into the start of a transmit buffer.
 *
 * Every header byte is first set to the filler value 0x66; the destination MAC is then
 * set to 01:02:03:04:05:06 and the EtherType to 0x0800 (network byte order).
 *
 * @param tx_frame Destination buffer; must hold at least sizeof(common_packet_header_t) bytes.
 * @return 0 on success, -1 if tx_frame is NULL.
 */
static int build_send_packet(char *tx_frame)
{
    static const uint8_t   target_mac[6] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06};
    common_packet_header_t hdr;

    if (tx_frame == NULL)
    {
        printf("NULL == tx_frame\n");
        return -1;
    }

    // Filler first, then overwrite only the fields that matter.
    memset(&hdr, 0x66, sizeof(hdr));
    memcpy(hdr.dst_mac, target_mac, sizeof(hdr.dst_mac));
    hdr.ethertype = htons(0x0800);

    memcpy(tx_frame, &hdr, sizeof(hdr));
    return 0;
}

/**
 * @brief Install a MAC receive filter on the device.
 *
 * The rule uses destination MAC 01:02:03:04:05:06 and EtherType 0x0800 (passed through
 * htons()), with the vlan and vlan_match_method fields set to 0.
 *
 * @param layer2vi Device handle the filter is added to.
 * @return 0 on success, -1 if layer2vi_add_filter() reports an error.
 */
static int set_rx_filter(LAYER2VI layer2vi)
{
    static const uint8_t match_mac[6] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06};
    layer2vi_filter_t    rule;
    int                  i;

    memset(&rule, 0, sizeof(rule));

    rule.rule_type = LAYER2VI_FILTER_TYPE_MAC;
    for (i = 0; i < 6; ++i)
    {
        rule.u.mac_filter.dst_mac[i] = match_mac[i];
    }
    rule.u.mac_filter.ethertype         = htons(0x0800);
    rule.u.mac_filter.vlan              = 0;
    rule.u.mac_filter.vlan_match_method = 0;

    if (layer2vi_add_filter(layer2vi, rule) >= 0)
    {
        return 0;
    }

    printf("Add MAC filter failed.\n");
    return -1;
}

/**
 * @brief Run the "ping" side: send a frame, busy-poll for a frame in return, time the round trip.
 *
 * Performs cfg_warmups untimed round trips followed by cfg_iter timed ones. Each timed
 * duration (in counter ticks) is stored in `timings`, and the results are printed through
 * output_results() at the end.
 *
 * @param layer2vi   Device handle used for transmit and receive.
 * @param with_break When 1, a negative receive result with errno other than EAGAIN aborts
 *                   the run; any other value keeps polling regardless of errors.
 * @return 0 on success, -1 on setup, transmit or (with_break == 1) receive failure.
 */
static int layer2vi_ping(LAYER2VI layer2vi, int with_break)
{
    char     tx_frame[PACKET_MAX_LENGTH] __attribute__((aligned(64))) = {0};
    char     rx_frame[PACKET_MAX_LENGTH] __attribute__((aligned(64))) = {0};
    uint64_t ticks_sum                                                = 0;
    uint64_t sample;
    int      rc;
    int      warm;

    if (build_send_packet(tx_frame) < 0)
    {
        printf("build_send_packet failed\n");
        return -1;
    }

    if (set_rx_filter(layer2vi) < 0)
    {
        printf("set_rx_filter failed\n");
        return -1;
    }

    // Warm-up phase: same traffic pattern as the measurement, but nothing is recorded.
    for (warm = 0; warm < cfg_warmups; ++warm)
    {
        rc = layer2vi_transmit_frame(layer2vi, tx_frame, cfg_packet_len);
        if (rc < 0)
        {
            printf("send error\n");
            return -1;
        }

        // Busy-poll until a frame arrives (positive return value).
        for (;;)
        {
            rc = layer2vi_receive_frame_nonblock(layer2vi, rx_frame, sizeof(rx_frame));
            if (rc > 0)
                break;

            if (rc < 0 && with_break == 1 && EAGAIN != errno)
            {
                printf("Got an invalid frame, error %d\n", rc);
                return -1;
            }
        }
    }

    // Measurement phase: the clock runs from just before transmit until a frame is received.
    for (sample = 0; sample < cfg_iter; ++sample)
    {
        const uint64_t t_begin = layer2vi_frc64_get();
        uint64_t       t_end;

        rc = layer2vi_transmit_frame(layer2vi, tx_frame, cfg_packet_len);
        if (rc < 0)
        {
            printf("send error\n");
            return -1;
        }

        for (;;)
        {
            rc = layer2vi_receive_frame_nonblock(layer2vi, rx_frame, sizeof(rx_frame));
            if (rc > 0)
                break;

            if (rc < 0 && with_break == 1 && EAGAIN != errno)
            {
                printf("Got an invalid frame, error %d\n", rc);
                return -1;
            }
        }

        t_end = layer2vi_frc64_get();

        timings[sample] = t_end - t_begin;
        ticks_sum += t_end - t_begin;
    }

    output_results(ticks_sum);

    return 0;
}

/**
 * @brief Run the "pong" side: busy-poll for a frame, then send one frame in reply.
 *
 * Serves cfg_iter + cfg_warmups receive/transmit rounds and then returns.
 *
 * @param layer2vi   Device handle used for receive and transmit.
 * @param with_break When 1, a negative receive result with errno other than EAGAIN aborts
 *                   the run; any other value keeps polling regardless of errors.
 * @return 0 on success, -1 on setup, transmit or (with_break == 1) receive failure.
 */
static int layer2vi_pong(LAYER2VI layer2vi, int with_break)
{
    char           tx_frame[PACKET_MAX_LENGTH] __attribute__((aligned(64))) = {0};
    char           rx_frame[PACKET_MAX_LENGTH] __attribute__((aligned(64))) = {0};
    const uint64_t rounds                                                   = cfg_iter + cfg_warmups;
    uint64_t       served;
    int            rc;

    if (build_send_packet(tx_frame) < 0)
    {
        printf("build_send_packet failed\n");
        return -1;
    }

    if (set_rx_filter(layer2vi) < 0)
    {
        printf("set_rx_filter failed\n");
        return -1;
    }

    for (served = 0; served < rounds; ++served)
    {
        // Busy-poll until a frame arrives (positive return value).
        for (;;)
        {
            rc = layer2vi_receive_frame_nonblock(layer2vi, rx_frame, sizeof(rx_frame));
            if (rc > 0)
                break;

            if (rc < 0 && with_break == 1 && EAGAIN != errno)
            {
                printf("Got an invalid frame, error %d\n", rc);
                return -1;
            }
        }

        // Answer with the pre-built frame.
        rc = layer2vi_transmit_frame(layer2vi, tx_frame, cfg_packet_len);
        if (rc < 0)
        {
            printf("send error\n");
            return -1;
        }
    }

    return 0;
}

/**
 * @brief Dispatch to the ping or pong routine and report the outcome.
 *
 * In ping mode a column header is printed before the run and the mean round-trip time
 * after it.
 *
 * @param device     Interface name; not used by this function.
 * @param layer2vi   Device handle; must not be NULL.
 * @param with_break Forwarded unchanged to layer2vi_ping() / layer2vi_pong().
 * @param ping       true to run the ping side, false to run the pong side.
 * @return 0 on success, -1 if the handle is NULL or the selected routine fails.
 */
static int layer2vi_test(char *device, LAYER2VI layer2vi, int with_break, bool ping)
{
    (void)device;

    if (layer2vi == NULL)
    {
        printf("layer2vi is NULL\n");
        return -1;
    }

    if (!ping)
    {
        if (layer2vi_pong(layer2vi, with_break) >= 0)
            return 0;

        printf("layer2vi_pong error.\n");
        return -1;
    }

    printf("paylen\tmean\tmin\t50%%\t95%%\t99%%\tmax\n");

    if (layer2vi_ping(layer2vi, with_break) < 0)
    {
        printf("layer2vi_ping error.\n");
        return -1;
    }

    printf("mean round-trip time: %.3lf usec\n", last_mean_latency_usec);
    return 0;
}

/**
 * @brief Print the command-line help to stderr and terminate with exit status 1.
 *
 * This function does not return.
 */
static void usage(void)
{
    static const char *const help_text[] = {
        "\nusage:\n",
        "  instanta_layer2vi_latency [options] <ping|pong> <interface>\n",
        "\noptions:\n",
        "  -n <iterations>     - set number of iterations\n",
        "  -s <message-size>   - set packet size.\n",
        "  -w <iterations>     - set number of warmup iterations\n",
        "\n",
    };
    size_t line;

    for (line = 0; line < sizeof(help_text) / sizeof(help_text[0]); ++line)
    {
        fputs(help_text[line], stderr);
    }

    exit(1);
}

/**
 * @brief Parse the command line, open the device and run the ping or pong side of the test.
 *
 * Usage: [options] <ping|pong> <interface>. Options -n, -s and -w set the iteration count,
 * frame length and warm-up count; -b sets the with_break flag forwarded to the test loops.
 *
 * @param argc Argument count.
 * @param argv Argument vector.
 * @return 0 on success, -1 on invalid input or test failure (usage errors exit with 1).
 */
int main(int argc, char *argv[])
{
    int      c;
    int      rc                                 = -1;
    int      name_len                           = 0;
    bool     ping                               = false;
    char     device[USER_INPUT_MAX_LENGTH]      = {0};
    char     buffer_name[USER_INPUT_MAX_LENGTH] = "buffer1";
    int      with_break                         = 0;
    size_t   timings_bytes                      = 0;
    LAYER2VI layer2vi                           = NULL;

    while ((c = getopt(argc, argv, "n:s:w:o:q:b:")) != -1)
    {
        switch (c)
        {
        case 'n':
            cfg_iter = atoi(optarg);
            break;
        case 's':
            cfg_packet_len = atoi(optarg);
            break;
        case 'w':
            cfg_warmups = atoi(optarg);
            break;
        case 'b':
            with_break = atoi(optarg);
            break;
        default:
            usage();
            break;
        }
    }

    if (cfg_packet_len > PACKET_MAX_LENGTH)
    {
        fprintf(stderr, "cfg_packet_len cannot exceed %d\n", PACKET_MAX_LENGTH);
        return -1;
    }

    if (cfg_packet_len <= 0)
    {
        fprintf(stderr, "cfg_packet_len must be greater than 0\n");
        return -1;
    }

    // Negative counts would turn into huge unsigned loop bounds and allocation sizes.
    if (cfg_iter < 0 || cfg_warmups < 0)
    {
        fprintf(stderr, "iteration and warmup counts cannot be negative\n");
        return -1;
    }

    argc -= optind;
    argv += optind;

    if (argc != 2)
        usage();

    // Validate the mode before any resource is acquired, so a bad mode leaks nothing.
    if (strcmp(argv[0], "ping") == 0)
        ping = true;
    else if (strcmp(argv[0], "pong") != 0)
        usage();

    // Bound the copy by the destination size; an over-long name is rejected, not truncated.
    name_len = snprintf(device, sizeof(device), "%s", argv[1]);
    if (name_len < 0 || (size_t)name_len >= sizeof(device))
    {
        fprintf(stderr, "interface name cannot exceed %d characters\n", USER_INPUT_MAX_LENGTH - 1);
        return -1;
    }

    // Create the device
    layer2vi = layer2vi_create(device, buffer_name);
    if (NULL == layer2vi)
    {
        printf("Create layer2vi fail!\n");
        usage();
    }

    if (ping)
    {
        // Only the ping side records timings; pre-fault the pages so the timed loop does not.
        timings_bytes = (size_t)cfg_iter * sizeof(timings[0]);
        timings       = mmap(
            NULL,
            timings_bytes,
            PROT_READ | PROT_WRITE,
            MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE,
            -1,
            0);
        if (timings == MAP_FAILED)
        {
            fprintf(stderr, "mmap failed, error: %s\n", strerror(errno));
            timings = NULL;
            goto cleanup;
        }
    }

    printf("# test device %s\n", device);
    printf("# iterations: %d\n", cfg_iter);
    printf("# warmups: %d\n", cfg_warmups);
    printf("# frame len: %d\n", cfg_packet_len);

    if (layer2vi_test(device, layer2vi, with_break, ping) < 0)
        printf("layer2vi_test failed\n");
    else
        rc = 0;

cleanup:
    // Release everything on both the success and the failure path.
    if (timings != NULL)
    {
        munmap(timings, timings_bytes);
        timings = NULL;
    }
    layer2vi_destroy(layer2vi);

    return rc;
}
