#!/usr/bin/perl
use strict;
use File::Copy;
use File::Spec;
use File::Basename;
use File::Which;
use Cwd 'realpath';
use Getopt::Std;
use List::Util qw(max);

# Print the command-line help text to STDOUT and terminate the script.
# Never returns: it ends with a bare exit (status 0).
sub usage {
  my @help_lines = (
    "Usage: $0 [OPTION]... -i <input>\n",
    "Extract the device kernels from an hcc executable.\n\n",
    "-h          \t\t\t\tshow this help message\n",
    "-i <input>  \t\t\t\tinput file\n",
  );
  print($_) for @help_lines;
  exit;
}

# Set to a true value to trace section offsets, bundle headers and the
# external commands that get executed.
my $debug = 0;

# When true, device objects are unbundled with clang-offload-bundler;
# when false they are carved out of the input file with "dd" instead.
my $use_clang_offload_bundler = 1;

# Command-line switches: -h (help) and -i <input> (file to inspect).
my %options;
getopts('hi:', \%options);

# No switches at all, or an explicit -h: show the help text and exit.
usage() if (!%options || defined $options{h});

# -i is mandatory and has to name an existing regular file.
die("input not specified") unless defined $options{i};
my $input_file = $options{i};
die("can't find $input_file") unless -f $input_file;

# derive HIP_PATH via env var or use parent directory of extractkernel
my $HIP_PATH=$ENV{'HIP_PATH'} // dirname(Cwd::abs_path("$0/../"));

# Ask hipconfig (expected at $HIP_PATH/bin/hipconfig) for the compiler name,
# the ROCm path and the HIP-Clang path. If hipconfig cannot be run the
# values end up empty/undefined and the tool lookup further down falls back
# to searching PATH.
my $HIP_COMPILER = `$HIP_PATH/bin/hipconfig --compiler`;
my $ROCM_PATH = `$HIP_PATH/bin/hipconfig --rocmpath`;
my $HIP_CLANG_PATH = `$HIP_PATH/bin/hipconfig --hipclangpath`;

# Backticks return the command output verbatim. Strip any trailing
# newline/whitespace so the exact string comparisons against "clang"/"hcc"
# and the path concatenations that use these values are not broken by it.
for my $hipconfig_value ($HIP_COMPILER, $ROCM_PATH, $HIP_CLANG_PATH) {
  $hipconfig_value =~ s/\s+\z// if defined $hipconfig_value;
}

# look for llvm-objdump and clang-offload-bundler
#
# Strategy: first try the compiler-specific bin directory, then fall back to
# whatever "which" finds on PATH. The script dies if either tool is missing.
my $tools_path_prefix;
my $llvm_objdump;
my $clang_offload_bundler;

if (defined $HIP_COMPILER and $HIP_COMPILER eq "clang") {
  # Search the path with respect to HIP_CLANG_PATH
  $tools_path_prefix = $HIP_CLANG_PATH;
}
elsif (defined $HIP_COMPILER and $HIP_COMPILER eq "hcc") {
  # Search the path with respect to HCC_HOME if it is set, else search in ROCM_PATH
  if (defined $ENV{'HCC_HOME'}) {
    $tools_path_prefix = File::Spec->catfile($ENV{'HCC_HOME'}, "bin");
  }
  else {
    # realpath() may yield undef when the directory does not exist.
    $tools_path_prefix = realpath($ROCM_PATH."/hcc/bin");
  }
}

# Only build candidate paths when a prefix is actually known; joining an
# undefined or empty prefix would silently produce "/llvm-objdump" and probe
# the filesystem root instead of the intended directory.
if (defined $tools_path_prefix and length $tools_path_prefix) {
  $llvm_objdump = File::Spec->catfile($tools_path_prefix, "llvm-objdump");
  $clang_offload_bundler = File::Spec->catfile($tools_path_prefix, "clang-offload-bundler");
}

# Fall back to PATH for llvm-objdump; which() returns undef when not found.
if (!(defined $llvm_objdump and -f $llvm_objdump)) {
  $llvm_objdump = which("llvm-objdump");
  if (!(defined $llvm_objdump and -f $llvm_objdump)) {
    die("Can't find llvm-objdump\n");
  }
}

# Same fallback for clang-offload-bundler.
if (!(defined $clang_offload_bundler and -f $clang_offload_bundler)) {
  $clang_offload_bundler = which("clang-offload-bundler");
  if (!(defined $clang_offload_bundler and -f $clang_offload_bundler)) {
    die("Can't find clang-offload-bundler\n");
  }
}

# Look up one section in the "objdump -h" listing of a file.
#   $file    - path of the object/executable to inspect
#   $section - exact section name to look for (e.g. ".kernel")
# Returns (size, file offset), both in bytes, of the first section with that
# name, or (0, 0) when no such section is listed.
# Dies if objdump cannot be started.
sub _find_section {
  my ($file, $section) = @_;

  # List-form pipe open: no shell is involved, so file names containing
  # spaces or shell metacharacters are passed to objdump untouched.
  open(my $objdump_fh, '-|', 'objdump', '-h', $file)
    or die("Can't run objdump on $file: $!\n");

  my ($size, $offset) = (0, 0);
  my $found = 0;
  while (my $line = <$objdump_fh>) {
    # Keep draining the pipe after a hit so objdump can finish normally.
    next if $found;

    # Section rows look like: Idx Name Size VMA LMA File-off Algn
    my @fields = split(' ', $line);
    next unless @fields >= 6 && $fields[1] eq $section;

    ($size, $offset) = (hex($fields[2]), hex($fields[5]));
    $found = 1;
  }
  close($objdump_fh);

  return ($size, $offset);
}

# kernel section information for HCC
my $kernel_section_name = ".kernel";
my $kernel_triple = "hcc-amdgcn-amd-amdhsa--";
my $kernel_blob_alignment = 1;

my ($kernel_section_size, $kernel_section_offset) = _find_section($input_file, $kernel_section_name);
if (!$kernel_section_size) {

  # If there isn't a section created by HCC,
  # try to detect a kernel section created by HIP-Clang
  $kernel_section_name = ".hip_fatbin";
  $kernel_triple = "hip-amdgcn-amd-amdhsa-";
  $kernel_blob_alignment = 8;

  ($kernel_section_size, $kernel_section_offset) = _find_section($input_file, $kernel_section_name);
  $kernel_section_size or die("No kernel section found\n");
}

# First byte past the kernel section, as an offset into the input file.
my $kernel_section_end = $kernel_section_offset + $kernel_section_size;
if ($debug) {
  print "kernel section size: $kernel_section_size\n";
  print "kernel section offset: $kernel_section_offset\n";
  print "kernel section end: $kernel_section_end\n";
}

# parse kernel bundle header
#
# Open the input read-only. The three-argument form keeps characters in the
# file name from being interpreted as an open mode, and the low-precedence
# "or" makes the die apply to open() itself (with "||" it would bind to the
# file name and a failed open would go unnoticed).
open(INPUT_FP, '<', $input_file) or die("can't open $input_file: $!\n");
binmode INPUT_FP;

# Read one unsigned 64-bit little-endian integer from INPUT_FP at the
# current file position.
#   $what - description of the field, used in the error message
# Dies with "Fail to parse <what>" when fewer than 8 bytes can be read.
sub _read_u64 {
  my ($what) = @_;
  my $raw;
  my $got = read(INPUT_FP, $raw, 8);
  (defined $got && $got == 8) or die("Fail to parse $what\n");
  # The offload bundle header stores its integers little-endian, so decode
  # with an explicit byte order instead of the host's native one.
  return unpack("Q<", $raw);
}

# Walk the kernel section blob by blob. Each blob starts with the clang
# offload bundle magic string followed by a table of bundle entries
# (offset, size, triple). For every blob:
#   * the whole blob is copied to <input>-NNN.bundle,
#   * every GPU bundle is written to <input>-NNN-<target>.hsaco,
#   * every .hsaco is disassembled into <input>-NNN-<target>.isa.
my $current_blob_offset = $kernel_section_offset;
my $num_blobs = 0;
while (1) {

  # adjust the offset to the blob alignment
  $current_blob_offset = int(($current_blob_offset + ($kernel_blob_alignment - 1)) / $kernel_blob_alignment) * $kernel_blob_alignment;
  if ($debug) {
    print "Current blob offset: $current_blob_offset\n";
  }

  if ($current_blob_offset >= $kernel_section_end) {
    if ($debug) {
      print "reached end of kernel section\n";
    }
    last;
  }

  seek(INPUT_FP, $current_blob_offset, 0)
    or die("Fail to seek to offset $current_blob_offset in $input_file: $!\n");

  # skip OFFLOAD_BUNDLER_MAGIC_STR
  my $magic_str;
  my $read_bytes = read(INPUT_FP, $magic_str, 24);
  if (!defined($read_bytes) || ($read_bytes != 24) || ($magic_str ne "__CLANG_OFFLOAD_BUNDLE__")) {
    # didn't detect the bundle magic string
    if ($debug) {
      print "Offload bundle magic string not detected\n";
    }
    last;
  }

  # read number of bundles
  my $num_bundles = _read_u64("number of bundles");
  if ($debug) {
    print "Blob $num_blobs, number of bundles: $num_bundles\n";
  }

  # detected GPU targets
  my @asic_target_array;

  # Offset/size of the bundle that lies furthest into the blob; together
  # they give the total extent of the blob.
  my $last_bundle_offset = 0;
  my $last_bundle_size = 0;

  # strings for creating new files
  my $file_blob_number = sprintf("%03d", $num_blobs);
  my $filename_prefix = "${input_file}-${file_blob_number}";

  # clang-offload-bundler arguments; the host entry is sent to /dev/null and
  # one output/target pair is appended per GPU bundle found below.
  my $clang_offloadbundler_outputs="-outputs=/dev/null";
  my $clang_offloadbundler_targets="-targets=host-x86_64-unknown-linux";

  for (my $iter = 0; $iter < $num_bundles; $iter++) {
    # read bundle offset
    my $offset = _read_u64("bundle offset");
    $last_bundle_offset = max($last_bundle_offset, $offset);

    # read bundle size
    my $size = _read_u64("bundle size");
    if ($last_bundle_offset == $offset) {
      $last_bundle_size = $size;
    }

    # read triple size
    my $triple_size = _read_u64("triple size");

    # triple
    my $triple;
    $read_bytes = read(INPUT_FP, $triple, $triple_size);
    (defined $read_bytes && $read_bytes == $triple_size) or die("Fail to parse triple\n");

    if ($debug) {
      print("\t bundle $iter: offset=$offset, size=$size, triple_size=$triple_size, triple=$triple\n");
    }

    # Only process GPU targets, skip host targets
    next unless $triple =~ /^\Q$kernel_triple\E/;

    # Whatever follows the triple prefix names the GPU target.
    my $asic_target = substr($triple, length($kernel_triple));

    # augment arguments for clang-offload-bundler
    my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco";
    $clang_offloadbundler_outputs = "${clang_offloadbundler_outputs},${hsaco_file_name}";
    $clang_offloadbundler_targets = "${clang_offloadbundler_targets},${triple}";

    # add into asic_target_array
    push(@asic_target_array, $asic_target);

    if (!$use_clang_offload_bundler) {
      # Bundle offsets are relative to the start of the blob.
      my $offset_for_hsaco = $current_blob_offset + $offset;
      # List form: dd is run without a shell, so file names are passed as-is.
      my @dd_command = ("dd", "if=${input_file}", "of=${hsaco_file_name}", "skip=$offset_for_hsaco", "count=$size", "bs=1", "status=none");
      if ($debug) {
        print("extract code bundle with dd: @dd_command\n");
      }
      system(@dd_command) == 0
        or die("Fail to extract code bundle with dd\n");
    }
  }

  # extract the code blob
  my $blob_filename = "${filename_prefix}.bundle";
  my $write_bytes = $last_bundle_offset + $last_bundle_size;

  # A blob with zero extent would leave the offset unchanged and make this
  # loop revisit the same blob forever, so treat it as a fatal error.
  $write_bytes > 0
    or die("Offload bundle at offset $current_blob_offset is empty\n");

  system("dd", "if=$input_file", "of=$blob_filename", "skip=$current_blob_offset", "count=$write_bytes", "bs=1", "status=none") == 0
    or die("Extracting kernel bundle file failed: $?");

  if ($use_clang_offload_bundler) {
    # use clang-offload-bundler to unbundle HSACO
    my @command = ($clang_offload_bundler, "-unbundle", "-type=o", "-inputs=${blob_filename}", $clang_offloadbundler_outputs, $clang_offloadbundler_targets);
    if ($debug) {
      print("clang offload bundler command: @command\n");
    }
    system(@command) == 0
      or die("Fail to execute clang-offload-bundler");
  }

  for my $asic_target (@asic_target_array) {
    my $hsaco_file_name = "${filename_prefix}-${asic_target}.hsaco";
    my $isa_file_name = "${filename_prefix}-${asic_target}.isa";
    my $disassemble_error = "Fail to disassemble AMDGPU ISA for target " . $asic_target;

    # use llvm-objdump to dump out GCN ISA
    # The disassembly is read through a list-form pipe (no shell) and copied
    # into the .isa file, replacing the former shell "> file" redirection.
    open(my $isa_fh, '>', $isa_file_name)
      or die("$disassemble_error: can't write $isa_file_name: $!\n");
    open(my $disasm_fh, '-|', $llvm_objdump, "--disassemble", "--mcpu=$asic_target", $hsaco_file_name)
      or die("$disassemble_error: can't run $llvm_objdump: $!\n");
    while (my $line = <$disasm_fh>) {
      print {$isa_fh} $line;
    }
    # close() on a pipe reports a non-zero exit status of the child.
    close($disasm_fh) or die($disassemble_error);
    close($isa_fh) or die("$disassemble_error: can't write $isa_file_name: $!\n");

    if ($debug) {
      print("Generated GCN ISA for " . $asic_target . " at: " . $isa_file_name . "\n");
    }
  }

  # Continue right after this blob (re-aligned at the top of the loop).
  $current_blob_offset = $current_blob_offset + $write_bytes;
  $num_blobs++;
}

close(INPUT_FP);

$num_blobs or die("No device code found.\n");
exit(0);

