#
# Copyright 2020 NVIDIA Corporation. All rights reserved
# This sample is for Linux desktop platforms. (Power, x86_64, ARM Server)
#

# Host platform detection.
# OS is only probed when it is not already defined.
ifndef OS
OS   := $(shell uname)
endif

# HOST_ARCH is always probed, independently of OS: the cross-compilation
# checks below compare it against TARGET_ARCH, so it must never be left empty
# just because OS happened to be defined already.
HOST_ARCH := $(shell uname -m)

# Target platform defaults to the host; override on the command line or via
# the environment to cross-compile (e.g. make TARGET_ARCH=aarch64).
TARGET_ARCH ?= $(HOST_ARCH)

# Same "set only if undefined" semantics as ?=, but simply expanded so the
# shell pipeline runs once instead of on every reference to TARGET_OS.
ifeq ($(origin TARGET_OS),undefined)
TARGET_OS := $(shell uname | tr A-Z a-z)
endif

# Header and library search paths, relative to this sample's directory.
# Native builds (TARGET_ARCH equals HOST_ARCH) use the top-level include/ and
# lib64/ trees plus ../../include and ../../lib64. Cross builds use the
# per-target trees under ../../../../targets/ and additionally point the
# linker at the stubs/ directory below LIB_PATH.
ifeq ($(HOST_ARCH), $(TARGET_ARCH))
    EXTRAS_LIB_PATH := ../../lib64
    LIB_PATH ?= ../../../../lib64
    INCLUDES += -I../../../../include -I../../include
else
    # NOTE(review): the include directory is keyed on the *host* arch/OS
    # while the library directory is keyed on the target - confirm this is
    # intended.
    INCLUDES += -I../../../../targets/$(HOST_ARCH)-$(shell uname | tr A-Z a-z)/include
    LIB_PATH ?= ../../../../targets/$(TARGET_ARCH)-$(TARGET_OS)/lib
    # Recursive assignment: $(LIB_PATH) is expanded when this is used.
    TARGET_CUDA_PATH = -L $(LIB_PATH)/stubs
endif

# Linker arguments. Any inherited value of LIBS is discarded; native builds
# start from the EXTRAS_LIB_PATH search directory, cross builds start empty.
ifeq ($(TARGET_ARCH), $(HOST_ARCH))
    LIBS = -L $(EXTRAS_LIB_PATH)
else
    LIBS :=
endif
# TARGET_CUDA_PATH is only set for cross builds and expands to nothing
# otherwise.
LIBS += $(TARGET_CUDA_PATH) -lcuda
LIBS += -L $(LIB_PATH) -lcupti

# Extra nvcc flags; reset to empty here.
NVCCFLAGS :=

# Host compiler selection, applied to cross builds only.
ifneq ($(HOST_ARCH), $(TARGET_ARCH))
    # Default cross toolchain when targeting aarch64 Linux; a HOST_COMPILER
    # that is already defined is left alone.
    ifeq ($(TARGET_ARCH)-$(TARGET_OS), aarch64-linux)
        HOST_COMPILER ?= aarch64-linux-gnu-g++
    endif

    # Pass the chosen host compiler to nvcc through -ccbin.
    ifdef HOST_COMPILER
        NVCC_COMPILER = -ccbin $(HOST_COMPILER)
    endif
endif

# Name of the shared library built by this Makefile.
FINALIZE_LIB = libCuptiFinalize.so

# CUDA compiler driver; defaults to the nvcc found on PATH and can be
# overridden (e.g. make NVCC=/usr/local/cuda/bin/nvcc).
NVCC ?= nvcc

# Neither of these names a file created by its recipe.
.PHONY: cupti_finalize clean

# Default goal: make sure the library is up to date, then print the usage
# hint. The hint is a real recipe command rather than $(info ...), so it is
# emitted only after a successful build instead of at recipe-expansion time.
cupti_finalize: $(FINALIZE_LIB)
	@echo 'export CUDA_INJECTION64_PATH=<full_path>/libCuptiFinalize.so and run any CUDA sample with runtime more than 10 seconds(Graphics/Simulations samples) for demonstration'

# The library is the literal target, so it is relinked only when
# libinject.cpp is newer than the existing .so.
$(FINALIZE_LIB): libinject.cpp
	$(NVCC) $(NVCC_COMPILER) $(NVCCFLAGS) $(INCLUDES) -o $@ -Xcompiler -fPIC -shared $< $(LIBS)

# Remove the build products.
clean:
	rm -f $(FINALIZE_LIB) libinject.o
