From 8ece9bf9e576f84b5d749379e6f5bb89ffba12eb Mon Sep 17 00:00:00 2001 From: Evgeny Mankov Date: Tue, 11 Jun 2024 20:19:44 +0100 Subject: [PATCH] [HIPIFY][BLAS][6.2.0] cuBLAS support - Step 2 - 64-bit functions + Updated synthetic tests, the regenerated `hipify-perl`, and `BLAS` `CUDA2HIP` documentation --- bin/hipify-perl | 36 ++++++++++++----- docs/tables/CUBLAS_API_supported_by_HIP.md | 20 +++++----- .../CUBLAS_API_supported_by_HIP_and_ROC.md | 20 +++++----- src/CUDA2HIP_BLAS_API_functions.cpp | 40 +++++++++++-------- .../synthetic/libraries/cublas2hipblas_v2.cu | 34 +++++++++++++++- 5 files changed, 102 insertions(+), 48 deletions(-) diff --git a/bin/hipify-perl b/bin/hipify-perl index 4aeefb5e..b883e1b5 100755 --- a/bin/hipify-perl +++ b/bin/hipify-perl @@ -1444,12 +1444,20 @@ my %experimental_funcs = ( "cudaDriverEntryPointSymbolNotFound" => "6.2.0", "cudaDriverEntryPointSuccess" => "6.2.0", "cudaDriverEntryPointQueryResult" => "6.2.0", + "cublasZgemv_v2_64" => "6.2.0", + "cublasZgemv_64" => "6.2.0", "cublasZgbmv_v2_64" => "6.2.0", "cublasZgbmv_64" => "6.2.0", + "cublasSgemv_v2_64" => "6.2.0", + "cublasSgemv_64" => "6.2.0", "cublasSgbmv_v2_64" => "6.2.0", "cublasSgbmv_64" => "6.2.0", + "cublasDgemv_v2_64" => "6.2.0", + "cublasDgemv_64" => "6.2.0", "cublasDgbmv_v2_64" => "6.2.0", "cublasDgbmv_64" => "6.2.0", + "cublasCgemv_v2_64" => "6.2.0", + "cublasCgemv_64" => "6.2.0", "cublasCgbmv_v2_64" => "6.2.0", "cublasCgbmv_64" => "6.2.0", "cuStreamBeginCaptureToGraph" => "6.2.0", @@ -1653,13 +1661,21 @@ sub experimentalSubstitutions { subst("cudaGetDriverEntryPoint", "hipGetProcAddress", "driver_entry_point"); subst("cudaGetFuncBySymbol", "hipGetFuncBySymbol", "driver_interact"); subst("cublasCgbmv_64", "hipblasCgbmv_64", "library"); - subst("cublasCgbmv_v2_64", "hipblasCgbmv_64", "library"); + subst("cublasCgbmv_v2_64", "hipblasCgbmv_v2_64", "library"); + subst("cublasCgemv_64", "hipblasCgemv_64", "library"); + subst("cublasCgemv_v2_64", "hipblasCgemv_v2_64", "library"); subst("cublasDgbmv_64", "hipblasDgbmv_64", "library"); subst("cublasDgbmv_v2_64", "hipblasDgbmv_64", "library"); + subst("cublasDgemv_64", "hipblasDgemv_64", "library"); + subst("cublasDgemv_v2_64", "hipblasDgemv_64", "library"); subst("cublasSgbmv_64", "hipblasSgbmv_64", "library"); subst("cublasSgbmv_v2_64", "hipblasSgbmv_64", "library"); + subst("cublasSgemv_64", "hipblasSgemv_64", "library"); + subst("cublasSgemv_v2_64", "hipblasSgemv_64", "library"); subst("cublasZgbmv_64", "hipblasZgbmv_64", "library"); - subst("cublasZgbmv_v2_64", "hipblasZgbmv_64", "library"); + subst("cublasZgbmv_v2_64", "hipblasZgbmv_v2_64", "library"); + subst("cublasZgemv_64", "hipblasZgemv_64", "library"); + subst("cublasZgemv_v2_64", "hipblasZgemv_v2_64", "library"); subst("curandSetGeneratorOrdering", "hiprandSetGeneratorOrdering", "library"); subst("cusolverDnCreateParams", "hipsolverDnCreateParams", "library"); subst("cusolverDnDestroyParams", "hipsolverDnDestroyParams", "library"); @@ -11318,8 +11334,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasZgeru_64", "cublasZgerc_v2_64", "cublasZgerc_64", - "cublasZgemv_v2_64", - "cublasZgemv_64", "cublasZgemvStridedBatched_64", "cublasZgemvBatched_64", "cublasZgemm_v2_64", @@ -11386,8 +11400,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasShutdown", "cublasSger_v2_64", "cublasSger_64", - "cublasSgemv_v2_64", - "cublasSgemv_64", "cublasSgemvStridedBatched_64", "cublasSgemvStridedBatched", "cublasSgemvBatched_64", @@ -11533,8 +11545,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasDmatinvBatched", "cublasDger_v2_64", "cublasDger_64", - "cublasDgemv_v2_64", - "cublasDgemv_64", "cublasDgemvStridedBatched_64", "cublasDgemvStridedBatched", "cublasDgemvBatched_64", @@ -11616,8 +11626,6 @@ sub warnHipOnlyUnsupportedFunctions { "cublasCgeru_64", "cublasCgerc_v2_64", "cublasCgerc_64", - "cublasCgemv_v2_64", - "cublasCgemv_64", "cublasCgemvStridedBatched_64", "cublasCgemvBatched_64", "cublasCgemm_v2_64", @@ -12013,6 +12021,8 @@ sub warnRocOnlyUnsupportedFunctions { "cublasZgemm3m", "cublasZgelsBatched", "cublasZgeam_64", + "cublasZgbmv_v2_64", + "cublasZgbmv_64", "cublasZdgmm_64", "cublasXerbla", "cublasUint8gemmBias", @@ -12086,6 +12096,8 @@ sub warnRocOnlyUnsupportedFunctions { "cublasSgemmBatched_64", "cublasSgelsBatched", "cublasSgeam_64", + "cublasSgbmv_v2_64", + "cublasSgbmv_64", "cublasSetVector_64", "cublasSetVectorAsync_64", "cublasSetSmCountTarget", @@ -12252,6 +12264,8 @@ sub warnRocOnlyUnsupportedFunctions { "cublasDgemmBatched_64", "cublasDgelsBatched", "cublasDgeam_64", + "cublasDgbmv_v2_64", + "cublasDgbmv_64", "cublasDdgmm_64", "cublasCtrttp", "cublasCtrsv_v2_64", @@ -12345,6 +12359,8 @@ sub warnRocOnlyUnsupportedFunctions { "cublasCgemm3m", "cublasCgelsBatched", "cublasCgeam_64", + "cublasCgbmv_v2_64", + "cublasCgbmv_64", "cublasCdgmm_64", "cublasAsumEx_64", "cublasAsumEx", diff --git a/docs/tables/CUBLAS_API_supported_by_HIP.md b/docs/tables/CUBLAS_API_supported_by_HIP.md index 2c44d4a8..59d9e7c2 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP.md @@ -725,11 +725,11 @@ |`cublasCgbmv`| | | | |`hipblasCgbmv_v2`|6.0.0| | | | | |`cublasCgbmv_64`|12.0| | | |`hipblasCgbmv_64`|6.2.0| | | |6.2.0| |`cublasCgbmv_v2`| | | | |`hipblasCgbmv_v2`|6.0.0| | | | | -|`cublasCgbmv_v2_64`|12.0| | | |`hipblasCgbmv_64`|6.2.0| | | |6.2.0| +|`cublasCgbmv_v2_64`|12.0| | | |`hipblasCgbmv_v2_64`|6.2.0| | | |6.2.0| |`cublasCgemv`| | | | |`hipblasCgemv_v2`|6.0.0| | | | | -|`cublasCgemv_64`|12.0| | | | | | | | | | +|`cublasCgemv_64`|12.0| | | |`hipblasCgemv_64`|6.2.0| | | |6.2.0| |`cublasCgemv_v2`| | | | |`hipblasCgemv_v2`|6.0.0| | | | | -|`cublasCgemv_v2_64`|12.0| | | | | | | | | | +|`cublasCgemv_v2_64`|12.0| | | |`hipblasCgemv_v2_64`|6.2.0| | | |6.2.0| |`cublasCgerc`| | | | |`hipblasCgerc_v2`|6.0.0| | | | | |`cublasCgerc_64`|12.0| | | | | | | | | | |`cublasCgerc_v2`| | | | |`hipblasCgerc_v2`|6.0.0| | | | | @@ -807,9 +807,9 @@ |`cublasDgbmv_v2`| | | | |`hipblasDgbmv`|3.5.0| | | | | |`cublasDgbmv_v2_64`|12.0| | | |`hipblasDgbmv_64`|6.2.0| | | |6.2.0| |`cublasDgemv`| | | | |`hipblasDgemv`|1.8.2| | | | | -|`cublasDgemv_64`|12.0| | | | | | | | | | +|`cublasDgemv_64`|12.0| | | |`hipblasDgemv_64`|6.2.0| | | |6.2.0| |`cublasDgemv_v2`| | | | |`hipblasDgemv`|1.8.2| | | | | -|`cublasDgemv_v2_64`|12.0| | | | | | | | | | +|`cublasDgemv_v2_64`|12.0| | | |`hipblasDgemv_64`|6.2.0| | | |6.2.0| |`cublasDger`| | | | |`hipblasDger`|1.8.2| | | | | |`cublasDger_64`|12.0| | | | | | | | | | |`cublasDger_v2`| | | | |`hipblasDger`|1.8.2| | | | | @@ -871,9 +871,9 @@ |`cublasSgbmv_v2`| | | | |`hipblasSgbmv`|3.5.0| | | | | |`cublasSgbmv_v2_64`|12.0| | | |`hipblasSgbmv_64`|6.2.0| | | |6.2.0| |`cublasSgemv`| | | | |`hipblasSgemv`|1.8.2| | | | | -|`cublasSgemv_64`|12.0| | | | | | | | | | +|`cublasSgemv_64`|12.0| | | |`hipblasSgemv_64`|6.2.0| | | |6.2.0| |`cublasSgemv_v2`| | | | |`hipblasSgemv`|1.8.2| | | | | -|`cublasSgemv_v2_64`|12.0| | | | | | | | | | +|`cublasSgemv_v2_64`|12.0| | | |`hipblasSgemv_64`|6.2.0| | | |6.2.0| |`cublasSger`| | | | |`hipblasSger`|1.8.2| | | | | |`cublasSger_64`|12.0| | | | | | | | | | |`cublasSger_v2`| | | | |`hipblasSger`|1.8.2| | | | | @@ -933,11 +933,11 @@ |`cublasZgbmv`| | | | |`hipblasZgbmv_v2`|6.0.0| | | | | |`cublasZgbmv_64`|12.0| | | |`hipblasZgbmv_64`|6.2.0| | | |6.2.0| |`cublasZgbmv_v2`| | | | |`hipblasZgbmv_v2`|6.0.0| | | | | -|`cublasZgbmv_v2_64`|12.0| | | |`hipblasZgbmv_64`|6.2.0| | | |6.2.0| +|`cublasZgbmv_v2_64`|12.0| | | |`hipblasZgbmv_v2_64`|6.2.0| | | |6.2.0| |`cublasZgemv`| | | | |`hipblasZgemv_v2`|6.0.0| | | | | -|`cublasZgemv_64`|12.0| | | | | | | | | | +|`cublasZgemv_64`|12.0| | | |`hipblasZgemv_64`|6.2.0| | | |6.2.0| |`cublasZgemv_v2`| | | | |`hipblasZgemv_v2`|6.0.0| | | | | -|`cublasZgemv_v2_64`|12.0| | | | | | | | | | +|`cublasZgemv_v2_64`|12.0| | | |`hipblasZgemv_v2_64`|6.2.0| | | |6.2.0| |`cublasZgerc`| | | | |`hipblasZgerc_v2`|6.0.0| | | | | |`cublasZgerc_64`|12.0| | | | | | | | | | |`cublasZgerc_v2`| | | | |`hipblasZgerc_v2`|6.0.0| | | | | diff --git a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md index baefeb22..9d4d7798 100644 --- a/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md +++ b/docs/tables/CUBLAS_API_supported_by_HIP_and_ROC.md @@ -725,11 +725,11 @@ |`cublasCgbmv`| | | | |`hipblasCgbmv_v2`|6.0.0| | | | |`rocblas_cgbmv`|3.5.0| | | | | |`cublasCgbmv_64`|12.0| | | |`hipblasCgbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCgbmv_v2`| | | | |`hipblasCgbmv_v2`|6.0.0| | | | |`rocblas_cgbmv`|3.5.0| | | | | -|`cublasCgbmv_v2_64`|12.0| | | |`hipblasCgbmv_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasCgbmv_v2_64`|12.0| | | |`hipblasCgbmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCgemv`| | | | |`hipblasCgemv_v2`|6.0.0| | | | |`rocblas_cgemv`|1.5.0| | | | | -|`cublasCgemv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgemv_64`|12.0| | | |`hipblasCgemv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCgemv_v2`| | | | |`hipblasCgemv_v2`|6.0.0| | | | |`rocblas_cgemv`|1.5.0| | | | | -|`cublasCgemv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasCgemv_v2_64`|12.0| | | |`hipblasCgemv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasCgerc`| | | | |`hipblasCgerc_v2`|6.0.0| | | | |`rocblas_cgerc`|3.5.0| | | | | |`cublasCgerc_64`|12.0| | | | | | | | | | | | | | | | |`cublasCgerc_v2`| | | | |`hipblasCgerc_v2`|6.0.0| | | | |`rocblas_cgerc`|3.5.0| | | | | @@ -807,9 +807,9 @@ |`cublasDgbmv_v2`| | | | |`hipblasDgbmv`|3.5.0| | | | |`rocblas_dgbmv`|3.5.0| | | | | |`cublasDgbmv_v2_64`|12.0| | | |`hipblasDgbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDgemv`| | | | |`hipblasDgemv`|1.8.2| | | | |`rocblas_dgemv`|1.5.0| | | | | -|`cublasDgemv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDgemv_64`|12.0| | | |`hipblasDgemv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDgemv_v2`| | | | |`hipblasDgemv`|1.8.2| | | | |`rocblas_dgemv`|1.5.0| | | | | -|`cublasDgemv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasDgemv_v2_64`|12.0| | | |`hipblasDgemv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasDger`| | | | |`hipblasDger`|1.8.2| | | | |`rocblas_dger`|1.5.0| | | | | |`cublasDger_64`|12.0| | | | | | | | | | | | | | | | |`cublasDger_v2`| | | | |`hipblasDger`|1.8.2| | | | |`rocblas_dger`|1.5.0| | | | | @@ -871,9 +871,9 @@ |`cublasSgbmv_v2`| | | | |`hipblasSgbmv`|3.5.0| | | | |`rocblas_sgbmv`|3.5.0| | | | | |`cublasSgbmv_v2_64`|12.0| | | |`hipblasSgbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSgemv`| | | | |`hipblasSgemv`|1.8.2| | | | |`rocblas_sgemv`|1.5.0| | | | | -|`cublasSgemv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSgemv_64`|12.0| | | |`hipblasSgemv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSgemv_v2`| | | | |`hipblasSgemv`|1.8.2| | | | |`rocblas_sgemv`|1.5.0| | | | | -|`cublasSgemv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasSgemv_v2_64`|12.0| | | |`hipblasSgemv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasSger`| | | | |`hipblasSger`|1.8.2| | | | |`rocblas_sger`|1.5.0| | | | | |`cublasSger_64`|12.0| | | | | | | | | | | | | | | | |`cublasSger_v2`| | | | |`hipblasSger`|1.8.2| | | | |`rocblas_sger`|1.5.0| | | | | @@ -933,11 +933,11 @@ |`cublasZgbmv`| | | | |`hipblasZgbmv_v2`|6.0.0| | | | |`rocblas_zgbmv`|3.5.0| | | | | |`cublasZgbmv_64`|12.0| | | |`hipblasZgbmv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZgbmv_v2`| | | | |`hipblasZgbmv_v2`|6.0.0| | | | |`rocblas_zgbmv`|3.5.0| | | | | -|`cublasZgbmv_v2_64`|12.0| | | |`hipblasZgbmv_64`|6.2.0| | | |6.2.0| | | | | | | +|`cublasZgbmv_v2_64`|12.0| | | |`hipblasZgbmv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZgemv`| | | | |`hipblasZgemv_v2`|6.0.0| | | | |`rocblas_zgemv`|1.5.0| | | | | -|`cublasZgemv_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgemv_64`|12.0| | | |`hipblasZgemv_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZgemv_v2`| | | | |`hipblasZgemv_v2`|6.0.0| | | | |`rocblas_zgemv`|1.5.0| | | | | -|`cublasZgemv_v2_64`|12.0| | | | | | | | | | | | | | | | +|`cublasZgemv_v2_64`|12.0| | | |`hipblasZgemv_v2_64`|6.2.0| | | |6.2.0| | | | | | | |`cublasZgerc`| | | | |`hipblasZgerc_v2`|6.0.0| | | | |`rocblas_zgerc`|3.5.0| | | | | |`cublasZgerc_64`|12.0| | | | | | | | | | | | | | | | |`cublasZgerc_v2`| | | | |`hipblasZgerc_v2`|6.0.0| | | | |`rocblas_zgerc`|3.5.0| | | | | diff --git a/src/CUDA2HIP_BLAS_API_functions.cpp b/src/CUDA2HIP_BLAS_API_functions.cpp index 806f0065..d9b736c0 100644 --- a/src/CUDA2HIP_BLAS_API_functions.cpp +++ b/src/CUDA2HIP_BLAS_API_functions.cpp @@ -222,23 +222,23 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // GEMV {"cublasSgemv", {"hipblasSgemv", "rocblas_sgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSgemv_64", {"hipblasSgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSgemv_64", {"hipblasSgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDgemv", {"hipblasDgemv", "rocblas_dgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDgemv_64", {"hipblasDgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDgemv_64", {"hipblasDgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCgemv", {"hipblasCgemv_v2", "rocblas_cgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCgemv_64", {"hipblasCgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCgemv_64", {"hipblasCgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZgemv", {"hipblasZgemv_v2", "rocblas_zgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZgemv_64", {"hipblasZgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZgemv_64", {"hipblasZgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // GBMV {"cublasSgbmv", {"hipblasSgbmv", "rocblas_sgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasSgbmv_64", {"hipblasSgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasSgbmv_64", {"hipblasSgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDgbmv", {"hipblasDgbmv", "rocblas_dgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasDgbmv_64", {"hipblasDgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasDgbmv_64", {"hipblasDgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCgbmv", {"hipblasCgbmv_v2", "rocblas_cgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasCgbmv_64", {"hipblasCgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasCgbmv_64", {"hipblasCgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZgbmv", {"hipblasZgbmv_v2", "rocblas_zgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, - {"cublasZgbmv_64", {"hipblasZgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasZgbmv_64", {"hipblasZgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // TRMV {"cublasStrmv", {"hipblasStrmv", "rocblas_strmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_SUPPORTED_V2_ONLY}}, @@ -638,23 +638,23 @@ const std::map CUDA_BLAS_FUNCTION_MAP { // GEMV {"cublasSgemv_v2", {"hipblasSgemv", "rocblas_sgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSgemv_v2_64", {"hipblasSgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasSgemv_v2_64", {"hipblasSgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDgemv_v2", {"hipblasDgemv", "rocblas_dgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDgemv_v2_64", {"hipblasDgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasDgemv_v2_64", {"hipblasDgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCgemv_v2", {"hipblasCgemv_v2", "rocblas_cgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCgemv_v2_64", {"hipblasCgemv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasCgemv_v2_64", {"hipblasCgemv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZgemv_v2", {"hipblasZgemv_v2", "rocblas_zgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZgemv_v2_64", {"hipblasZgemv_64", "rocblas_zgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, UNSUPPORTED}}, + {"cublasZgemv_v2_64", {"hipblasZgemv_v2_64", "rocblas_zgemv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // GBMV {"cublasSgbmv_v2", {"hipblasSgbmv", "rocblas_sgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasSgbmv_v2_64", {"hipblasSgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasSgbmv_v2_64", {"hipblasSgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasDgbmv_v2", {"hipblasDgbmv", "rocblas_dgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasDgbmv_v2_64", {"hipblasDgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasDgbmv_v2_64", {"hipblasDgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasCgbmv_v2", {"hipblasCgbmv_v2", "rocblas_cgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasCgbmv_v2_64", {"hipblasCgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasCgbmv_v2_64", {"hipblasCgbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, {"cublasZgbmv_v2", {"hipblasZgbmv_v2", "rocblas_zgbmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, - {"cublasZgbmv_v2_64", {"hipblasZgbmv_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, HIP_EXPERIMENTAL}}, + {"cublasZgbmv_v2_64", {"hipblasZgbmv_v2_64", "", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2, ROC_UNSUPPORTED | HIP_EXPERIMENTAL}}, // TRMV {"cublasStrmv_v2", {"hipblasStrmv", "rocblas_strmv", CONV_LIB_FUNC, API_BLAS, SEC::BLAS_LEVEL_2}}, @@ -2049,7 +2049,15 @@ const std::map HIP_BLAS_FUNCTION_VER_MAP { {"hipblasSgbmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasDgbmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasCgbmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCgbmv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, {"hipblasZgbmv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZgbmv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasSgemv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasDgemv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCgemv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasCgemv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZgemv_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, + {"hipblasZgemv_v2_64", {HIP_6020, HIP_0, HIP_0, HIP_LATEST}}, {"rocblas_status_to_string", {HIP_3050, HIP_0, HIP_0 }}, {"rocblas_sscal", {HIP_1050, HIP_0, HIP_0 }}, diff --git a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu index 7260f9e0..1c3c3749 100644 --- a/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu +++ b/tests/unit_tests/synthetic/libraries/cublas2hipblas_v2.cu @@ -2204,16 +2204,46 @@ int main() { // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgbmv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* x, int64_t incx, const cuComplex* beta, cuComplex* y, int64_t incy); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgbmv_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const hipblasComplex* alpha, const hipblasComplex* AP, int64_t lda, const hipblasComplex* x, int64_t incx, const hipblasComplex* beta, hipblasComplex* y, int64_t incy); // CHECK: blasStatus = hipblasCgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); - // CHECK-NEXT: blasStatus = hipblasCgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasCgbmv_v2_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); blasStatus = cublasCgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); blasStatus = cublasCgbmv_v2_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgbmv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy); // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgbmv_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, int64_t kl, int64_t ku, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int64_t lda, const hipblasDoubleComplex* x, int64_t incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int64_t incy); // CHECK: blasStatus = hipblasZgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); - // CHECK-NEXT: blasStatus = hipblasZgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasZgbmv_v2_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); blasStatus = cublasZgbmv_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); blasStatus = cublasZgbmv_v2_64(blasHandle, blasOperation, m_64, n_64, kl_64, ku_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasSgemv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const float* alpha, const float* A, int64_t lda, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasSgemv_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, const float* alpha, const float* AP, int64_t lda, const float* x, int64_t incx, const float* beta, float* y, int64_t incy); + // CHECK: blasStatus = hipblasSgemv_64(blasHandle, blasOperation, m_64, n_64, &fa, &fAP, lda_64, &fx, incx_64, &fb, &fy, incy_64); + // CHECK-NEXT: blasStatus = hipblasSgemv_64(blasHandle, blasOperation, m_64, n_64, &fa, &fAP, lda_64, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSgemv_64(blasHandle, blasOperation, m_64, n_64, &fa, &fAP, lda_64, &fx, incx_64, &fb, &fy, incy_64); + blasStatus = cublasSgemv_v2_64(blasHandle, blasOperation, m_64, n_64, &fa, &fAP, lda_64, &fx, incx_64, &fb, &fy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasDgemv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const double* alpha, const double* A, int64_t lda, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasDgemv_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, const double* alpha, const double* AP, int64_t lda, const double* x, int64_t incx, const double* beta, double* y, int64_t incy); + // CHECK: blasStatus = hipblasDgemv_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + // CHECK-NEXT: blasStatus = hipblasDgemv_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDgemv_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + blasStatus = cublasDgemv_v2_64(blasHandle, blasOperation, m_64, n_64, &da, &dA, lda_64, &dx, incx_64, &db, &dy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasCgemv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const cuComplex* alpha, const cuComplex* A, int64_t lda, const cuComplex* x, int64_t incx, const cuComplex* beta, cuComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemv_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, const hipblasComplex* alpha, const hipblasComplex* AP, int64_t lda, const hipblasComplex* x, int64_t incx, const hipblasComplex* beta, hipblasComplex* y, int64_t incy); + // HIP v2: HIPBLAS_EXPORT hipblasStatus_t hipblasCgemv_v2_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, const hipComplex* alpha, const hipComplex* AP, int64_t lda, const hipComplex* x, int64_t incx, const hipComplex* beta, hipComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasCgemv_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasCgemv_v2_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasCgemv_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + blasStatus = cublasCgemv_v2_64(blasHandle, blasOperation, m_64, n_64, &complexa, &complexA, lda_64, &complexx, incx_64, &complexb, &complexy, incy_64); + + // CUDA: CUBLASAPI cublasStatus_t CUBLASWINAPI cublasZgemv_v2_64(cublasHandle_t handle, cublasOperation_t trans, int64_t m, int64_t n, const cuDoubleComplex* alpha, const cuDoubleComplex* A, int64_t lda, const cuDoubleComplex* x, int64_t incx, const cuDoubleComplex* beta, cuDoubleComplex* y, int64_t incy); + // HIP: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemv_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, const hipblasDoubleComplex* alpha, const hipblasDoubleComplex* AP, int64_t lda, const hipblasDoubleComplex* x, int64_t incx, const hipblasDoubleComplex* beta, hipblasDoubleComplex* y, int64_t incy); + // HIP v2: HIPBLAS_EXPORT hipblasStatus_t hipblasZgemv_v2_64(hipblasHandle_t handle, hipblasOperation_t trans, int64_t m, int64_t n, const hipDoubleComplex* alpha, const hipDoubleComplex* AP, int64_t lda, const hipDoubleComplex* x, int64_t incx, const hipDoubleComplex* beta, hipDoubleComplex* y, int64_t incy); + // CHECK: blasStatus = hipblasZgemv_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + // CHECK-NEXT: blasStatus = hipblasZgemv_v2_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZgemv_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); + blasStatus = cublasZgemv_v2_64(blasHandle, blasOperation, m_64, n_64, &dcomplexa, &dcomplexA, lda_64, &dcomplexx, incx_64, &dcomplexb, &dcomplexy, incy_64); #endif return 0;