Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MAINT: utility module for Intel data parallel libs; import checks in one place #1936

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
16 changes: 10 additions & 6 deletions daal4py/sklearn/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,17 +95,21 @@ def daal_check_version(
return False


@functools.lru_cache(maxsize=256, typed=False)
def sklearn_check_version(ver):
if hasattr(Version(ver), "base_version"):
base_sklearn_version = Version(sklearn_version).base_version
res = bool(Version(base_sklearn_version) >= Version(ver))
def _package_check_version(version_to_check, available_version):
if hasattr(Version(version_to_check), "base_version"):
base_package_version = Version(available_version).base_version
res = bool(Version(base_package_version) >= Version(version_to_check))
else:
# packaging module not available
res = bool(Version(sklearn_version) >= Version(ver))
res = bool(Version(available_version) >= Version(version_to_check))
return res


@functools.lru_cache(maxsize=256, typed=False)
def sklearn_check_version(ver):
return _package_check_version(ver, sklearn_version)


def parse_dtype(dt):
if dt == np.double:
return "double"
Expand Down
13 changes: 3 additions & 10 deletions onedal/_device_offload.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,18 @@

from ._config import _get_config
from .utils._array_api import _asarray, _is_numpy_namespace
from .utils._dpep_helpers import dpctl_available, dpnp_available

try:
if dpctl_available:
from dpctl import SyclQueue
from dpctl.memory import MemoryUSMDevice, as_usm_memory
from dpctl.tensor import usm_ndarray

dpctl_available = True
except ImportError:
dpctl_available = False

try:
if dpnp_available:
import dpnp

from .utils._array_api import _convert_to_dpnp

dpnp_available = True
except ImportError:
dpnp_available = False


class DummySyclQueue:
"""This class is designed to act like dpctl.SyclQueue
Expand Down
7 changes: 4 additions & 3 deletions onedal/common/tests/test_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@
device_type_to_str,
get_memory_usm,
get_queues,
is_dpctl_available,
is_dpctl_device_available,
)
from onedal.utils._dpep_helpers import dpctl_available


@pytest.mark.parametrize("queue", get_queues())
Expand All @@ -43,7 +44,7 @@ def test_with_numpy_data(queue):
assert _get_policy(queue, X, y).get_device_name() == device_name


@pytest.mark.skipif(not is_dpctl_available(), reason="depends on dpctl")
@pytest.mark.skipif(not dpctl_available, reason="depends on dpctl")
@pytest.mark.parametrize("queue", get_queues("cpu,gpu"))
@pytest.mark.parametrize("memtype", get_memory_usm())
def test_with_usm_ndarray_data(queue, memtype):
Expand All @@ -61,7 +62,7 @@ def test_with_usm_ndarray_data(queue, memtype):


@pytest.mark.skipif(
not is_dpctl_available(["cpu", "gpu"]), reason="test uses multiple devices"
not is_dpctl_device_available(["cpu", "gpu"]), reason="test uses multiple devices"
)
@pytest.mark.parametrize("memtype", get_memory_usm())
def test_queue_parameter_with_usm_ndarray(memtype):
Expand Down
9 changes: 4 additions & 5 deletions onedal/datatypes/_data_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,14 @@
from onedal import _backend, _is_dpc_backend

from ..utils import _is_csr
from ..utils._dpep_helpers import is_dpctl_available

try:
dpctl_available = is_dpctl_available("0.14")

if dpctl_available:
import dpctl
import dpctl.tensor as dpt

dpctl_available = dpctl.__version__ >= "0.14"
except ImportError:
dpctl_available = False


def _apply_and_pass(func, *args):
if len(args) == 1:
Expand Down
9 changes: 4 additions & 5 deletions onedal/datatypes/tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,14 @@
from onedal.datatypes import from_table, to_table
from onedal.primitives import linear_kernel
from onedal.tests.utils._device_selection import get_queues
from onedal.utils._dpep_helpers import is_dpctl_available

try:
dpctl_available = is_dpctl_available("0.14")

if dpctl_available:
import dpctl
import dpctl.tensor as dpt

dpctl_available = dpctl.__version__ >= "0.14"
except ImportError:
dpctl_available = False


def _test_input_format_c_contiguous_numpy(queue, dtype):
rng = np.random.RandomState(0)
Expand Down
14 changes: 4 additions & 10 deletions onedal/tests/utils/_dataframes_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,14 @@

from sklearnex import get_config

try:
import dpctl.tensor as dpt
from ...utils._dpep_helpers import dpctl_available, dpnp_available

dpctl_available = True
except ImportError:
dpctl_available = False
if dpctl_available:
import dpctl.tensor as dpt

try:
if dpnp_available:
import dpnp

dpnp_available = True
except ImportError:
dpnp_available = False

try:
# This should be lazy imported in the
# future along with other popular
Expand Down
11 changes: 5 additions & 6 deletions onedal/tests/utils/_device_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

import pytest

from ...utils._dpep_helpers import dpctl_available


def get_queues(filter_="cpu,gpu"):
"""Get available dpctl.SycQueues for testing.
Expand Down Expand Up @@ -61,20 +63,17 @@ def get_memory_usm():
return []


def is_dpctl_available(targets=None):
try:
def is_dpctl_device_available(targets):
if dpctl_available:
samir-nasibli marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `if dpctl_available` check can be made at import time rather than at runtime.

import dpctl

if targets is None:
return True
for device in targets:
if device == "cpu" and not dpctl.has_cpu_devices():
return False
if device == "gpu" and not dpctl.has_gpu_devices():
return False
return True
except ImportError:
return False
return False


def device_type_to_str(queue):
Expand Down
16 changes: 3 additions & 13 deletions onedal/utils/_array_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,20 +18,10 @@

from collections.abc import Iterable

try:
from dpctl.tensor import usm_ndarray

dpctl_available = True
except ImportError:
dpctl_available = False

try:
import dpnp

dpnp_available = True
except ImportError:
dpnp_available = False
from ._dpep_helpers import dpctl_available, dpnp_available

if dpctl_available:
from dpctl.tensor import usm_ndarray

if dpnp_available:
import dpnp
Expand Down
50 changes: 50 additions & 0 deletions onedal/utils/_dpep_helpers.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What does dpep mean? I believe it is a confusing name for a helper file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://github.com/IntelPython/DPEP
https://intelpython.github.io/DPEP/main/
Do you want me to provide links in the module's docs, or just rename it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have provided a small description at the top of the module.

Copy link
Contributor

@Alexsandruss Alexsandruss Sep 24, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

data_parallel_extensions_helper / data_parallel_ext_helper would be better.

Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# ==============================================================================
# Copyright 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Utilities for Data Parallel Extensions libs, such as DPNP, DPCtl"""

from daal4py.sklearn._utils import _package_check_version


def is_dpctl_available(version=None):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These functions need caching in case they are used more extensively in the future.

"""Checks availability of DPCtl package"""
try:
import dpctl
import dpctl.tensor as dpt

dpctl_available = True
except ImportError:
dpctl_available = False
if dpctl_available and version is not None:
dpctl_available = _package_check_version(version, dpctl.__version__)
return dpctl_available


def is_dpnp_available(version=None):
    """Check whether the DPNP package can be imported.

    Parameters
    ----------
    version : str or None, default=None
        Version string handed to ``_package_check_version`` together with
        the installed ``dpnp.__version__``. When ``None``, only
        importability is checked.

    Returns
    -------
    bool
        ``False`` if importing ``dpnp`` raises ``ImportError``. Otherwise
        ``True`` when ``version`` is ``None``, or the result of
        ``_package_check_version(version, dpnp.__version__)``.
    """
    # Keep the try body to the single statement that can raise ImportError.
    try:
        import dpnp
    except ImportError:
        return False

    # No local named ``dpnp_available`` here, so the module-level flag of
    # the same name is not shadowed.
    if version is None:
        return True
    return _package_check_version(version, dpnp.__version__)


# Evaluated once when this module is imported; other modules import these
# flags to guard their optional dpctl / dpnp imports.
dpctl_available = is_dpctl_available()
dpnp_available = is_dpnp_available()
4 changes: 2 additions & 2 deletions sklearnex/tests/test_memory_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
_convert_to_dataframe,
get_dataframes_and_queues,
)
from onedal.tests.utils._device_selection import get_queues, is_dpctl_available
from onedal.tests.utils._device_selection import get_queues, is_dpctl_device_available
from sklearnex import config_context
from sklearnex.tests.utils import PATCHED_FUNCTIONS, PATCHED_MODELS, SPECIAL_INSTANCES
from sklearnex.utils._array_api import get_namespace
Expand Down Expand Up @@ -275,7 +275,7 @@ def test_memory_leaks(estimator, dataframe, queue, order, data_shape):


@pytest.mark.skipif(
os.getenv("ZES_ENABLE_SYSMAN") is None or not is_dpctl_available("gpu"),
os.getenv("ZES_ENABLE_SYSMAN") is None or not is_dpctl_device_available("gpu"),
reason="SYCL device memory leak check requires the level zero sysman",
)
@pytest.mark.parametrize("queue", get_queues("gpu"))
Expand Down
2 changes: 1 addition & 1 deletion tests/run_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

from daal4py import __has_dist__
from daal4py.sklearn._utils import get_daal_version
from onedal._device_offload import dpctl_available
from onedal.utils._dpep_helpers import dpctl_available

print("Starting examples validation")
# First item is major version - 2021,
Expand Down