From c470d024af9a973fd575bf930f5a44bd1451bbb4 Mon Sep 17 00:00:00 2001 From: Jen Hagg Date: Wed, 27 Jul 2022 18:10:08 -0700 Subject: [PATCH 1/7] refactor: simplify constructor parameters --- powersimdata/data_access/context.py | 10 ++++---- powersimdata/data_access/data_access.py | 31 ++++++++----------------- 2 files changed, 15 insertions(+), 26 deletions(-) diff --git a/powersimdata/data_access/context.py b/powersimdata/data_access/context.py index e2fafed4d..c99b8f060 100644 --- a/powersimdata/data_access/context.py +++ b/powersimdata/data_access/context.py @@ -8,18 +8,18 @@ class Context: """Factory for data access instances""" @staticmethod - def get_data_access(): + def get_data_access(_fs=None): """Return a data access instance appropriate for the current environment. + :param fs.base.FS _fs: a filesystem instance, or None to use a class specific + default :return: (:class:`powersimdata.data_access.data_access.DataAccess`) -- a data access instance """ - root = server_setup.DATA_ROOT_DIR - if server_setup.DEPLOYMENT_MODE == DeploymentMode.Server: - return SSHDataAccess(root) - return LocalDataAccess(root) + return SSHDataAccess(_fs) + return LocalDataAccess(_fs) @staticmethod def get_launcher(scenario): diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index b49e25022..91f45eef4 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -16,9 +16,8 @@ class DataAccess: """Interface to a local or remote data store.""" - def __init__(self, root): + def __init__(self): """Constructor""" - self.root = root self.join = fs.path.join self.local_fs = None @@ -163,10 +162,10 @@ def push(self, file_name, checksum): class LocalDataAccess(DataAccess): """Interface to shared data volume""" - def __init__(self, root=server_setup.LOCAL_DIR): - super().__init__(root) - self.local_fs = fs.open_fs(root) - self.fs = self._get_fs() + def __init__(self, _fs=None): + super().__init__() + self.local_fs = fs.open_fs(server_setup.LOCAL_DIR) + self.fs = _fs if _fs is not None else self._get_fs() def _get_fs(self): mfs = MultiFS() @@ -193,23 +192,13 @@ def push(self, file_name, checksum): class SSHDataAccess(DataAccess): """Interface to a remote data store, accessed via SSH.""" - def __init__(self, root=server_setup.DATA_ROOT_DIR): + def __init__(self, _fs=None): """Constructor""" - super().__init__(root) - self._fs = None + super().__init__() + self.root = server_setup.DATA_ROOT_DIR + self.fs = _fs if _fs is not None else get_multi_fs(self.root) self.local_fs = fs.open_fs(server_setup.LOCAL_DIR) - @property - def fs(self): - """Get or create the filesystem object - - :raises IOError: if connection failed or still within retry window - :return: (*fs.multifs.MultiFS*) -- filesystem instance - """ - if self._fs is None: - self._fs = get_multi_fs(self.root) - return self._fs - def exec_command(self, command): ssh_fs = self.fs.get_fs("ssh_fs") return ssh_fs.exec_command(command) @@ -285,7 +274,7 @@ class _DataAccessTemplate(SSHDataAccess): def __init__(self, fs_url): self.local_fs = fs.open_fs(fs_url) - self._fs = self._get_fs(fs_url) + self.fs = self._get_fs(fs_url) self.root = "foo" self.join = fs.path.join From c8ec216ee5ed751a1e8d21af1ec222094030346a Mon Sep 17 00:00:00 2001 From: Jen Hagg Date: Wed, 27 Jul 2022 18:39:01 -0700 Subject: [PATCH 2/7] perf: pass minimal multifs based on the context --- powersimdata/data_access/context.py | 12 +++++++----- powersimdata/data_access/fs_helper.py | 19 +++++++++++++++++++ powersimdata/input/input_base.py | 3 +-- powersimdata/input/input_data.py | 3 +++ powersimdata/input/profile_input.py | 3 +++ powersimdata/output/output_data.py | 3 ++- 6 files changed, 35 insertions(+), 8 deletions(-) diff --git a/powersimdata/data_access/context.py b/powersimdata/data_access/context.py index c99b8f060..5b2aa2b19 100644 --- a/powersimdata/data_access/context.py +++ b/powersimdata/data_access/context.py @@ -8,18 +8,20 @@ class Context: """Factory for data access instances""" @staticmethod - def get_data_access(_fs=None): + def get_data_access(make_fs=None): """Return a data access instance appropriate for the current environment. - :param fs.base.FS _fs: a filesystem instance, or None to use a class specific - default + :param callable make_fs: a function that returns a filesystem instance, or + None to use a default :return: (:class:`powersimdata.data_access.data_access.DataAccess`) -- a data access instance """ if server_setup.DEPLOYMENT_MODE == DeploymentMode.Server: - return SSHDataAccess(_fs) - return LocalDataAccess(_fs) + if make_fs is None: + make_fs = lambda: None # noqa: E731 + return SSHDataAccess(make_fs()) + return LocalDataAccess() @staticmethod def get_launcher(scenario): diff --git a/powersimdata/data_access/fs_helper.py b/powersimdata/data_access/fs_helper.py index 78ef551c4..2919962fa 100644 --- a/powersimdata/data_access/fs_helper.py +++ b/powersimdata/data_access/fs_helper.py @@ -49,3 +49,22 @@ def get_multi_fs(root): remotes = ",".join([f[0] for f in mfs.iterate_fs()]) print(f"Initialized remote filesystem with {remotes}") return mfs + + +def get_scenario_fs(): + """Create filesystem combining the server (if connected) with blob storage, + prioritizing the server if connected. + + :return: (*fs.base.FS*) -- filesystem instance + """ + scenario_data = get_blob_fs("scenariodata") + mfs = MultiFS() + try: + ssh_fs = get_ssh_fs(server_setup.DATA_ROOT_DIR) + mfs.add_fs("ssh_fs", ssh_fs, write=True, priority=2) + except: # noqa + print("Could not connect to ssh server") + mfs.add_fs("scenario_fs", scenario_data, priority=1) + remotes = ",".join([f[0] for f in mfs.iterate_fs()]) + print(f"Initialized remote filesystem with {remotes}") + return mfs diff --git a/powersimdata/input/input_base.py b/powersimdata/input/input_base.py index deb957d96..ba5879171 100644 --- a/powersimdata/input/input_base.py +++ b/powersimdata/input/input_base.py @@ -1,4 +1,3 @@ -from powersimdata.data_access.context import Context from powersimdata.utility.helpers import MemoryCache, cache_key _cache = MemoryCache() @@ -11,7 +10,7 @@ class InputBase: def __init__(self): """Constructor.""" - self.data_access = Context.get_data_access() + self.data_access = None self._file_extension = {} def _check_field(self, field_name): diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index 9fb302e28..134f4706e 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -3,6 +3,8 @@ import pandas as pd +from powersimdata.data_access.context import Context +from powersimdata.data_access.fs_helper import get_scenario_fs from powersimdata.input.input_base import InputBase from powersimdata.utility import server_setup @@ -13,6 +15,7 @@ class InputData(InputBase): def __init__(self): super().__init__() self._file_extension = {"ct": "pkl", "grid": "mat"} + self.data_access = Context.get_data_access(get_scenario_fs) def _get_file_path(self, scenario_info, field_name): """Get the path to either grid or ct for the scenario diff --git a/powersimdata/input/profile_input.py b/powersimdata/input/profile_input.py index 7d6167bd0..1c2183d4e 100644 --- a/powersimdata/input/profile_input.py +++ b/powersimdata/input/profile_input.py @@ -1,5 +1,7 @@ import pandas as pd +from powersimdata.data_access.context import Context +from powersimdata.data_access.fs_helper import get_blob_fs from powersimdata.input.input_base import InputBase profile_kind = { @@ -39,6 +41,7 @@ class ProfileInput(InputBase): def __init__(self): super().__init__() self._file_extension = {k: "csv" for k in profile_kind} + self.data_access = Context.get_data_access(lambda: get_blob_fs("profiles")) def _get_file_path(self, scenario_info, field_name): """Get the path to the specified profile diff --git a/powersimdata/output/output_data.py b/powersimdata/output/output_data.py index 7ec848236..7ae3843a7 100644 --- a/powersimdata/output/output_data.py +++ b/powersimdata/output/output_data.py @@ -3,6 +3,7 @@ from scipy.sparse import coo_matrix from powersimdata.data_access.context import Context +from powersimdata.data_access.fs_helper import get_scenario_fs from powersimdata.input.input_data import distribute_demand_from_zones_to_buses from powersimdata.input.transform_profile import TransformProfile from powersimdata.utility import server_setup @@ -13,7 +14,7 @@ class OutputData: def __init__(self): """Constructor""" - self._data_access = Context.get_data_access() + self.data_access = Context.get_data_access(get_scenario_fs) def get_data(self, scenario_id, field_name): """Returns data either from server or from local directory. From 768d5c4ca6a8d3f6e1fd6e7a35c29bbd77ee32d1 Mon Sep 17 00:00:00 2001 From: Jen Hagg Date: Wed, 27 Jul 2022 18:58:39 -0700 Subject: [PATCH 3/7] fix: keep delayed evaluation of ssh fs --- powersimdata/data_access/data_access.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 91f45eef4..b417be5d3 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -196,9 +196,15 @@ def __init__(self, _fs=None): """Constructor""" super().__init__() self.root = server_setup.DATA_ROOT_DIR - self.fs = _fs if _fs is not None else get_multi_fs(self.root) + self._fs = _fs self.local_fs = fs.open_fs(server_setup.LOCAL_DIR) + @property + def fs(self): + if self._fs is None: + self._fs = get_multi_fs(self.root) + return self._fs + def exec_command(self, command): ssh_fs = self.fs.get_fs("ssh_fs") return ssh_fs.exec_command(command) @@ -274,7 +280,7 @@ class _DataAccessTemplate(SSHDataAccess): def __init__(self, fs_url): self.local_fs = fs.open_fs(fs_url) - self.fs = self._get_fs(fs_url) + self._fs = self._get_fs(fs_url) self.root = "foo" self.join = fs.path.join From b127e0a7a8c9e6bb41ede76fcad7fec23f9d7d64 Mon Sep 17 00:00:00 2001 From: Jen Hagg Date: Fri, 5 Aug 2022 13:39:19 -0400 Subject: [PATCH 4/7] feat: use new storage account --- powersimdata/data_access/fs_helper.py | 5 +++-- powersimdata/utility/server_setup.py | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/powersimdata/data_access/fs_helper.py b/powersimdata/data_access/fs_helper.py index 2919962fa..1cfbc0440 100644 --- a/powersimdata/data_access/fs_helper.py +++ b/powersimdata/data_access/fs_helper.py @@ -11,8 +11,9 @@ def get_blob_fs(container): :param str container: the container name :return: (*fs.base.FS*) -- filesystem instance """ - account = "besciences" - return fs.open_fs(f"azblob://{account}@{container}") + account = "esmi" + sas_token = server_setup.BLOB_TOKEN_RO + return fs.open_fs(f"azblobv2://{account}:{sas_token}@{container}") def get_ssh_fs(root=""): diff --git a/powersimdata/utility/server_setup.py b/powersimdata/utility/server_setup.py index 1587fefd3..5988ddd47 100644 --- a/powersimdata/utility/server_setup.py +++ b/powersimdata/utility/server_setup.py @@ -14,6 +14,7 @@ MODEL_DIR = config.MODEL_DIR ENGINE_DIR = config.ENGINE_DIR DEPLOYMENT_MODE = get_deployment_mode() +BLOB_TOKEN_RO = "?sv=2021-06-08&ss=b&srt=co&sp=rl&se=2050-08-06T01:31:08Z&st=2022-08-05T17:31:08Z&spr=https&sig=ORHiRQQCocyaHXV2phhSN92GFhRnaHuGOecskxsmG3U%3D" os.makedirs(LOCAL_DIR, exist_ok=True) From 748c6dfaa8087f999099f693d02a0449b2c2282b Mon Sep 17 00:00:00 2001 From: Jen Hagg Date: Fri, 5 Aug 2022 15:54:02 -0400 Subject: [PATCH 5/7] chore: update remote fs for scenario object --- powersimdata/scenario/scenario.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/powersimdata/scenario/scenario.py b/powersimdata/scenario/scenario.py index 29e4683de..e9130eff8 100644 --- a/powersimdata/scenario/scenario.py +++ b/powersimdata/scenario/scenario.py @@ -4,6 +4,7 @@ from powersimdata.data_access.context import Context from powersimdata.data_access.execute_list import ExecuteListManager +from powersimdata.data_access.fs_helper import get_scenario_fs from powersimdata.data_access.scenario_list import ScenarioListManager from powersimdata.scenario.analyze import Analyze from powersimdata.scenario.create import Create, _Builder @@ -52,7 +53,7 @@ def __init__(self, descriptor=None): if descriptor is not None and not isinstance(descriptor, str): raise TypeError("Descriptor must be a string or int (for a Scenario ID)") - self.data_access = Context.get_data_access() + self.data_access = Context.get_data_access(get_scenario_fs) self._scenario_list_manager = ScenarioListManager(self.data_access) self._execute_list_manager = ExecuteListManager(self.data_access) From d183e1d3c19019b6f967c01402cfeac59eddff03 Mon Sep 17 00:00:00 2001 From: Jen Hagg Date: Fri, 5 Aug 2022 16:43:32 -0400 Subject: [PATCH 6/7] fix: typo while renaming --- powersimdata/output/output_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/powersimdata/output/output_data.py b/powersimdata/output/output_data.py index 7ae3843a7..bc5e6c7cd 100644 --- a/powersimdata/output/output_data.py +++ b/powersimdata/output/output_data.py @@ -14,7 +14,7 @@ class OutputData: def __init__(self): """Constructor""" - self.data_access = Context.get_data_access(get_scenario_fs) + self._data_access = Context.get_data_access(get_scenario_fs) def get_data(self, scenario_id, field_name): """Returns data either from server or from local directory. From c51d095e799686a079fc6d86c37e9c93f092e19e Mon Sep 17 00:00:00 2001 From: Jen Hagg Date: Wed, 17 Aug 2022 10:46:32 -0700 Subject: [PATCH 7/7] docs: fix docstrings --- powersimdata/data_access/data_access.py | 5 +++++ powersimdata/data_access/fs_helper.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index b417be5d3..8e6eaff7d 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -201,6 +201,11 @@ def __init__(self, _fs=None): @property def fs(self): + """Get or create a filesystem object, defaulting to a MultiFS that combines the + server and blob containers. + + :return: (*fs.base.FS*) -- filesystem instance + """ if self._fs is None: self._fs = get_multi_fs(self.root) return self._fs diff --git a/powersimdata/data_access/fs_helper.py b/powersimdata/data_access/fs_helper.py index 1cfbc0440..45624dd37 100644 --- a/powersimdata/data_access/fs_helper.py +++ b/powersimdata/data_access/fs_helper.py @@ -54,7 +54,7 @@ def get_multi_fs(root): def get_scenario_fs(): """Create filesystem combining the server (if connected) with blob storage, - prioritizing the server if connected. + prioritizing the server if connected. :return: (*fs.base.FS*) -- filesystem instance """