Add JsonImporter (#22)

Co-authored-by: Henrik Ingo <[email protected]> Co-authored-by: Matt Fleming <[email protected]>
datastax-labs · Sep 23, 2024 · e1fee80 · e1fee80
1 parent 84e3e74
commit e1fee80
Show file tree

Hide file tree

Showing 2 changed files with 156 additions and 6 deletions.
diff --git a/hunter/importer.py b/hunter/importer.py
@@ -1,4 +1,5 @@
 import csv
+import json
 from collections import OrderedDict
 from contextlib import contextmanager
 from dataclasses import dataclass
@@ -16,6 +17,7 @@
     CsvTestConfig,
     GraphiteTestConfig,
     HistoStatTestConfig,
+    JsonTestConfig,
     PostgresMetric,
     PostgresTestConfig,
     TestConfig,
@@ -268,7 +270,7 @@ def fetch_data(self, test_conf: TestConfig, selector: DataSelector = DataSelecto
                     # Read metric values. Note we can still fail on conversion to float,
                     # because the user is free to override the column selection and thus
                     # they may select a column that contains non-numeric data:
-                    for (name, i) in zip(metric_names, metric_indexes):
+                    for name, i in zip(metric_names, metric_indexes):
                         try:
                             data[name].append(float(row[i]))
                         except ValueError as err:
@@ -498,7 +500,7 @@ def fetch_data(self, test_conf: TestConfig, selector: DataSelector = DataSelecto
             # Read metric values. Note we can still fail on conversion to float,
             # because the user is free to override the column selection and thus
             # they may select a column that contains non-numeric data:
-            for (name, i) in zip(metric_names, metric_indexes):
+            for name, i in zip(metric_names, metric_indexes):
                 try:
                     data[name].append(float(row[i]))
                 except ValueError as err:
@@ -537,19 +539,133 @@ def fetch_all_metric_names(self, test_conf: PostgresTestConfig) -> List[str]:
         return [m for m in test_conf.metrics.keys()]
 
 
+class JsonImporter(Importer):
+    def __init__(self):
+        self._data = {}
+
+    @staticmethod
+    def _read_json_file(filename: str):
+        try:
+            return json.load(open(filename))
+        except FileNotFoundError:
+            raise DataImportError(f"Input file not found: {filename}")
+
+    def inputfile(self, test_conf: JsonTestConfig):
+        if test_conf.file not in self._data:
+            self._data[test_conf.file] = self._read_json_file(test_conf.file)
+        return self._data[test_conf.file]
+
+    def fetch_data(self, test_conf: TestConfig, selector: DataSelector = DataSelector()) -> Series:
+
+        if not isinstance(test_conf, JsonTestConfig):
+            raise ValueError("Expected JsonTestConfig")
+
+        # TODO: refactor. THis is copy pasted from CSV importer
+        since_time = selector.since_time
+        until_time = selector.until_time
+
+        if since_time.timestamp() > until_time.timestamp():
+            raise DataImportError(
+                f"Invalid time range: ["
+                f"{format_timestamp(int(since_time.timestamp()))}, "
+                f"{format_timestamp(int(until_time.timestamp()))}]"
+            )
+
+        time = []
+        data = OrderedDict()
+        metrics = OrderedDict()
+        attributes = OrderedDict()
+
+        for name in self.fetch_all_metric_names(test_conf):
+            # Ignore metrics if selector.metrics is not None and name is not in selector.metrics
+            if selector.metrics is not None and name not in selector.metrics:
+                continue
+            data[name] = []
+
+        attr_names = self.fetch_all_attribute_names(test_conf)
+        for name in attr_names:
+            attributes[name] = []
+
+        # If the user specified a branch, only include results from that branch.
+        # Otherwise if the test config specifies a branch, only include results from that branch.
+        # Else include all results.
+        branch = None
+        if selector.branch:
+            branch = selector.branch
+        elif test_conf.base_branch:
+            branch = test_conf.base_branch
+
+        objs = self.inputfile(test_conf)
+        list_of_json_obj = []
+        for o in objs:
+            if branch and o["attributes"]["branch"] != branch:
+                continue
+            list_of_json_obj.append(o)
+
+        for result in list_of_json_obj:
+            time.append(result["timestamp"])
+            for metric in result["metrics"]:
+                # Skip metrics not in selector.metrics if selector.metrics is enabled
+                if metric["name"] not in data:
+                    continue
+
+                data[metric["name"]].append(metric["value"])
+                metrics[metric["name"]] = Metric(1, 1.0)
+        for a in attr_names:
+            attributes[a] = [o["attributes"][a] for o in list_of_json_obj]
+
+        # Leave last n points:
+        time = time[-selector.last_n_points :]
+        tmp = data
+        data = {}
+        for k, v in tmp.items():
+            data[k] = v[-selector.last_n_points :]
+        tmp = attributes
+        attributes = {}
+        for k, v in tmp.items():
+            attributes[k] = v[-selector.last_n_points :]
+
+        return Series(
+            test_conf.name,
+            branch=None,
+            time=time,
+            metrics=metrics,
+            data=data,
+            attributes=attributes,
+        )
+
+    def fetch_all_metric_names(self, test_conf: JsonTestConfig) -> List[str]:
+        metric_names = set()
+        list_of_json_obj = self.inputfile(test_conf)
+        for result in list_of_json_obj:
+            for metric in result["metrics"]:
+                metric_names.add(metric["name"])
+        return [m for m in metric_names]
+
+    def fetch_all_attribute_names(self, test_conf: JsonTestConfig) -> List[str]:
+        attr_names = set()
+        list_of_json_obj = self.inputfile(test_conf)
+        for result in list_of_json_obj:
+            for a in result["attributes"].keys():
+                attr_names.add(a)
+        return [m for m in attr_names]
+
+
 class Importers:
     __config: Config
     __csv_importer: Optional[CsvImporter]
     __graphite_importer: Optional[GraphiteImporter]
     __histostat_importer: Optional[HistoStatImporter]
     __postgres_importer: Optional[PostgresImporter]
+    __json_importer: Optional[JsonImporter]
 
     def __init__(self, config: Config):
         self.__config = config
         self.__csv_importer = None
         self.__graphite_importer = None
         self.__histostat_importer = None
         self.__postgres_importer = None
+        self.__json_importer = None
 
     def csv_importer(self) -> CsvImporter:
         if self.__csv_importer is None:
@@ -571,6 +687,11 @@ def postgres_importer(self) -> PostgresImporter:
             self.__postgres_importer = PostgresImporter(Postgres(self.__config.postgres))
         return self.__postgres_importer
 
+    def json_importer(self) -> JsonImporter:
+        if self.__json_importer is None:
+            self.__json_importer = JsonImporter()
+        return self.__json_importer
+
     def get(self, test: TestConfig) -> Importer:
         if isinstance(test, CsvTestConfig):
             return self.csv_importer()
@@ -580,5 +701,7 @@ def get(self, test: TestConfig) -> Importer:
             return self.histostat_importer()
         elif isinstance(test, PostgresTestConfig):
             return self.postgres_importer()
+        elif isinstance(test, JsonTestConfig):
+            return self.json_importer()
         else:
             raise ValueError(f"Unsupported test type {type(test)}")
diff --git a/hunter/test_config.py b/hunter/test_config.py
@@ -162,7 +162,7 @@ def create_test_config(name: str, config: Dict) -> TestConfig:
     Loads properties of a test from a dictionary read from hunter's config file
     This dictionary must have the `type` property to determine the type of the test.
     Other properties depend on the type.
-    Currently supported test types are `fallout`, `graphite`, `csv`, and `psql`.
+    Currently supported test types are `fallout`, `graphite`, `csv`, `json`, and `psql`.
     """
     test_type = config.get("type")
     if test_type == "csv":
@@ -173,6 +173,8 @@ def create_test_config(name: str, config: Dict) -> TestConfig:
         return create_histostat_test_config(name, config)
     elif test_type == "postgres":
         return create_postgres_test_config(name, config)
+    elif test_type == "json":
+        return create_json_test_config(name, config)
     elif test_type is None:
         raise TestConfigError(f"Test type not set for test {name}")
     else:
@@ -192,7 +194,7 @@ def create_csv_test_config(test_name: str, test_info: Dict) -> CsvTestConfig:
         for name in metrics_info:
             metrics.append(CsvMetric(name, 1, 1.0, name))
     elif isinstance(metrics_info, Dict):
-        for (metric_name, metric_conf) in metrics_info.items():
+        for metric_name, metric_conf in metrics_info.items():
             metrics.append(
                 CsvMetric(
                     name=metric_name,
@@ -231,7 +233,7 @@ def create_graphite_test_config(name: str, test_info: Dict) -> GraphiteTestConfi
 
     metrics = []
     try:
-        for (metric_name, metric_conf) in metrics_info.items():
+        for metric_name, metric_conf in metrics_info.items():
             metrics.append(
                 GraphiteMetric(
                     name=metric_name,
@@ -279,7 +281,7 @@ def create_postgres_test_config(test_name: str, test_info: Dict) -> PostgresTest
             for name in metrics_info:
                 metrics.append(CsvMetric(name, 1, 1.0))
         elif isinstance(metrics_info, Dict):
-            for (metric_name, metric_conf) in metrics_info.items():
+            for metric_name, metric_conf in metrics_info.items():
                 metrics.append(
                     PostgresMetric(
                         name=metric_name,
@@ -294,3 +296,28 @@ def create_postgres_test_config(test_name: str, test_info: Dict) -> PostgresTest
         return PostgresTestConfig(test_name, query, update_stmt, time_column, metrics, attributes)
     except KeyError as e:
         raise TestConfigError(f"Configuration key not found in test {test_name}: {e.args[0]}")
+
+
+@dataclass
+class JsonTestConfig(TestConfig):
+    name: str
+    file: str
+    base_branch: str
+
+    # TODO: This should return the list defined in the config file hunter.yaml
+    def fully_qualified_metric_names(self):
+        from hunter.importer import JsonImporter
+
+        metric_names = JsonImporter().fetch_all_metric_names(self)
+        return [f"{self.name}.{m}" for m in metric_names]
+
+
+def create_json_test_config(name: str, test_info: Dict) -> JsonTestConfig:
+    try:
+        file = test_info["file"]
+    except KeyError as e:
+        raise TestConfigError(f"Configuration key not found in test {name}: {e.args[0]}")
+    if not os.path.exists(file):
+        raise TestConfigError(f"Configuration file not found: {file}")
+    base_branch = test_info.get("base_branch", None)
+    return JsonTestConfig(name, file, base_branch)