Skip to content

Commit

Permalink
Add JsonImporter (#22)
Browse files Browse the repository at this point in the history
Co-authored-by: Henrik Ingo <[email protected]>
Co-authored-by: Matt Fleming <[email protected]>
  • Loading branch information
3 people authored Sep 23, 2024
1 parent 84e3e74 commit e1fee80
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 6 deletions.
127 changes: 125 additions & 2 deletions hunter/importer.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import csv
import json
from collections import OrderedDict
from contextlib import contextmanager
from dataclasses import dataclass
Expand All @@ -16,6 +17,7 @@
CsvTestConfig,
GraphiteTestConfig,
HistoStatTestConfig,
JsonTestConfig,
PostgresMetric,
PostgresTestConfig,
TestConfig,
Expand Down Expand Up @@ -268,7 +270,7 @@ def fetch_data(self, test_conf: TestConfig, selector: DataSelector = DataSelecto
# Read metric values. Note we can still fail on conversion to float,
# because the user is free to override the column selection and thus
# they may select a column that contains non-numeric data:
for (name, i) in zip(metric_names, metric_indexes):
for name, i in zip(metric_names, metric_indexes):
try:
data[name].append(float(row[i]))
except ValueError as err:
Expand Down Expand Up @@ -498,7 +500,7 @@ def fetch_data(self, test_conf: TestConfig, selector: DataSelector = DataSelecto
# Read metric values. Note we can still fail on conversion to float,
# because the user is free to override the column selection and thus
# they may select a column that contains non-numeric data:
for (name, i) in zip(metric_names, metric_indexes):
for name, i in zip(metric_names, metric_indexes):
try:
data[name].append(float(row[i]))
except ValueError as err:
Expand Down Expand Up @@ -537,19 +539,133 @@ def fetch_all_metric_names(self, test_conf: PostgresTestConfig) -> List[str]:
return [m for m in test_conf.metrics.keys()]


class JsonImporter(Importer):
def __init__(self):
self._data = {}

@staticmethod
def _read_json_file(filename: str):
try:
return json.load(open(filename))
except FileNotFoundError:
raise DataImportError(f"Input file not found: {filename}")

def inputfile(self, test_conf: JsonTestConfig):
if test_conf.file not in self._data:
self._data[test_conf.file] = self._read_json_file(test_conf.file)
return self._data[test_conf.file]

def fetch_data(self, test_conf: TestConfig, selector: DataSelector = DataSelector()) -> Series:

if not isinstance(test_conf, JsonTestConfig):
raise ValueError("Expected JsonTestConfig")

# TODO: refactor. THis is copy pasted from CSV importer
since_time = selector.since_time
until_time = selector.until_time

if since_time.timestamp() > until_time.timestamp():
raise DataImportError(
f"Invalid time range: ["
f"{format_timestamp(int(since_time.timestamp()))}, "
f"{format_timestamp(int(until_time.timestamp()))}]"
)

time = []
data = OrderedDict()
metrics = OrderedDict()
attributes = OrderedDict()

for name in self.fetch_all_metric_names(test_conf):
# Ignore metrics if selector.metrics is not None and name is not in selector.metrics
if selector.metrics is not None and name not in selector.metrics:
continue
data[name] = []

attr_names = self.fetch_all_attribute_names(test_conf)
for name in attr_names:
attributes[name] = []

# If the user specified a branch, only include results from that branch.
# Otherwise if the test config specifies a branch, only include results from that branch.
# Else include all results.
branch = None
if selector.branch:
branch = selector.branch
elif test_conf.base_branch:
branch = test_conf.base_branch

objs = self.inputfile(test_conf)
list_of_json_obj = []
for o in objs:
if branch and o["attributes"]["branch"] != branch:
continue
list_of_json_obj.append(o)

for result in list_of_json_obj:
time.append(result["timestamp"])
for metric in result["metrics"]:
# Skip metrics not in selector.metrics if selector.metrics is enabled
if metric["name"] not in data:
continue

data[metric["name"]].append(metric["value"])
metrics[metric["name"]] = Metric(1, 1.0)
for a in attr_names:
attributes[a] = [o["attributes"][a] for o in list_of_json_obj]

# Leave last n points:
time = time[-selector.last_n_points :]
tmp = data
data = {}
for k, v in tmp.items():
data[k] = v[-selector.last_n_points :]
tmp = attributes
attributes = {}
for k, v in tmp.items():
attributes[k] = v[-selector.last_n_points :]

return Series(
test_conf.name,
branch=None,
time=time,
metrics=metrics,
data=data,
attributes=attributes,
)

def fetch_all_metric_names(self, test_conf: JsonTestConfig) -> List[str]:
metric_names = set()
list_of_json_obj = self.inputfile(test_conf)
for result in list_of_json_obj:
for metric in result["metrics"]:
metric_names.add(metric["name"])
return [m for m in metric_names]

def fetch_all_attribute_names(self, test_conf: JsonTestConfig) -> List[str]:
attr_names = set()
list_of_json_obj = self.inputfile(test_conf)
for result in list_of_json_obj:
for a in result["attributes"].keys():
attr_names.add(a)
return [m for m in attr_names]


class Importers:
__config: Config
__csv_importer: Optional[CsvImporter]
__graphite_importer: Optional[GraphiteImporter]
__histostat_importer: Optional[HistoStatImporter]
__postgres_importer: Optional[PostgresImporter]
__json_importer: Optional[JsonImporter]

def __init__(self, config: Config):
self.__config = config
self.__csv_importer = None
self.__graphite_importer = None
self.__histostat_importer = None
self.__postgres_importer = None
self.__json_importer = None

def csv_importer(self) -> CsvImporter:
if self.__csv_importer is None:
Expand All @@ -571,6 +687,11 @@ def postgres_importer(self) -> PostgresImporter:
self.__postgres_importer = PostgresImporter(Postgres(self.__config.postgres))
return self.__postgres_importer

def json_importer(self) -> JsonImporter:
if self.__json_importer is None:
self.__json_importer = JsonImporter()
return self.__json_importer

def get(self, test: TestConfig) -> Importer:
if isinstance(test, CsvTestConfig):
return self.csv_importer()
Expand All @@ -580,5 +701,7 @@ def get(self, test: TestConfig) -> Importer:
return self.histostat_importer()
elif isinstance(test, PostgresTestConfig):
return self.postgres_importer()
elif isinstance(test, JsonTestConfig):
return self.json_importer()
else:
raise ValueError(f"Unsupported test type {type(test)}")
35 changes: 31 additions & 4 deletions hunter/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ def create_test_config(name: str, config: Dict) -> TestConfig:
Loads properties of a test from a dictionary read from hunter's config file
This dictionary must have the `type` property to determine the type of the test.
Other properties depend on the type.
Currently supported test types are `fallout`, `graphite`, `csv`, and `psql`.
Currently supported test types are `fallout`, `graphite`, `csv`, `json`, and `psql`.
"""
test_type = config.get("type")
if test_type == "csv":
Expand All @@ -173,6 +173,8 @@ def create_test_config(name: str, config: Dict) -> TestConfig:
return create_histostat_test_config(name, config)
elif test_type == "postgres":
return create_postgres_test_config(name, config)
elif test_type == "json":
return create_json_test_config(name, config)
elif test_type is None:
raise TestConfigError(f"Test type not set for test {name}")
else:
Expand All @@ -192,7 +194,7 @@ def create_csv_test_config(test_name: str, test_info: Dict) -> CsvTestConfig:
for name in metrics_info:
metrics.append(CsvMetric(name, 1, 1.0, name))
elif isinstance(metrics_info, Dict):
for (metric_name, metric_conf) in metrics_info.items():
for metric_name, metric_conf in metrics_info.items():
metrics.append(
CsvMetric(
name=metric_name,
Expand Down Expand Up @@ -231,7 +233,7 @@ def create_graphite_test_config(name: str, test_info: Dict) -> GraphiteTestConfi

metrics = []
try:
for (metric_name, metric_conf) in metrics_info.items():
for metric_name, metric_conf in metrics_info.items():
metrics.append(
GraphiteMetric(
name=metric_name,
Expand Down Expand Up @@ -279,7 +281,7 @@ def create_postgres_test_config(test_name: str, test_info: Dict) -> PostgresTest
for name in metrics_info:
metrics.append(CsvMetric(name, 1, 1.0))
elif isinstance(metrics_info, Dict):
for (metric_name, metric_conf) in metrics_info.items():
for metric_name, metric_conf in metrics_info.items():
metrics.append(
PostgresMetric(
name=metric_name,
Expand All @@ -294,3 +296,28 @@ def create_postgres_test_config(test_name: str, test_info: Dict) -> PostgresTest
return PostgresTestConfig(test_name, query, update_stmt, time_column, metrics, attributes)
except KeyError as e:
raise TestConfigError(f"Configuration key not found in test {test_name}: {e.args[0]}")


@dataclass
class JsonTestConfig(TestConfig):
name: str
file: str
base_branch: str

# TODO: This should return the list defined in the config file hunter.yaml
def fully_qualified_metric_names(self):
from hunter.importer import JsonImporter

metric_names = JsonImporter().fetch_all_metric_names(self)
return [f"{self.name}.{m}" for m in metric_names]


def create_json_test_config(name: str, test_info: Dict) -> JsonTestConfig:
try:
file = test_info["file"]
except KeyError as e:
raise TestConfigError(f"Configuration key not found in test {name}: {e.args[0]}")
if not os.path.exists(file):
raise TestConfigError(f"Configuration file not found: {file}")
base_branch = test_info.get("base_branch", None)
return JsonTestConfig(name, file, base_branch)

0 comments on commit e1fee80

Please sign in to comment.