From 93290d7ba0021199ff7102a4a2bda3dc83f8b472 Mon Sep 17 00:00:00 2001 From: Joseph Counts <94138069+jcountsNR@users.noreply.github.com> Date: Mon, 19 Aug 2024 08:29:35 -0700 Subject: [PATCH] feat: Databricks cluster (#1732) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: Databricks cluster * chore: Fix dashboard issue * chore: remove pages * chore: Fix account ids * chore: Resolve thresholds error * chore: Resolve folder name * chore: Fix dashboard --------- Co-authored-by: Otávio Carvalho --- .../ext-databricks_cluster/dashboard.json | 407 ++++++++++++++++++ .../ext-databricks_cluster/definition.yml | 23 + .../ext-databricks_cluster/golden_metrics.yml | 17 + .../summary_metrics.yml | 8 + 4 files changed, 455 insertions(+) create mode 100644 entity-types/ext-databricks_cluster/dashboard.json create mode 100644 entity-types/ext-databricks_cluster/definition.yml create mode 100644 entity-types/ext-databricks_cluster/golden_metrics.yml create mode 100644 entity-types/ext-databricks_cluster/summary_metrics.yml diff --git a/entity-types/ext-databricks_cluster/dashboard.json b/entity-types/ext-databricks_cluster/dashboard.json new file mode 100644 index 000000000..7d8a68a95 --- /dev/null +++ b/entity-types/ext-databricks_cluster/dashboard.json @@ -0,0 +1,407 @@ +{ + "name": "Databricks Spark", + "description": null, + "pages": [ + + { + "name": "Stages", + "description": null, + "widgets": [ + { + "title": "Average Executor Total Task Run Time", + "layout": { + "column": 1, + "row": 1, + "width": 4, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(spark.app.stage.executor.runTime) WHERE spark.app.stage.executor.runTime IS NOT NULL TIMESERIES" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "thresholds": 
[], + "units": { + "unit": "MS" + }, + "yAxisLeft": { + "zero": true + }, + "yAxisRight": { + "zero": true + } + } + }, + { + "title": "Average Executor Total Task CPU Time", + "layout": { + "column": 5, + "row": 1, + "width": 4, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(spark.app.stage.executor.cpuTime) / 1000000 WHERE spark.app.stage.executor.cpuTime IS NOT NULL TIMESERIES" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "thresholds": [], + "units": { + "unit": "MS" + }, + "yAxisLeft": { + "zero": true + }, + "yAxisRight": { + "zero": true + } + } + }, + { + "title": "Average JVM GC Time", + "layout": { + "column": 9, + "row": 1, + "width": 4, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(spark.app.stage.jvmGcTime) WHERE spark.app.stage.jvmGcTime IS NOT NULL TIMESERIES" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "thresholds": [], + "units": { + "unit": "MS" + }, + "yAxisLeft": { + "zero": true + }, + "yAxisRight": { + "zero": true + } + } + }, + { + "title": "Average Task Deserialization Time", + "layout": { + "column": 1, + "row": 4, + "width": 4, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(spark.app.stage.executor.deserializeTime) WHERE spark.app.stage.executor.deserializeTime IS NOT NULL TIMESERIES" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "thresholds": [], + "units": { + "unit": "MS" + }, 
+ "yAxisLeft": { + "zero": true + }, + "yAxisRight": { + "zero": true + } + } + }, + { + "title": "Average Task Deserialization CPU Time", + "layout": { + "column": 5, + "row": 4, + "width": 4, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(spark.app.stage.executor.deserializeCpuTime) / 1000000 WHERE spark.app.stage.executor.deserializeCpuTime IS NOT NULL TIMESERIES" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "thresholds": [], + "units": { + "unit": "MS" + }, + "yAxisLeft": { + "zero": true + }, + "yAxisRight": { + "zero": true + } + } + }, + { + "title": "Average Result Serialization Time", + "layout": { + "column": 9, + "row": 4, + "width": 4, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT average(spark.app.stage.resultSerializationTime) WHERE spark.app.stage.resultSerializationTime IS NOT NULL TIMESERIES" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "thresholds": [], + "units": { + "unit": "MS" + }, + "yAxisLeft": { + "zero": true + }, + "yAxisRight": { + "zero": true + } + } + }, + { + "title": "I/O Bytes", + "layout": { + "column": 1, + "row": 7, + "width": 6, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT latest(spark.app.stage.inputBytes) AS 'Bytes In' WHERE spark.app.stage.inputBytes IS NOT NULL TIMESERIES" + }, + { + "accountId": 0, + "query": "FROM Metric SELECT latest(spark.app.stage.outputBytes) AS 'Bytes Out' WHERE 
spark.app.stage.outputBytes IS NOT NULL TIMESERIES" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "thresholds": [], + "yAxisLeft": { + "zero": true + }, + "yAxisRight": { + "zero": true + } + } + }, + { + "title": "I/O Records", + "layout": { + "column": 7, + "row": 7, + "width": 6, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT latest(spark.app.stage.inputRecords) AS 'Records In' WHERE spark.app.stage.inputRecords IS NOT NULL TIMESERIES" + }, + { + "accountId": 0, + "query": "FROM Metric SELECT latest(spark.app.stage.outputRecords) AS 'Records Out' WHERE spark.app.stage.outputRecords IS NOT NULL TIMESERIES" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "thresholds": [], + "yAxisLeft": { + "zero": true + }, + "yAxisRight": { + "zero": true + } + } + }, + { + "title": "Shuffle I/O Bytes", + "layout": { + "column": 1, + "row": 10, + "width": 6, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { + "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT latest(spark.app.stage.shuffle.readBytes) AS 'Bytes Read' WHERE spark.app.stage.shuffle.readBytes IS NOT NULL TIMESERIES" + }, + { + "accountId": 0, + "query": "FROM Metric SELECT latest(spark.app.stage.shuffle.writeBytes) AS 'Bytes Written' WHERE spark.app.stage.shuffle.writeBytes IS NOT NULL TIMESERIES" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "thresholds": [], + "yAxisLeft": { + "zero": true + }, + "yAxisRight": { + "zero": true + } + } + }, + { + "title": "Shuffle I/O Records", + "layout": { + "column": 7, + "row": 10, + "width": 6, + "height": 3 + }, + "visualization": { + "id": "viz.line" + }, + "rawConfiguration": { + "facet": { 
+ "showOtherSeries": false + }, + "legend": { + "enabled": true + }, + "nrqlQueries": [ + { + "accountId": 0, + "query": "FROM Metric SELECT latest(spark.app.stage.shuffle.readRecords) AS 'Records Read' WHERE spark.app.stage.shuffle.readRecords IS NOT NULL TIMESERIES" + }, + { + "accountId": 0, + "query": "FROM Metric SELECT latest(spark.app.stage.shuffle.writeRecords) AS 'Records Written' WHERE spark.app.stage.shuffle.writeRecords IS NOT NULL TIMESERIES" + } + ], + "platformOptions": { + "ignoreTimeRange": false + }, + "thresholds": [], + "yAxisLeft": { + "zero": true + }, + "yAxisRight": { + "zero": true + } + } + } + ] + } + ] + } \ No newline at end of file diff --git a/entity-types/ext-databricks_cluster/definition.yml b/entity-types/ext-databricks_cluster/definition.yml new file mode 100644 index 000000000..8b5e40fb7 --- /dev/null +++ b/entity-types/ext-databricks_cluster/definition.yml @@ -0,0 +1,23 @@ +domain: EXT +type: DATABRICKS_CLUSTER + +synthesis: + rules: + - identifier: databricksClusterId + name: Databricks Cluster + encodeIdentifierInGUID: true + conditions: + - attribute: clusterProvider + value: databricks + tags: + instrumentation.provider: + entityTagName: Instrumentation provider + instrumentation.name: + entityTagName: Instrumentation name +dashboardTemplates: + newRelic: + template: dashboard.json + +configuration: + entityExpirationTime: DAILY + alertable: true \ No newline at end of file diff --git a/entity-types/ext-databricks_cluster/golden_metrics.yml b/entity-types/ext-databricks_cluster/golden_metrics.yml new file mode 100644 index 000000000..e5f0220be --- /dev/null +++ b/entity-types/ext-databricks_cluster/golden_metrics.yml @@ -0,0 +1,17 @@ +driverMemory: + title: Average Driver Memory Used + query: + select: average(spark.app.executor.memoryUsed) + from: Metric + eventId: entity.guid + +jobs: + title: Spark Jobs + query: + select: latest(spark.app.jobs) + from: Metric + facet: sparkAppJobStatus + eventId: entity.guid + + + diff 
--git a/entity-types/ext-databricks_cluster/summary_metrics.yml b/entity-types/ext-databricks_cluster/summary_metrics.yml new file mode 100644 index 000000000..c42e56ca6 --- /dev/null +++ b/entity-types/ext-databricks_cluster/summary_metrics.yml @@ -0,0 +1,8 @@ +driverMemory: + goldenMetric: driverMemory + title: Average Driver Memory Used + unit: BYTES +jobs: + goldenMetric: jobs + title: Spark Jobs + unit: COUNT \ No newline at end of file