From 02806e617b6982b1c9fd171b820b8bd631557b42 Mon Sep 17 00:00:00 2001 From: Eduardo Blancas Date: Wed, 18 Sep 2024 11:12:38 -0600 Subject: [PATCH] removes telemetry --- CHANGELOG.md | 2 + doc/community/developer-guide.md | 2 +- doc/community/support.md | 4 - src/sql/connection/connection.py | 16 +- src/sql/ggplot/aes.py | 4 - src/sql/ggplot/facet_wrap.py | 2 - src/sql/ggplot/geom/geom_boxplot.py | 2 - src/sql/ggplot/geom/geom_histogram.py | 2 - src/sql/ggplot/ggplot.py | 2 - src/sql/inspect.py | 4 - src/sql/magic.py | 10 +- src/sql/plot.py | 18 +- src/sql/run/resultset.py | 10 +- src/sql/telemetry.py | 13 -- src/sql/widgets/table_widget/table_widget.py | 2 - .../integration/test_generic_db_operations.py | 45 +---- src/tests/test_inspect.py | 11 -- src/tests/test_telemetry.py | 168 ------------------ 18 files changed, 12 insertions(+), 305 deletions(-) delete mode 100644 src/sql/telemetry.py delete mode 100644 src/tests/test_telemetry.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 66194bc9a..9dd2088ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## 0.10.14dev +* [Feature] Removes telemetry + ## 0.10.13 (2024-09-12) * [Feature] `ploomber-extension` is no longer a dependency diff --git a/doc/community/developer-guide.md b/doc/community/developer-guide.md index 862a952de..c4b05710d 100644 --- a/doc/community/developer-guide.md +++ b/doc/community/developer-guide.md @@ -237,7 +237,7 @@ print("all: ", results.fetchall()) Functions that expect a `conn` (sometimes named `con`) input variable should assume the input argument is a connection objects (either `SQLAlchemyConnection` or `DBAPIConnection`): ```python -def histogram(payload, table, column, bins, with_=None, conn=None): +def histogram(table, column, bins, with_=None, conn=None): pass ``` diff --git a/doc/community/support.md b/doc/community/support.md index f4b997aed..8c28c4f08 100644 --- a/doc/community/support.md +++ b/doc/community/support.md @@ -1,7 +1,3 @@ # Support For support, feature requests, and product updates: [join our community](https://ploomber.io/community) or follow us on [Twitter](https://twitter.com/ploomber)/[LinkedIn](https://www.linkedin.com/company/ploomber/). - -# Telemetry - -We collect (optional) anonymous statistics to understand and improve usage. For more details of what we collect and how to opt-out the telemetry collection, [see here](https://docs.ploomber.io/en/latest/community/user-stats.html). 
\ No newline at end of file diff --git a/src/sql/connection/connection.py b/src/sql/connection/connection.py index 7cd74feb1..c684427a4 100644 --- a/src/sql/connection/connection.py +++ b/src/sql/connection/connection.py @@ -26,7 +26,6 @@ from sql.store import store -from sql.telemetry import telemetry from sql import exceptions, display from sql.error_handler import handle_exception from sql.parse import ( @@ -990,13 +989,7 @@ class DBAPIConnection(AbstractConnection): is_dbapi_connection = True - @telemetry.log_call("DBAPIConnection", payload=True) - def __init__(self, payload, connection, alias=None, config=None): - try: - payload["engine"] = type(connection) - except Exception as e: - payload["engine_parsing_error"] = str(e) - + def __init__(self, connection, alias=None, config=None): # detect if the engine is a native duckdb connection _is_duckdb_native = _check_if_duckdb_dbapi_connection(connection) @@ -1084,12 +1077,7 @@ def to_table(self, table_name, data_frame, if_exists, index, schema=None): class SparkConnectConnection(AbstractConnection): is_dbapi_connection = False - @telemetry.log_call("SparkConnectConnection", payload=True) - def __init__(self, payload, connection, alias=None, config=None): - try: - payload["engine"] = type(connection) - except Exception as e: - payload["engine_parsing_error"] = str(e) + def __init__(self, connection, alias=None, config=None): self._driver = None # TODO: implement the dialect blacklist and add unit tests diff --git a/src/sql/ggplot/aes.py b/src/sql/ggplot/aes.py index 4f6768926..43a9b3d10 100644 --- a/src/sql/ggplot/aes.py +++ b/src/sql/ggplot/aes.py @@ -1,6 +1,3 @@ -from sql.telemetry import telemetry - - class aes: """ Aesthetic mappings @@ -17,7 +14,6 @@ class aes: The edge color of a shape """ - @telemetry.log_call("aes-init") def __init__(self, x=None, fill=None, color=None): self.x = x self.fill = fill diff --git a/src/sql/ggplot/facet_wrap.py b/src/sql/ggplot/facet_wrap.py index d044641cc..e6c0d422f 100644 --- a/src/sql/ggplot/facet_wrap.py +++ b/src/sql/ggplot/facet_wrap.py @@ -1,7 +1,6 @@ from jinja2 import Template import math import sql.connection -from sql.telemetry import telemetry from sql.util import enclose_table_with_double_quotations @@ -42,7 +41,6 @@ class facet_wrap(facet): Column to groupby and plot on different panels. 
""" - @telemetry.log_call("facet-wrap-init") def __init__(self, facet: str, legend=True): self.facet = facet self.legend = legend diff --git a/src/sql/ggplot/geom/geom_boxplot.py b/src/sql/ggplot/geom/geom_boxplot.py index 40ca0f5c5..16a36135a 100644 --- a/src/sql/ggplot/geom/geom_boxplot.py +++ b/src/sql/ggplot/geom/geom_boxplot.py @@ -1,6 +1,5 @@ from sql import plot from sql.ggplot.geom.geom import geom -from sql.telemetry import telemetry class geom_boxplot(geom): @@ -11,7 +10,6 @@ class geom_boxplot(geom): def __init__(self): pass - @telemetry.log_call("ggplot-boxplot") def draw(self, gg, ax=None): plot.boxplot( table=gg.table, diff --git a/src/sql/ggplot/geom/geom_histogram.py b/src/sql/ggplot/geom/geom_histogram.py index 34d0e8293..5cf353f3e 100644 --- a/src/sql/ggplot/geom/geom_histogram.py +++ b/src/sql/ggplot/geom/geom_histogram.py @@ -1,6 +1,5 @@ from sql import plot from sql.ggplot.geom.geom import geom -from sql.telemetry import telemetry class geom_histogram(geom): @@ -36,7 +35,6 @@ def __init__( self.binwidth = binwidth super().__init__(**kwargs) - @telemetry.log_call("ggplot-histogram") def draw(self, gg, ax=None, facet=None): plot.histogram( table=gg.table, diff --git a/src/sql/ggplot/ggplot.py b/src/sql/ggplot/ggplot.py index e4b5a5e2b..1a6fd7b29 100644 --- a/src/sql/ggplot/ggplot.py +++ b/src/sql/ggplot/ggplot.py @@ -4,7 +4,6 @@ import matplotlib as mpl import matplotlib.pyplot as plt from ploomber_core.dependencies import requires -from sql.telemetry import telemetry def _expand_to_multipanel_ax(figure, ax_to_clear=None): @@ -28,7 +27,6 @@ class ggplot: figure: mpl.figure.Figure axs: list - @telemetry.log_call("ggplot-init") def __init__(self, table, mapping: aes = None, conn=None, with_=None) -> None: self.table = table self.with_ = [with_] if with_ else None diff --git a/src/sql/inspect.py b/src/sql/inspect.py index c21051da4..250678930 100644 --- a/src/sql/inspect.py +++ b/src/sql/inspect.py @@ -2,7 +2,6 @@ from prettytable import PrettyTable from ploomber_core.exceptions import modify_exceptions from sql.connection import ConnectionManager -from sql.telemetry import telemetry from sql import exceptions import math from sql import util @@ -477,19 +476,16 @@ def __init__(self, table_name, schema=None) -> None: self._table_txt = self._table.get_string() -@telemetry.log_call() def get_table_names(schema=None): """Get table names for a given connection""" return Tables(schema) -@telemetry.log_call() def get_columns(name, schema=None): """Get column names for a given connection""" return Columns(name, schema) -@telemetry.log_call() def get_table_statistics(name, schema=None): """Get table statistics for a given connection. 
diff --git a/src/sql/magic.py b/src/sql/magic.py index e073b29da..6d8d32f70 100644 --- a/src/sql/magic.py +++ b/src/sql/magic.py @@ -54,8 +54,6 @@ DataFrame = None Series = None -from sql.telemetry import telemetry - SUPPORT_INTERACTIVE_WIDGETS = ["Checkbox", "Text", "IntSlider", ""] IF_NOT_SELECT_MESSAGE = "The query is not a SELECT type query and as \ @@ -81,7 +79,6 @@ class RenderMagic(Magics): action="append", dest="with_", ) - @telemetry.log_call("sqlrender") def sqlrender(self, line): args = parse_argstring(self.sqlrender, line) warnings.warn( @@ -187,7 +184,6 @@ class SqlMagic(Magics, Configurable): ), ) - @telemetry.log_call("init") def __init__(self, shell): self._store = store @@ -370,14 +366,15 @@ def execute(self, line="", cell="", local_ns=None): line=line, cell=cell, local_ns=local_ns, is_interactive_mode=False ) - @telemetry.log_call("execute", payload=True) @modify_exceptions - def _execute(self, payload, line, cell, local_ns, is_interactive_mode=False): + def _execute(self, line, cell, local_ns, is_interactive_mode=False): """ This function implements the cell logic; we create this private method so we can control how the function is called. Otherwise, decorating ``SqlMagic.execute`` will break when adding the ``@log_call`` decorator with ``payload=True`` + + NOTE: telemetry has been removed, we can remove this function """ def interactive_execute_wrapper(**kwargs): @@ -518,7 +515,6 @@ def interactive_execute_wrapper(**kwargs): alias=args.section if args.section else args.alias, config=self, ) - payload["connection_info"] = conn._get_database_information() if args.persist_replace and args.append: raise exceptions.UsageError( diff --git a/src/sql/plot.py b/src/sql/plot.py index ec4e22b7e..2c2e106c1 100644 --- a/src/sql/plot.py +++ b/src/sql/plot.py @@ -29,7 +29,6 @@ np = None import sql.connection -from sql.telemetry import telemetry import warnings @@ -178,10 +177,7 @@ def _compute_conf_interval(N, med, iqr): # https://github.com/matplotlib/matplotlib/blob/ddc260ce5a53958839c244c0ef0565160aeec174/lib/matplotlib/axes/_axes.py#L3915 @requires(["matplotlib"]) -@telemetry.log_call("boxplot", payload=True) -def boxplot( - payload, table, column, *, orient="v", with_=None, conn=None, ax=None, schema=None -): +def boxplot(table, column, *, orient="v", with_=None, conn=None, ax=None, schema=None): """Plot boxplot Parameters @@ -231,8 +227,6 @@ def boxplot( if not conn: conn = sql.connection.ConnectionManager.current - payload["connection_info"] = conn._get_database_information() - _table = enclose_table_with_double_quotations(table, conn) if schema: _table = f'"{schema}"."{_table}"' @@ -316,9 +310,7 @@ def _get_bar_width(ax, bins, bin_size, binwidth): @requires(["matplotlib"]) -@telemetry.log_call("histogram", payload=True) def histogram( - payload, table, column, bins, @@ -409,7 +401,7 @@ def histogram( _table = f'"{schema}"."{_table}"' ax = ax or plt.gca() - payload["connection_info"] = conn._get_database_information() + if category: if isinstance(column, list): if len(column) > 1: @@ -874,9 +866,7 @@ def _bar(table, column, with_=None, conn=None): @requires(["matplotlib"]) -@telemetry.log_call("bar", payload=True) def bar( - payload, table, column, show_num=False, @@ -928,7 +918,6 @@ def bar( _table = f'"{schema}"."{_table}"' ax = ax or plt.gca() - payload["connection_info"] = conn._get_database_information() if column is None: raise exceptions.UsageError("Column name has not been specified") @@ -1059,9 +1048,7 @@ def _pie(table, column, with_=None, conn=None): 
@requires(["matplotlib"]) -@telemetry.log_call("bar", payload=True) def pie( - payload, table, column, show_num=False, @@ -1107,7 +1094,6 @@ def pie( _table = f'"{schema}"."{_table}"' ax = ax or plt.gca() - payload["connection_info"] = conn._get_database_information() if column is None: raise exceptions.UsageError("Column name has not been specified") diff --git a/src/sql/run/resultset.py b/src/sql/run/resultset.py index 4b977a8e2..50bb01c24 100644 --- a/src/sql/run/resultset.py +++ b/src/sql/run/resultset.py @@ -10,7 +10,6 @@ from sql.column_guesser import ColumnGuesserMixin from sql.run.csv import CSVWriter, CSVResultDescriptor -from sql.telemetry import telemetry from sql.run.table import CustomPrettyTable from sql._current import _config_feedback_all @@ -246,15 +245,12 @@ def dicts(self): for row in self: yield dict(zip(self.keys, row)) - @telemetry.log_call("data-frame", payload=True) - def DataFrame(self, payload): + def DataFrame(self): """Returns a Pandas DataFrame instance built from the result set.""" - payload["connection_info"] = self._conn._get_database_information() import pandas as pd return _convert_to_data_frame(self, "df", pd.DataFrame) - @telemetry.log_call("polars-data-frame") def PolarsDataFrame(self, **polars_dataframe_kwargs): """Returns a Polars DataFrame instance built from the result set.""" import polars as pl @@ -262,7 +258,6 @@ def PolarsDataFrame(self, **polars_dataframe_kwargs): polars_dataframe_kwargs["schema"] = self.keys return _convert_to_data_frame(self, "pl", pl.DataFrame, polars_dataframe_kwargs) - @telemetry.log_call("pie") def pie(self, key_word_sep=" ", title=None, **kwargs): """Generates a pylab pie chart from the result set. @@ -302,7 +297,6 @@ def pie(self, key_word_sep=" ", title=None, **kwargs): ax.set_title(title or self.ys[0].name) return ax - @telemetry.log_call("plot") def plot(self, title=None, **kwargs): """Generates a pylab plot from the result set. @@ -349,7 +343,6 @@ def plot(self, title=None, **kwargs): return ax - @telemetry.log_call("bar") def bar(self, key_word_sep=" ", title=None, **kwargs): """Generates a pylab bar plot from the result set. @@ -393,7 +386,6 @@ def bar(self, key_word_sep=" ", title=None, **kwargs): ax.set_ylabel(self.ys[0].name) return ax - @telemetry.log_call("generate-csv") def csv(self, filename=None, **format_params): """Generate results in comma-separated form. Write to ``filename`` if given. 
Any other parameters will be passed on to csv.writer.""" diff --git a/src/sql/telemetry.py b/src/sql/telemetry.py deleted file mode 100644 index 678148596..000000000 --- a/src/sql/telemetry.py +++ /dev/null @@ -1,13 +0,0 @@ -from ploomber_core.telemetry.telemetry import Telemetry - -try: - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version - - -telemetry = Telemetry( - api_key="phc_P9SpSeypyPwxrMdFn2edOOEooQioF2axppyEeDwtMSP", - package_name="jupysql", - version=version("jupysql"), -) diff --git a/src/sql/widgets/table_widget/table_widget.py b/src/sql/widgets/table_widget/table_widget.py index 17da7ecd9..cdd157161 100644 --- a/src/sql/widgets/table_widget/table_widget.py +++ b/src/sql/widgets/table_widget/table_widget.py @@ -5,7 +5,6 @@ from sql.util import parse_sql_results_to_json from sql.inspect import fetch_sql_with_pagination, is_table_exists from sql.widgets import utils -from sql.telemetry import telemetry import os from ploomber_core.dependencies import check_installed @@ -15,7 +14,6 @@ class TableWidget: - @telemetry.log_call("TableWidget-init") def __init__(self, table, schema=None): """ Creates an HTML table element and populates it with SQL table diff --git a/src/tests/integration/test_generic_db_operations.py b/src/tests/integration/test_generic_db_operations.py index 415206296..9d7b51cba 100644 --- a/src/tests/integration/test_generic_db_operations.py +++ b/src/tests/integration/test_generic_db_operations.py @@ -3,9 +3,8 @@ from matplotlib import pyplot as plt import pytest import warnings -from sql.telemetry import telemetry from sql.error_handler import CTE_MSG -from unittest.mock import ANY, Mock +from unittest.mock import ANY from IPython.core.error import UsageError import math @@ -38,13 +37,6 @@ def run_around_tests(tmpdir_factory): shutil.rmtree(str(my_tmpdir)) -@pytest.fixture -def mock_log_api(monkeypatch): - mock_log_api = Mock() - monkeypatch.setattr(telemetry, "log_api", mock_log_api) - yield mock_log_api - - @pytest.mark.parametrize( "ip_with_dynamic_db, query_prefix, query_suffix", [ @@ -269,41 +261,6 @@ def test_close_and_connect( assert get_connection_count(ip_with_dynamic_db) == 1 -@pytest.mark.parametrize( - "ip_with_dynamic_db, expected_dialect, expected_driver", - [ - ("ip_with_postgreSQL", "postgresql", "psycopg2"), - ("ip_with_mySQL", "mysql", "pymysql"), - ("ip_with_mariaDB", "mysql", "pymysql"), - ("ip_with_SQLite", "sqlite", "pysqlite"), - ("ip_with_duckDB", "duckdb", "duckdb_engine"), - ("ip_with_duckDB_native", "duckdb", "DuckDBPyConnection"), - ("ip_with_MSSQL", "mssql", "pyodbc"), - ("ip_with_Snowflake", "snowflake", "snowflake"), - ("ip_with_oracle", "oracle", "oracledb"), - ("ip_with_clickhouse", "clickhouse", "native"), - ("ip_with_spark", "spark2", "SparkSession"), - ], -) -def test_telemetry_execute_command_has_connection_info( - ip_with_dynamic_db, expected_dialect, expected_driver, mock_log_api, request -): - ip_with_dynamic_db = request.getfixturevalue(ip_with_dynamic_db) - - mock_log_api.assert_called_with( - action="jupysql-execute-success", - total_runtime=ANY, - metadata={ - "argv": ANY, - "connection_info": { - "dialect": expected_dialect, - "driver": expected_driver, - "server_version_info": ANY, - }, - }, - ) - - @pytest.mark.parametrize( "cell", [ diff --git a/src/tests/test_inspect.py b/src/tests/test_inspect.py index 4e13c3557..696341606 100644 --- a/src/tests/test_inspect.py +++ b/src/tests/test_inspect.py @@ -161,17 +161,6 @@ def test_nonexistent_table(sample_db, name, 
schema, error): assert error.lower() in str(excinfo.value).lower() -@pytest.mark.parametrize( - "function", - [ - inspect.get_table_names, - inspect.get_columns, - ], -) -def test_telemetry(function): - assert "@telemetry.log_call" in getsource(function) - - def test_get_schema_names(ip): ip.run_cell( """%%sql sqlite:///my.db diff --git a/src/tests/test_telemetry.py b/src/tests/test_telemetry.py deleted file mode 100644 index 851098ff3..000000000 --- a/src/tests/test_telemetry.py +++ /dev/null @@ -1,168 +0,0 @@ -from pathlib import Path -from unittest.mock import ANY, Mock -import pytest -import urllib.request -from sql.telemetry import telemetry -from sql import plot -from sql.connection import SQLAlchemyConnection -from sqlalchemy import create_engine - -# Ref: https://pytest.org/en/7.2.x/how-to/tmp_path.html# -# Utilize tmp directory to store downloaded csv - - -@pytest.fixture -def simple_file_path_iris(tmpdir): - file_path_str = str(tmpdir.join("iris.csv")) - - if not Path(file_path_str).is_file(): - urllib.request.urlretrieve( - "https://raw.githubusercontent.com/plotly/datasets/master/iris-data.csv", - file_path_str, - ) - - yield file_path_str - - -@pytest.fixture -def simple_file_path_penguins(tmpdir): - file_path_str = str(tmpdir.join("penguins.csv")) - - if not Path(file_path_str).is_file(): - urllib.request.urlretrieve( - "https://raw.githubusercontent.com" - "/mwaskom/seaborn-data/master/penguins.csv", - file_path_str, - ) - - yield file_path_str - - -@pytest.fixture -def simple_db_conn(): - engine = create_engine("duckdb://") - return SQLAlchemyConnection(engine=engine) - - -@pytest.fixture -def mock_log_api(monkeypatch): - mock_log_api = Mock() - monkeypatch.setattr(telemetry, "log_api", mock_log_api) - yield mock_log_api - - -excepted_duckdb_connection_info = { - "dialect": "duckdb", - "driver": "duckdb_engine", - "server_version_info": ANY, -} - -excepted_sqlite_connection_info = { - "dialect": "sqlite", - "driver": "pysqlite", - "server_version_info": ANY, -} - - -def test_boxplot_telemetry_execution( - mock_log_api, simple_db_conn, simple_file_path_iris, ip -): - ip.run_cell("%sql duckdb://") - plot.boxplot(simple_file_path_iris, "petal width", conn=simple_db_conn, orient="h") - mock_log_api.assert_called_with( - action="jupysql-boxplot-success", - total_runtime=ANY, - metadata={ - "argv": ANY, - "connection_info": excepted_duckdb_connection_info, - }, - ) - - -def test_histogram_telemetry_execution( - mock_log_api, simple_db_conn, simple_file_path_iris, ip -): - ip.run_cell("%sql duckdb://") - plot.histogram(simple_file_path_iris, "petal width", bins=50, conn=simple_db_conn) - - mock_log_api.assert_called_with( - action="jupysql-histogram-success", - total_runtime=ANY, - metadata={ - "argv": ANY, - "connection_info": excepted_duckdb_connection_info, - }, - ) - - -def test_data_frame_telemetry_execution(mock_log_api, ip, simple_file_path_iris): - # Simulate the cell query & get the DataFrame - ip.run_cell("%sql duckdb://") - ip.run_cell( - "result = %sql SELECT * FROM read_csv_auto('" + simple_file_path_iris + "')" - ) - ip.run_cell("result.DataFrame()") - mock_log_api.assert_called_with( - action="jupysql-data-frame-success", - total_runtime=ANY, - metadata={ - "argv": ANY, - "connection_info": excepted_duckdb_connection_info, - }, - ) - - -def test_sqlcmd_snippets_query_telemetry_execution( - mock_log_api, ip, simple_file_path_iris -): - # Simulate the sqlcmd snippets query - ip.run_cell("%sql duckdb://") - ip.run_cell( - "%sql --save class_setosa --no-execute " - 
"SELECT * FROM read_csv_auto('" - + simple_file_path_iris - + "')" - + " WHERE class='Iris-setosa'" - ) - ip.run_cell("%sqlcmd snippets class_setosa") - - mock_log_api.assert_called_with( - action="jupysql-execute-success", total_runtime=ANY, metadata=ANY - ) - - -def test_execute_telemetry_execution(mock_log_api, ip): - ip.run_cell("%sql duckdb://") - - mock_log_api.assert_called_with( - action="jupysql-execute-success", - total_runtime=ANY, - metadata={ - "argv": ANY, - "connection_info": excepted_duckdb_connection_info, - }, - ) - - -def test_switch_connection_with_correct_telemetry_connection_info(mock_log_api, ip): - ip.run_cell("%sql duckdb://") - - mock_log_api.assert_called_with( - action="jupysql-execute-success", - total_runtime=ANY, - metadata={ - "argv": ANY, - "connection_info": excepted_duckdb_connection_info, - }, - ) - - ip.run_cell("%sql sqlite://") - - mock_log_api.assert_called_with( - action="jupysql-execute-success", - total_runtime=ANY, - metadata={ - "argv": ANY, - "connection_info": excepted_sqlite_connection_info, - }, - )