Skip to content

Commit

Permalink
feat: move to Sentry tracing from Elastic APM (#231)
Browse files Browse the repository at this point in the history
Sentry is the preferred tracing/performance system for DBT Platform, which
this service is being migrated to.
  • Loading branch information
michalc authored Jun 14, 2024
2 parents 2ac87de + b30e292 commit f4c5289
Show file tree
Hide file tree
Showing 10 changed files with 14 additions and 83 deletions.
3 changes: 0 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,6 @@ DOCS_GITHUB_REPO_URL=https://github.com/uktrade/public-data-api \
| READONLY_AWS_SECRET_ACCESS_KEY | The secret part of the readonly AWS access key |
| READ_AND_WRITE_AWS_ACCESS_KEY_ID | The AWS access key ID that has write permissions on the S3 bucket (for the csv-generating worker) |
| READ_AND_WRITE_AWS_SECRET_ACCESS_KEY | The secret part of the read+write AWS access key |
| APM_SECRET_TOKEN | A secret token to authorize requests to the APM Server. |
| APM_SERVER_URL | The URL of the APM server<hr>`https://apm.elk.uktrade.digital`|
| APM_SERVER_TIMEOUT | The maximum amount of time to wait for connections to the APM server<hr>`20s`|
| ENVIRONMENT | The current environment where the application is running<hr>`develop` |
| GA_ENDPOINT | The endpoint to send analytics info to |
| GA_TRACKING_ID | The unique identifier for the google analytics property |
Expand Down
17 changes: 6 additions & 11 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@

import requests

from elasticapm.contrib.flask import ElasticAPM
from flask import (
Flask,
Response,
Expand Down Expand Up @@ -122,7 +121,6 @@ def start():

def stop():
server.stop()
apm.client.close()

def track_analytics(handler):
"""Decorator to send analytics data to google in the background."""
Expand Down Expand Up @@ -965,15 +963,6 @@ def _add_headers(resp):
resp.headers['x-robots-tag'] = 'no-index, no-follow'
return resp

apm = ElasticAPM(
app,
service_name='public-data-api',
secret_token=os.environ['APM_SECRET_TOKEN'],
server_url=os.environ['APM_SERVER_URL'],
environment=os.environ['ENVIRONMENT'],
server_timeout=os.environ.get('APM_SERVER_TIMEOUT', None),
)

app.add_url_rule('/v1/datasets', view_func=list_all_datasets)
app.add_url_rule(
'/v1/datasets/<string:dataset_id>/metadata',
Expand Down Expand Up @@ -1058,10 +1047,16 @@ def main():
# Session tracking makes graceful shutdown difficult since it starts a thread but there
# is no quick way to kill it
auto_session_tracking=False,
enable_tracing=True,
)

gevent.signal_handler(signal.SIGTERM, stop)

start()

sentry_client = sentry_sdk.Hub.current.client
if sentry_client is not None:
sentry_client.close(timeout=2.0)
gevent.get_hub().join()
logger.info('Shut down gracefully')

Expand Down
4 changes: 4 additions & 0 deletions app_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,7 @@ def main():
if os.environ.get('SENTRY_DSN'):
sentry_sdk.init( # pylint: disable=abstract-class-instantiated
dsn=os.environ['SENTRY_DSN'],
enable_tracing=True,
)

parsed_endpoint = urllib.parse.urlsplit(os.environ['AWS_S3_ENDPOINT'])
Expand Down Expand Up @@ -462,6 +463,9 @@ def stop(_, __):
heartbeat_thread.join()
logger.info('Shut down heartbeat')

sentry_client = sentry_sdk.Hub.current.client
if sentry_client is not None:
sentry_client.close(timeout=2.0)
logger.info('Shut down gracefully')


Expand Down
1 change: 0 additions & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ Flask
gevent
urllib3
sentry-sdk[flask]
elastic-apm[flask]
tidy-json-to-csv
requests
sqlite-s3-query
Expand Down
9 changes: 0 additions & 9 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,10 @@ anyio==4.3.0
# via httpx
blinker==1.8.1
# via
# elastic-apm
# flask
# sentry-sdk
certifi==2024.2.2
# via
# elastic-apm
# httpcore
# httpx
# requests
Expand All @@ -22,10 +20,6 @@ charset-normalizer==3.3.2
# via requests
click==8.1.7
# via flask
ecs-logging==2.1.0
# via elastic-apm
elastic-apm[flask]==6.22.0
# via -r requirements.in
flask==3.0.3
# via
# -r requirements.in
Expand Down Expand Up @@ -77,13 +71,10 @@ tidy-json-to-csv==0.0.13
urllib3==2.2.1
# via
# -r requirements.in
# elastic-apm
# requests
# sentry-sdk
werkzeug==3.0.3
# via flask
wrapt==1.14.1
# via elastic-apm
zope-event==5.0
# via gevent
zope-interface==6.3
Expand Down
12 changes: 3 additions & 9 deletions requirements_test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,9 @@ dill==0.3.8
distlib==0.3.8
# via virtualenv
ecs-logging==2.1.0
# via
# -r requirements.txt
# elastic-apm
# via elastic-apm
elastic-apm[flask]==6.22.0
# via
# -r requirements.txt
# -r requirements_test.in
# via -r requirements_test.in
filelock==3.14.0
# via virtualenv
flask==3.0.3
Expand Down Expand Up @@ -191,9 +187,7 @@ werkzeug==3.0.3
wheel==0.43.0
# via pip-tools
wrapt==1.14.1
# via
# -r requirements.txt
# elastic-apm
# via elastic-apm
zope-event==5.0
# via
# -r requirements.txt
Expand Down
2 changes: 0 additions & 2 deletions sample.env
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ AWS_S3_REGION=us-east-1
PORT=7000
AWS_S3_ENDPOINT=http://127.0.0.1:9000/my-bucket/
ENVIRONMENT=test
APM_SECRET_TOKEN=secret_token
APM_SERVER_URL=http://127.0.0.1:8201
DOCS_DEPARTMENT_NAME="<department name>"
DOCS_SERVICE_NAME="Data API"
DOCS_GITHUB_REPO_URL=https://github.com/uktrade/public-data-api
Expand Down
10 changes: 0 additions & 10 deletions start-services.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,3 @@ docker run --rm -p 9000:9000 --name s3proxy-minio -d \
--entrypoint sh \
minio/minio:RELEASE.2021-11-24T23-19-33Z.hotfix.1d85a4563 \
-c 'mkdir -p /data1 && mkdir -p /data2 && mkdir -p /data3 && mkdir -p /data4 && minio server /data{1...4}'

docker run --network public-data-api-network --rm -p 9201:9200 -p 9301:9300 --name elasticsearch -d \
-e "discovery.type=single-node" \
docker.elastic.co/elasticsearch/elasticsearch:7.8.0

docker run --network public-data-api-network --rm -p 8201:8200 --name=apm-server -d \
--user=apm-server \
docker.elastic.co/apm/apm-server:7.8.0 \
--strict.perms=false -e \
-E output.elasticsearch.hosts=["elasticsearch:9200"]
2 changes: 0 additions & 2 deletions stop-services.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,4 @@
set -e

docker stop s3proxy-minio
docker stop elasticsearch
docker stop apm-server
docker network rm public-data-api-network
37 changes: 1 addition & 36 deletions test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,6 @@ def application(port=8080, max_attempts=500, aws_access_key_id='AKIAIOSFODNN7EXA
'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'
),
'AWS_S3_ENDPOINT': 'http://127.0.0.1:9000/my-bucket/',
'APM_SECRET_TOKEN': 'secret_token',
'APM_SERVER_URL': 'http://localhost:8201',
'ENVIRONMENT': 'test',
'SENTRY_DSN': 'http://foo@localhost:9001/1',
'GA_ENDPOINT': 'http://localhost:9002/collect',
Expand All @@ -100,7 +98,7 @@ def stop():
for _, process in processes.items():
process.terminate()
for _, process in processes.items():
process.wait(timeout=10)
process.wait(timeout=20)
output_errors = {
name: (read_and_close(stdout), read_and_close(stderr))
for name, (stdout, stderr) in process_outs.items()
Expand Down Expand Up @@ -1826,39 +1824,6 @@ def test_check_heartbeat():
assert result.returncode == 1


def test_elastic_apm(processes):
dataset_id = str(uuid.uuid4())
content = str(uuid.uuid4()).encode() * 100000
version = 'v0.0.1'
put_version_data(dataset_id, version, content, 'json')
url = f'/v1/datasets/{dataset_id}/versions/{version}/data'
query = json.dumps({
'query': {
'match': {
'url.path': url
}
}
})
with requests.Session() as session:
retry = 0
while retry < 20:
session.get(version_data_public_url(dataset_id, version, 'json'))
time.sleep(1)
response = requests.get(
url='http://localhost:9201/apm-7.8.0-transaction/_search',
data=query,
headers={'Accept': 'application/json', 'Content-type': 'application/json'}
)
res = json.loads(response.text)
if retry > 0 and 'hits' in res and res['hits']['total']['value']:
break
time.sleep(3)
retry += 1

assert 'hits' in res, f'Unexpected Elastic Search api response: {str(res)}'
assert res['hits']['total']['value'] >= 1, 'No hits found'


def test_healthcheck_ok(processes):
dataset_id = 'healthcheck'
content_str = {'status': 'OK'}
Expand Down

0 comments on commit f4c5289

Please sign in to comment.