-
Notifications
You must be signed in to change notification settings - Fork 51
/
dns_demo_queries.sql
115 lines (98 loc) · 4.38 KB
/
dns_demo_queries.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
/*
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
SPDX-License-Identifier: Apache-2.0
*/
-- PREVIEW TABLE
-- preview first 10 rows with all fields, quick way to verify everything is setup correctly
SELECT * from r53dns
LIMIT 10;
-- PARTITION TESTS
/* NOTE: if there are no constraints a partition (account, region, or date) then by default ALL data will be scanned
this could lead to costly query, always consider using at least one partition constraint.
Note that this is the case even if you have other constraints in a query (e.g. sourceaddress = '192.0.2.1'),
only constraints using partition fields (date_partition, region_partition, account_partition)
will limit the amount of data scanned.
*/
-- preview first 10 rows with all fields, limited to a single account
SELECT * from r53dns
WHERE account_partition = '111122223333'
LIMIT 10;
-- preview first 10 rows with all fields, limited to multiple accounts
SELECT * from r53dns
WHERE account_partition in ('111122223333','444455556666','123456789012')
LIMIT 10;
-- preview first 10 rows with all fields, limited to a single vpc
SELECT * from r53dns
WHERE vpc_partition = 'vpc-00000001'
LIMIT 10;
-- preview first 10 rows with all fields, limited to multiple vpcs
SELECT * from r53dns
WHERE vpc_partition in ('vpc-00000001','vpc-00000002','vpc-00000003')
LIMIT 10;
-- NOTE: date_partition format is 'YYYY/MM/DD' as a string
-- preview first 10 rows with all fields, limited to a certain date range
SELECT * from r53dns
WHERE date_partition >= '2020/07/01'
AND date_partition <= '2020/07/31'
LIMIT 10;
-- preview first 10 rows with all fields, limited to the past 30 days (relative)
SELECT * from r53dns
WHERE date_partition >= date_format(date_add('day',-30,current_timestamp), '%Y/%m/%d')
LIMIT 10;
-- preview first 10 rows with all fields, limited by a combination partition constraints
-- NOTE: narrowing the scope of the query as much as possible will improve performance and minimize cost
SELECT * from r53dns
WHERE date_partition >= '2020/07/01'
AND date_partition <= '2020/07/31'
AND account_partition = '111122223333'
AND vpc_partition in ('vpc-00000001','vpc-00000002','vpc-00000003')
LIMIT 10;
-- ANALYSIS EXAMPLES
-- Sort queries by requestor instance count and query count
SELECT query_name, query_type, array_distinct(filter(array_agg(srcids), q -> q.instance IS NOT NULL)) as instances,
cardinality(array_distinct(filter(array_agg(srcids), q -> q.instance IS NOT NULL))) as query_count
FROM r53dns
WHERE date_partition >= '2020/07/01'
AND date_partition <= '2020/07/31'
AND account_partition = '111122223333'
AND vpc_partition in ('vpc-00000001','vpc-00000002','vpc-00000003')
GROUP BY query_name, query_type
ORDER by query_count DESC;
-- Summary with count of each time a name name queried
SELECT query_name, query_type, count(*) as query_count FROM r53dns
WHERE date_partition >= '2020/07/01'
AND date_partition <= '2020/07/31'
AND account_partition = '111122223333'
AND vpc_partition in ('vpc-00000001','vpc-00000002','vpc-00000003')
GROUP BY query_name, query_type
ORDER BY query_count DESC;
-- Summary with count of each time a AAAA record name name queried
SELECT query_name, query_type, count(*) as query_count FROM r53dns
WHERE query_type <> 'AAAA'
AND date_partition >= '2020/07/01'
AND date_partition <= '2020/07/31'
AND account_partition = '111122223333'
AND vpc_partition in ('vpc-00000001','vpc-00000002','vpc-00000003')
GROUP BY query_name, query_type
ORDER BY query_count DESC;
-- Summary with count of each time a AAAA record name name queried
-- split out TLD and SLD (note: doesn't properly handle TLDs containing a '.' (e.g. .com.br)
SELECT element_at(split(query_name,'.'),-2) AS tld,
element_at(split(query_name,'.'),-3) AS sld,
query_name, query_type,
count(*) AS query_count
FROM r53dns
WHERE query_type <> 'AAAA'
AND date_partition >= '2020/07/01'
AND date_partition <= '2020/07/31'
AND account_partition = '111122223333'
AND vpc_partition in ('vpc-00000001','vpc-00000002','vpc-00000003')
GROUP BY query_name, query_type
ORDER BY query_count DESC;
-- Get records that that resolve to a specific IP
SELECT * FROM r53dns
WHERE contains(transform(answers, x-> x.rdata), '203.0.113.2')
AND date_partition >= '2020/07/01'
AND date_partition <= '2020/07/31'
AND account_partition = '111122223333'
AND vpc_partition in ('vpc-00000001','vpc-00000002','vpc-00000003');