-
Notifications
You must be signed in to change notification settings - Fork 1
/
agol_connection.py
144 lines (113 loc) · 5.24 KB
/
agol_connection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
from arcgis.gis import GIS
from arcgis.features import FeatureLayerCollection, FeatureSet, Table, Feature
import tempfile
import shutil
import json
import csv
import os
class AGOLConnection(object):
    """Thin wrapper around an ArcGIS Online (AGOL) session.

    Reads credentials from ``creds.csv`` and layer configuration from
    ``agol_layers.json`` (both resolved relative to the current working
    directory), opens a GIS connection, and exposes helpers to overwrite
    hosted feature layers with fresh CSV data and to query which source
    files a layer has already ingested.
    """

    def __init__(self, verbose=False):
        """Load credentials/config and connect to ArcGIS Online.

        verbose: when True, progress messages are printed.
        Raises Exception when no usable arcgis credentials are found.
        """
        creds = self._load_credentials()
        if creds is None:
            raise Exception("no arcgis credentials supplied")
        self.creds = creds
        self.layers = self._get_layers()
        self.gis = self._make_connection()
        self.verbose = verbose

    def _load_credentials(self):
        """Read ArcGIS credentials from ``creds.csv`` in the cwd.

        The CSV must have 'service', 'username', 'password' and 'host'
        columns; only the row(s) with service == 'arcgis' are used.

        Returns a dict with 'username', 'password' and 'host' keys, or
        None when the file is missing or contains no 'arcgis' row.
        """
        cred_path = "creds.csv"
        if not os.path.isfile(cred_path):
            return None
        creds = {}
        with open(cred_path, newline="") as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                if row['service'] == 'arcgis':
                    creds['username'] = row['username']
                    creds['password'] = row['password']
                    creds['host'] = row['host']
        # BUG FIX: previously an empty dict was returned when the file had
        # no 'arcgis' row; that passed the `is None` check in __init__ and
        # failed later with a KeyError in _make_connection. Normalize an
        # empty result to None so the caller's check works.
        return creds or None

    def _get_layers(self):
        """Return the layer configuration parsed from agol_layers.json."""
        with open("agol_layers.json", "r") as openf:
            # json.load reads directly from the file object.
            configs = json.load(openf)
        return configs

    def _make_connection(self):
        """Open and return a GIS session using the loaded credentials."""
        username = self.creds['username']
        password = self.creds['password']
        host = self.creds['host']
        return GIS(host, username, password)

    def get_arcgis_feature_collection_from_item_id(self, arcgis_item_id):
        """Return the parent FeatureLayerCollection for an AGOL item id.

        You might ask - why do you not just use the FeatureLayerCollection's
        URL? Because you get a 403 if you try that. Instead, if you grab the
        parent container from the published layer, you can use the FLC
        manager's overwrite() method successfully.
        """
        feature_item = self.gis.content.get(arcgis_item_id)
        # Items backed by a table have no .layers; detect via the item repr.
        if "type:Table" in str(feature_item):
            fs = feature_item.tables[0].container
        else:
            fs = feature_item.layers[0].container
        return fs

    def overwrite_arcgis_layer(self, dataset_name, source_data_dir, source_data_file, dry_run=False):
        """Overwrite a hosted feature layer with a local CSV file.

        dataset_name: key into agol_layers.json identifying the layer.
        source_data_dir / source_data_file: location of the CSV to upload.
        dry_run: when True, do all local steps but skip the actual upload.

        Returns the overwrite() result on success, a placeholder string for
        a dry run, or False when dataset_name is unknown.
        """
        if self.verbose:
            print("Begin upload to ArcGIS Online")
        if dry_run is True:
            print("** DRY RUN -- NO UPLOAD WILL HAPPEN **")
        try:
            layer_config = self.layers[dataset_name]
        except KeyError:
            if self.verbose:
                # BUG FIX: the second string was missing its f prefix, so
                # "{list(self.layers.keys())}" was printed literally.
                print(f"Invalid dataset name: {dataset_name}. Valid options are"
                      f" {list(self.layers.keys())}. Alter agol_layers.json to add more.")
            return False
        original_file_name = layer_config['original_file_name']
        item_id = layer_config['id']
        if self.verbose:
            print(f" ArcGIS Online item id: {layer_config['id']}")
            print(f" CSV name used for upload: {layer_config['original_file_name']}")
        fs = self.get_arcgis_feature_collection_from_item_id(item_id)
        # Overwrite docs:
        # https://developers.arcgis.com/python/api-reference/arcgis.features.managers.html#featurelayercollectionmanager
        # Note that the filename (not the path, just the filename) must match the filename of the data the feature layer
        # was originally created from, because reasons. We rename here.
        # Note that if you have a feature service you want to use overwrite() on, you must share that
        # feature service with everyone (share->everyone). If you don't, you'll get a 403. You must also
        # share the underlying CSV with everyone.
        result = ""
        with tempfile.TemporaryDirectory() as tmpdirname:
            # Copy the source CSV under the name AGOL expects.
            shutil.copyfile(os.path.join(source_data_dir, source_data_file),
                            os.path.join(tmpdirname, original_file_name))
            if self.verbose:
                print(f" local CSV file name: {source_data_dir}/{source_data_file}")
            original_dir = os.getcwd()
            os.chdir(tmpdirname)
            try:
                if dry_run is False:
                    try:
                        if self.verbose:
                            print(" starting upload...")
                        result = fs.manager.overwrite(original_file_name)
                    except Exception as e:
                        # AGOL uploads fail transiently; retry exactly once.
                        if self.verbose:
                            print(f"Caught exception {e} during upload, retrying")
                        result = fs.manager.overwrite(original_file_name)
                    if self.verbose:
                        print(" finished.")
                else:
                    if self.verbose:
                        result = "Dry run complete"
            finally:
                # BUG FIX: always restore the working directory. Previously
                # an exception from the retried overwrite() left the process
                # cwd inside the (about-to-be-deleted) temp directory.
                os.chdir(original_dir)
        return result

    def get_already_processed_files(self, dataset_name):
        """Return the distinct Source_Filename values already in a layer.

        Used to decide which remote files can be skipped (not re-fetched
        over SFTP). Returns a sorted list of filename strings.
        """
        print(f"Getting already processed files for {dataset_name}")
        item_id = self.layers[dataset_name]['id']
        fs = self.gis.content.get(item_id)
        # Table-backed items expose .tables rather than .layers.
        if "type:Table" in str(fs):
            t = fs.tables[0]
        else:
            t = fs.layers[0]
        qr = t.query(
            out_fields='Source_Filename',
            return_geometry=False,  # we don't need the geometries
            return_distinct_values=True,  # get distinct values based on out_fields
            order_by_fields="Source_Filename ASC",  # sort for good measure
        )
        filenames_to_not_sftp = [f.attributes["Source_Filename"] for f in qr.features]
        return filenames_to_not_sftp