Skip to content

Commit

Permalink
Merge pull request #118 from lanl/multiple_yaml_toml_files
Browse files Browse the repository at this point in the history
Multiple yaml toml files
  • Loading branch information
jpulidojr authored Sep 16, 2024
2 parents 2cdc9eb + e78f7aa commit 05e203a
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 38 deletions.
78 changes: 57 additions & 21 deletions dsi/backends/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,29 +548,47 @@ def query_fctime(self, operator, ctime, isVerbose=False):

return resout

def yamlToSqlite(self, filename, db_name, deleteSql=True):
def yamlDataToList(self, filenames):
    """
    Function that reads a YAML file or files into a list

    `filenames`: name of a YAML file, or a list of YAML file names, to be read

    `return`: list holding every object produced by the YAML loader, in the
    order the files are given and the order the documents appear in each file
    """
    # A bare string would otherwise be iterated character by character and
    # each character passed to open(), so treat it as a single file name.
    if isinstance(filenames, str):
        filenames = [filenames]

    yamlData = []
    for filename in filenames:
        with open(filename, 'r') as yaml_file:
            editedString = yaml_file.read()

        # Put a `columns:` line in front of every occurrence of
        # `specification`; yamlToSqlite reads each table's fields from
        # table['columns'].
        editedString = re.sub('specification', r'columns:\n specification', editedString)
        # Wrap any text running from a `!` to the end of its line in single
        # quotes -- presumably so values such as `!jack` load as plain
        # strings rather than being parsed as YAML tags.
        editedString = re.sub(r'(!.+)\n', r"'\1'\n", editedString)

        # safe_load_all is lazy; extend() consumes it while the text is in scope.
        yamlData.extend(yaml.safe_load_all(editedString))

    return yamlData

def yamlToSqlite(self, filenames, db_name, deleteSql=True):
"""
Function that ingests a YAML file into a sqlite database based on the given database name
`filename`: name of YAML file that is ingested
`filenames`: name of YAML file or a list of YAML files to be ingested
`db_name`: name of database that YAML file should be added to. Database will be created if it does not exist in local directory.
`deleteSql`: flag to delete temp SQL file that creates the database. Default is True, but change to False for testing or comparing outputs
"""
with open(filename, 'r') as yaml_file, open(db_name+".sql", "w") as sql_file:
editedString = yaml_file.read()
editedString = re.sub('specification', r'columns:\n specification', editedString)
editedString = re.sub(r'(!.+)\n', r"'\1'\n", editedString)
yml_data = yaml.safe_load_all(editedString)

for table in yml_data:
if isinstance(filenames, str):
filenames = [filenames]

with open(db_name+".sql", "w") as sql_file:
yml_list = self.yamlDataToList(filenames)
for table in yml_list:
tableName = table["segment"]

data_types = {float: "FLOAT", str: "VARCHAR", int: "INT"}
if not os.path.isfile(db_name+".db") or os.path.getsize(db_name+".db") == 0:
createStmt = f"CREATE TABLE {tableName} ( "
createUnitStmt = f"CREATE TABLE {tableName}_units ( "
createStmt = f"CREATE TABLE IF NOT EXISTS {tableName} ( "
createUnitStmt = f"CREATE TABLE IF NOT EXISTS {tableName}_units ( "
insertUnitStmt = f"INSERT INTO {tableName}_units VALUES( "

for key, val in table['columns'].items():
Expand All @@ -597,30 +615,48 @@ def yamlToSqlite(self, filename, db_name, deleteSql=True):

sql_file.write(insertStmt[:-2] + ");\n\n")

subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))
subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))

if deleteSql == True:
os.remove(db_name+".sql")
if deleteSql == True:
os.remove(db_name+".sql")

def tomlToSqlite(self, filename, db_name, deleteSql=True):
def tomlDataToList(self, filenames):
    """
    Function that reads a TOML file or files into a list

    `filenames`: name of a TOML file, or a list of TOML file names, to be read

    `return`: list of `[tableName, tableData]` pairs, one per top-level entry
    of each file, in the order the files are given
    """
    # A bare string would otherwise be iterated character by character and
    # each character passed to open(), so treat it as a single file name.
    if isinstance(filenames, str):
        filenames = [filenames]

    toml_data = []
    for filename in filenames:
        with open(filename, 'r') as toml_file:
            data = toml.load(toml_file)

        # Keep each entry as a two-element list, as the original code did.
        toml_data.extend([tableName, tableData] for tableName, tableData in data.items())

    return toml_data

def tomlToSqlite(self, filenames, db_name, deleteSql=True):
"""
Function that ingests a TOML file into a sqlite database based on the given database name
`filename`: name of TOML file that is ingested
`filenames`: name of TOML file or a list of TOML files to be ingested
`db_name`: name of database that TOML file should be added to. Database will be created if it does not exist in local directory.
`deleteSql`: flag to delete temp SQL file that creates the database. Default is True, but change to False for testing or comparing outputs
"""
with open(filename, 'r') as toml_file, open(db_name+".sql", "w") as sql_file:
data = toml.load(toml_file)
if isinstance(filenames, str):
filenames = [filenames]

with open(db_name+".sql", "w") as sql_file:
data = self.tomlDataToList(filenames)

for tableName, tableData in data.items():
for item in data:
tableName, tableData = item
data_types = {float: "FLOAT", str: "VARCHAR", int: "INT"}

if not os.path.isfile(db_name+".db") or os.path.getsize(db_name+".db") == 0:
createStmt = f"CREATE TABLE {tableName} ( "
createUnitStmt = f"CREATE TABLE {tableName}_units ( "
createStmt = f"CREATE TABLE IF NOT EXISTS {tableName} ( "
createUnitStmt = f"CREATE TABLE IF NOT EXISTS {tableName}_units ( "
insertUnitStmt = f"INSERT INTO {tableName}_units VALUES( "

for key, val in tableData.items():
Expand Down Expand Up @@ -650,4 +686,4 @@ def tomlToSqlite(self, filename, db_name, deleteSql=True):
subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r"))

if deleteSql == True:
os.remove(db_name+".sql")
os.remove(db_name+".sql")
14 changes: 3 additions & 11 deletions dsi/backends/tests/test_sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,26 +83,18 @@ def test_artifact_query():
assert True


test_jsondata_artifact_put()

def test_yaml_reader():
reader = Sqlite("yaml-test.db")
reader.yamlToSqlite("examples/data/schema.yml", "yaml-test", deleteSql=False)
reader.yamlToSqlite(["examples/data/schema.yml", "examples/data/schema2.yml"], "yaml-test", deleteSql=False)
subprocess.run(["diff", "examples/data/compare-schema.sql", "yaml-test.sql"], stdout=open("compare_sql.txt", "w"))
file_size = os.path.getsize("compare_sql.txt")
os.remove("compare_sql.txt")
os.remove("yaml-test.sql")
os.remove("yaml-test.db")

assert file_size == 0 #difference between sql files should be 0 characters

def test_toml_reader():
reader = Sqlite("toml-test.db")
reader.tomlToSqlite("examples/data/schema.toml", "toml-test", deleteSql=False)
reader.tomlToSqlite(["examples/data/schema.toml", "examples/data/schema2.toml"], "toml-test", deleteSql=False)
subprocess.run(["diff", "examples/data/compare-schema.sql", "toml-test.sql"], stdout=open("compare_sql.txt", "w"))
file_size = os.path.getsize("compare_sql.txt")
os.remove("compare_sql.txt")
os.remove("toml-test.sql")
os.remove("toml-test.db")

assert file_size == 0 #difference between sql files should be 0 characters
assert file_size == 0 #difference between sql files should be 0 characters
36 changes: 30 additions & 6 deletions examples/data/compare-schema.sql
Original file line number Diff line number Diff line change
@@ -1,24 +1,48 @@
CREATE TABLE math ( specification VARCHAR, a INT, b VARCHAR, c FLOAT, d INT, e FLOAT, f FLOAT);
CREATE TABLE IF NOT EXISTS math ( specification VARCHAR, a INT, b VARCHAR, c FLOAT, d INT, e FLOAT, f FLOAT);

CREATE TABLE math_units ( specification VARCHAR, a VARCHAR, b VARCHAR, c VARCHAR, d VARCHAR, e VARCHAR, f VARCHAR);
CREATE TABLE IF NOT EXISTS math_units ( specification VARCHAR, a VARCHAR, b VARCHAR, c VARCHAR, d VARCHAR, e VARCHAR, f VARCHAR);

INSERT INTO math_units VALUES( NULL, NULL, NULL, 'cm', NULL, NULL, NULL);

INSERT INTO math VALUES( '!jack', 1, 'there is CM', 45.98, 2, 34.8, 0.0089);

CREATE TABLE address ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i INT, j INT, k INT, l FLOAT, m INT);
CREATE TABLE IF NOT EXISTS address ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i INT, j INT, k INT, l FLOAT, m INT);

CREATE TABLE address_units ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i VARCHAR, j VARCHAR, k VARCHAR, l VARCHAR, m VARCHAR);
CREATE TABLE IF NOT EXISTS address_units ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i VARCHAR, j VARCHAR, k VARCHAR, l VARCHAR, m VARCHAR);

INSERT INTO address_units VALUES( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);

INSERT INTO address VALUES( '!sam', '/home/sam/lib/data', 'good memories', '556place street', 2, 3, 4, 1.0, 99);

CREATE TABLE physics ( specification VARCHAR, n FLOAT, o VARCHAR, p INT, q VARCHAR, r INT, s FLOAT);
CREATE TABLE IF NOT EXISTS physics ( specification VARCHAR, n FLOAT, o VARCHAR, p INT, q VARCHAR, r INT, s FLOAT);

CREATE TABLE physics_units ( specification VARCHAR, n VARCHAR, o VARCHAR, p VARCHAR, q VARCHAR, r VARCHAR, s VARCHAR);
CREATE TABLE IF NOT EXISTS physics_units ( specification VARCHAR, n VARCHAR, o VARCHAR, p VARCHAR, q VARCHAR, r VARCHAR, s VARCHAR);

INSERT INTO physics_units VALUES( NULL, 'm / s / s', NULL, 's', NULL, 'million grams', NULL);

INSERT INTO physics VALUES( '!amy', 9.8, 'gravity', 23, 'home 23', 1, -0.0012);

CREATE TABLE IF NOT EXISTS math2 ( specification VARCHAR, a INT, b VARCHAR, c FLOAT, d INT, e FLOAT, f FLOAT);

CREATE TABLE IF NOT EXISTS math2_units ( specification VARCHAR, a VARCHAR, b VARCHAR, c VARCHAR, d VARCHAR, e VARCHAR, f VARCHAR);

INSERT INTO math2_units VALUES( NULL, NULL, NULL, 'cm', NULL, NULL, NULL);

INSERT INTO math2 VALUES( '!jack', 1, 'there is CM', 45.98, 2, 34.8, 0.0089);

CREATE TABLE IF NOT EXISTS address2 ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i INT, j INT, k INT, l FLOAT, m INT);

CREATE TABLE IF NOT EXISTS address2_units ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i VARCHAR, j VARCHAR, k VARCHAR, l VARCHAR, m VARCHAR);

INSERT INTO address2_units VALUES( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);

INSERT INTO address2 VALUES( '!sam', '/home/sam/lib/data', 'good memories', '556place street', 2, 3, 4, 1.0, 99);

CREATE TABLE IF NOT EXISTS physics2 ( specification VARCHAR, n FLOAT, o VARCHAR, p INT, q VARCHAR, r INT, s FLOAT);

CREATE TABLE IF NOT EXISTS physics2_units ( specification VARCHAR, n VARCHAR, o VARCHAR, p VARCHAR, q VARCHAR, r VARCHAR, s VARCHAR);

INSERT INTO physics2_units VALUES( NULL, 'm / s / s', NULL, 's', NULL, 'million grams', NULL);

INSERT INTO physics2 VALUES( '!amy', 9.8, 'gravity', 23, 'home 23', 1, -0.0012);

28 changes: 28 additions & 0 deletions examples/data/schema2.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
[math2]
specification = "!jack"
a = 1
b = "there is CM"
c = ["45.98", "cm"]
d = 2
e = 34.8
f = 89.0e-4

[address2]
specification = "!sam"
fileLoc = '/home/sam/lib/data'
g = "good memories"
h = "556place street"
i = 2
j = 3
k = 4
l = 10000.0e-4
m = 99

[physics2]
specification = "!amy"
n = ["9.8", "m / s / s"]
o = "gravity"
p = ["23", "s"]
q = "home 23"
r = ['1', 'million grams']
s = -12.0e-4
29 changes: 29 additions & 0 deletions examples/data/schema2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
---
segment: math2
specification: !jack
a: 1
b: "there is CM"
c: "45.98 cm"
d: 2
e: 34.8
f: 89.0e-4
---
segment: address2
specification: !sam
fileLoc: '/home/sam/lib/data'
g: "good memories"
h: "556place street"
i: 2
j: 3
k: 4
l: 10000.0e-4
m: 99
---
segment: physics2
specification: !amy
n: "9.8 m / s / s"
o: "gravity"
p: "23 s"
q: "home 23"
r: '1 million grams'
s: -12.0e-4

0 comments on commit 05e203a

Please sign in to comment.