diff --git a/dsi/backends/sqlite.py b/dsi/backends/sqlite.py index 69adf90..f278c08 100644 --- a/dsi/backends/sqlite.py +++ b/dsi/backends/sqlite.py @@ -548,29 +548,47 @@ def query_fctime(self, operator, ctime, isVerbose=False): return resout - def yamlToSqlite(self, filename, db_name, deleteSql=True): + def yamlDataToList(self, filenames): + """ + Function that reads a YAML file or files into a list + """ + + yamlData = [] + for filename in filenames: + with open(filename, 'r') as yaml_file: + editedString = yaml_file.read() + editedString = re.sub('specification', r'columns:\n specification', editedString) + editedString = re.sub(r'(!.+)\n', r"'\1'\n", editedString) + yml_data = yaml.safe_load_all(editedString) + + for table in yml_data: + yamlData.append(table) + + return yamlData + + def yamlToSqlite(self, filenames, db_name, deleteSql=True): """ Function that ingests a YAML file into a sqlite database based on the given database name - `filename`: name of YAML file that is ingested + `filenames`: name of YAML file or a list of YAML files to be ingested `db_name`: name of database that YAML file should be added to. Database will be created if it does not exist in local directory. `deleteSql`: flag to delete temp SQL file that creates the database. Default is True, but change to False for testing or comparing outputs """ - with open(filename, 'r') as yaml_file, open(db_name+".sql", "w") as sql_file: - editedString = yaml_file.read() - editedString = re.sub('specification', r'columns:\n specification', editedString) - editedString = re.sub(r'(!.+)\n', r"'\1'\n", editedString) - yml_data = yaml.safe_load_all(editedString) - for table in yml_data: + if isinstance(filenames, str): + filenames = [filenames] + + with open(db_name+".sql", "w") as sql_file: + yml_list = self.yamlDataToList(filenames) + for table in yml_list: tableName = table["segment"] data_types = {float: "FLOAT", str: "VARCHAR", int: "INT"} if not os.path.isfile(db_name+".db") or os.path.getsize(db_name+".db") == 0: - createStmt = f"CREATE TABLE {tableName} ( " - createUnitStmt = f"CREATE TABLE {tableName}_units ( " + createStmt = f"CREATE TABLE IF NOT EXISTS {tableName} ( " + createUnitStmt = f"CREATE TABLE IF NOT EXISTS {tableName}_units ( " insertUnitStmt = f"INSERT INTO {tableName}_units VALUES( " for key, val in table['columns'].items(): @@ -597,30 +615,48 @@ def yamlToSqlite(self, filename, db_name, deleteSql=True): sql_file.write(insertStmt[:-2] + ");\n\n") - subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r")) + subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r")) - if deleteSql == True: - os.remove(db_name+".sql") + if deleteSql == True: + os.remove(db_name+".sql") - def tomlToSqlite(self, filename, db_name, deleteSql=True): + def tomlDataToList(self, filenames): + """ + Function that reads a TOML file or files into a list + """ + + toml_data = [] + for filename in filenames: + with open(filename, 'r') as toml_file: + data = toml.load(toml_file) + for tableName, tableData in data.items(): + toml_data.append([tableName, tableData]) + + return toml_data + + def tomlToSqlite(self, filenames, db_name, deleteSql=True): """ Function that ingests a TOML file into a sqlite database based on the given database name - `filename`: name of TOML file that is ingested + `filenames`: name of TOML file or a list of TOML files to be ingested `db_name`: name of database that TOML file should be added to. Database will be created if it does not exist in local directory. `deleteSql`: flag to delete temp SQL file that creates the database. Default is True, but change to False for testing or comparing outputs """ - with open(filename, 'r') as toml_file, open(db_name+".sql", "w") as sql_file: - data = toml.load(toml_file) + if isinstance(filenames, str): + filenames = [filenames] + + with open(db_name+".sql", "w") as sql_file: + data = self.tomlDataToList(filenames) - for tableName, tableData in data.items(): + for item in data: + tableName, tableData = item data_types = {float: "FLOAT", str: "VARCHAR", int: "INT"} if not os.path.isfile(db_name+".db") or os.path.getsize(db_name+".db") == 0: - createStmt = f"CREATE TABLE {tableName} ( " - createUnitStmt = f"CREATE TABLE {tableName}_units ( " + createStmt = f"CREATE TABLE IF NOT EXISTS {tableName} ( " + createUnitStmt = f"CREATE TABLE IF NOT EXISTS {tableName}_units ( " insertUnitStmt = f"INSERT INTO {tableName}_units VALUES( " for key, val in tableData.items(): @@ -650,4 +686,4 @@ def tomlToSqlite(self, filename, db_name, deleteSql=True): subprocess.run(["sqlite3", db_name+".db"], stdin= open(db_name+".sql", "r")) if deleteSql == True: - os.remove(db_name+".sql") \ No newline at end of file + os.remove(db_name+".sql") diff --git a/dsi/backends/tests/test_sqlite.py b/dsi/backends/tests/test_sqlite.py index d6dc7f3..43ee825 100644 --- a/dsi/backends/tests/test_sqlite.py +++ b/dsi/backends/tests/test_sqlite.py @@ -83,26 +83,18 @@ def test_artifact_query(): assert True -test_jsondata_artifact_put() - def test_yaml_reader(): reader = Sqlite("yaml-test.db") - reader.yamlToSqlite("examples/data/schema.yml", "yaml-test", deleteSql=False) + reader.yamlToSqlite(["examples/data/schema.yml", "examples/data/schema2.yml"], "yaml-test", deleteSql=False) subprocess.run(["diff", "examples/data/compare-schema.sql", "yaml-test.sql"], stdout=open("compare_sql.txt", "w")) file_size = os.path.getsize("compare_sql.txt") - os.remove("compare_sql.txt") - os.remove("yaml-test.sql") - os.remove("yaml-test.db") assert file_size == 0 #difference between sql files should be 0 characters def test_toml_reader(): reader = Sqlite("toml-test.db") - reader.tomlToSqlite("examples/data/schema.toml", "toml-test", deleteSql=False) + reader.tomlToSqlite(["examples/data/schema.toml", "examples/data/schema2.toml"], "toml-test", deleteSql=False) subprocess.run(["diff", "examples/data/compare-schema.sql", "toml-test.sql"], stdout=open("compare_sql.txt", "w")) file_size = os.path.getsize("compare_sql.txt") - os.remove("compare_sql.txt") - os.remove("toml-test.sql") - os.remove("toml-test.db") - assert file_size == 0 #difference between sql files should be 0 characters \ No newline at end of file + assert file_size == 0 #difference between sql files should be 0 characters diff --git a/examples/data/compare-schema.sql b/examples/data/compare-schema.sql index e52938b..df2112b 100644 --- a/examples/data/compare-schema.sql +++ b/examples/data/compare-schema.sql @@ -1,24 +1,48 @@ -CREATE TABLE math ( specification VARCHAR, a INT, b VARCHAR, c FLOAT, d INT, e FLOAT, f FLOAT); +CREATE TABLE IF NOT EXISTS math ( specification VARCHAR, a INT, b VARCHAR, c FLOAT, d INT, e FLOAT, f FLOAT); -CREATE TABLE math_units ( specification VARCHAR, a VARCHAR, b VARCHAR, c VARCHAR, d VARCHAR, e VARCHAR, f VARCHAR); +CREATE TABLE IF NOT EXISTS math_units ( specification VARCHAR, a VARCHAR, b VARCHAR, c VARCHAR, d VARCHAR, e VARCHAR, f VARCHAR); INSERT INTO math_units VALUES( NULL, NULL, NULL, 'cm', NULL, NULL, NULL); INSERT INTO math VALUES( '!jack', 1, 'there is CM', 45.98, 2, 34.8, 0.0089); -CREATE TABLE address ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i INT, j INT, k INT, l FLOAT, m INT); +CREATE TABLE IF NOT EXISTS address ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i INT, j INT, k INT, l FLOAT, m INT); -CREATE TABLE address_units ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i VARCHAR, j VARCHAR, k VARCHAR, l VARCHAR, m VARCHAR); +CREATE TABLE IF NOT EXISTS address_units ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i VARCHAR, j VARCHAR, k VARCHAR, l VARCHAR, m VARCHAR); INSERT INTO address_units VALUES( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); INSERT INTO address VALUES( '!sam', '/home/sam/lib/data', 'good memories', '556place street', 2, 3, 4, 1.0, 99); -CREATE TABLE physics ( specification VARCHAR, n FLOAT, o VARCHAR, p INT, q VARCHAR, r INT, s FLOAT); +CREATE TABLE IF NOT EXISTS physics ( specification VARCHAR, n FLOAT, o VARCHAR, p INT, q VARCHAR, r INT, s FLOAT); -CREATE TABLE physics_units ( specification VARCHAR, n VARCHAR, o VARCHAR, p VARCHAR, q VARCHAR, r VARCHAR, s VARCHAR); +CREATE TABLE IF NOT EXISTS physics_units ( specification VARCHAR, n VARCHAR, o VARCHAR, p VARCHAR, q VARCHAR, r VARCHAR, s VARCHAR); INSERT INTO physics_units VALUES( NULL, 'm / s / s', NULL, 's', NULL, 'million grams', NULL); INSERT INTO physics VALUES( '!amy', 9.8, 'gravity', 23, 'home 23', 1, -0.0012); +CREATE TABLE IF NOT EXISTS math2 ( specification VARCHAR, a INT, b VARCHAR, c FLOAT, d INT, e FLOAT, f FLOAT); + +CREATE TABLE IF NOT EXISTS math2_units ( specification VARCHAR, a VARCHAR, b VARCHAR, c VARCHAR, d VARCHAR, e VARCHAR, f VARCHAR); + +INSERT INTO math2_units VALUES( NULL, NULL, NULL, 'cm', NULL, NULL, NULL); + +INSERT INTO math2 VALUES( '!jack', 1, 'there is CM', 45.98, 2, 34.8, 0.0089); + +CREATE TABLE IF NOT EXISTS address2 ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i INT, j INT, k INT, l FLOAT, m INT); + +CREATE TABLE IF NOT EXISTS address2_units ( specification VARCHAR, fileLoc VARCHAR, g VARCHAR, h VARCHAR, i VARCHAR, j VARCHAR, k VARCHAR, l VARCHAR, m VARCHAR); + +INSERT INTO address2_units VALUES( NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); + +INSERT INTO address2 VALUES( '!sam', '/home/sam/lib/data', 'good memories', '556place street', 2, 3, 4, 1.0, 99); + +CREATE TABLE IF NOT EXISTS physics2 ( specification VARCHAR, n FLOAT, o VARCHAR, p INT, q VARCHAR, r INT, s FLOAT); + +CREATE TABLE IF NOT EXISTS physics2_units ( specification VARCHAR, n VARCHAR, o VARCHAR, p VARCHAR, q VARCHAR, r VARCHAR, s VARCHAR); + +INSERT INTO physics2_units VALUES( NULL, 'm / s / s', NULL, 's', NULL, 'million grams', NULL); + +INSERT INTO physics2 VALUES( '!amy', 9.8, 'gravity', 23, 'home 23', 1, -0.0012); + diff --git a/examples/data/schema2.toml b/examples/data/schema2.toml new file mode 100644 index 0000000..d8723d2 --- /dev/null +++ b/examples/data/schema2.toml @@ -0,0 +1,28 @@ +[math2] +specification = "!jack" +a = 1 +b = "there is CM" +c = ["45.98", "cm"] +d = 2 +e = 34.8 +f = 89.0e-4 + +[address2] +specification = "!sam" +fileLoc = '/home/sam/lib/data' +g = "good memories" +h = "556place street" +i = 2 +j = 3 +k = 4 +l = 10000.0e-4 +m = 99 + +[physics2] +specification = "!amy" +n = ["9.8", "m / s / s"] +o = "gravity" +p = ["23", "s"] +q = "home 23" +r = ['1', 'million grams'] +s = -12.0e-4 diff --git a/examples/data/schema2.yml b/examples/data/schema2.yml new file mode 100644 index 0000000..70ae6c2 --- /dev/null +++ b/examples/data/schema2.yml @@ -0,0 +1,29 @@ +--- +segment: math2 +specification: !jack + a: 1 + b: "there is CM" + c: "45.98 cm" + d: 2 + e: 34.8 + f: 89.0e-4 +--- +segment: address2 +specification: !sam + fileLoc: '/home/sam/lib/data' + g: "good memories" + h: "556place street" + i: 2 + j: 3 + k: 4 + l: 10000.0e-4 + m: 99 +--- +segment: physics2 +specification: !amy + n: "9.8 m / s / s" + o: "gravity" + p: "23 s" + q: "home 23" + r: '1 million grams' + s: -12.0e-4