Skip to content

Commit

Permalink
Format files
Browse files Browse the repository at this point in the history
  • Loading branch information
mohd-akram committed Jan 13, 2024
1 parent 5357e93 commit 4e42b6c
Show file tree
Hide file tree
Showing 15 changed files with 610 additions and 640 deletions.
4 changes: 1 addition & 3 deletions .eslintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
"project": "tsconfig.json",
"sourceType": "module"
},
"plugins": [
"@typescript-eslint"
],
"plugins": ["@typescript-eslint"],
"rules": {
"@typescript-eslint/no-floating-promises": "warn",
"semi": "warn"
Expand Down
60 changes: 27 additions & 33 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
jify
====
# jify

jify is an experimental library/tool for querying large (multi-gigabyte) JSON files. It
does this by first indexing the required fields. It can also be used as an
Expand All @@ -8,75 +7,72 @@ append-only database.
When a JSON file is indexed (e.g. `data.json`), an index file is created in the
same directory with a `.index.json` extension (e.g. `data.index.json`).

Install
-------
## Install

npm install jify

Usage
-----
## Usage

```javascript
const { Database, predicate: p } = require('jify');
const { Database, predicate: p } = require("jify");

async function main() {
const db = new Database('books.json');
const db = new Database("books.json");

// Create
await db.create();

// Insert - Single
await db.insert({
title: 'Robinson Crusoe',
title: "Robinson Crusoe",
year: 1719,
author: { name: 'Daniel Defoe' }
author: { name: "Daniel Defoe" },
});

// Insert - Batch
await db.insert([
{
title: 'Great Expectations',
title: "Great Expectations",
year: 1861,
author: { name: 'Charles Dickens' }
author: { name: "Charles Dickens" },
},
{
title: 'Oliver Twist',
title: "Oliver Twist",
year: 1838,
author: { name: 'Charles Dickens' }
author: { name: "Charles Dickens" },
},
{
title: 'Pride and Prejudice',
title: "Pride and Prejudice",
year: 1813,
author: { name: 'Jane Austen' }
author: { name: "Jane Austen" },
},
{
title: 'Nineteen Eighty-Four',
title: "Nineteen Eighty-Four",
year: 1949,
author: { name: 'George Orwell' }
}
author: { name: "George Orwell" },
},
]);

// Index - creates books.index.json file
await db.index('title', 'year', 'author.name');
await db.index("title", "year", "author.name");

// Query
console.log('author.name = Charles Dickens, year > 1840');
const query = { 'author.name': 'Charles Dickens', year: p`> ${1840}` };
for await (const record of db.find(query))
console.log(record);
console.log("author.name = Charles Dickens, year > 1840");
const query = { "author.name": "Charles Dickens", year: p`> ${1840}` };
for await (const record of db.find(query)) console.log(record);

let records;

// Range query
console.log('1800 <= year < 1900');
console.log("1800 <= year < 1900");
records = await db.find({ year: p`>= ${1800} < ${1900}` }).toArray();
console.log(records);

// Multiple queries
console.log('year < 1800 or year > 1900');
records = await db.find(
{ year: p`< ${1800}` }, { year: p`> ${1900}` }
).toArray();
console.log("year < 1800 or year > 1900");
records = await db
.find({ year: p`< ${1800}` }, { year: p`> ${1900}` })
.toArray();
console.log(records);
}

Expand All @@ -92,8 +88,7 @@ $ jify find --query "year>=1800<1900" books.json
$ jify find --query "year<1800" --query "year>1900" books.json
```

Implementation
--------------
## Implementation

The index is implemented as a JSON array of skip list entries. The entries are
encoded as strings and all numbers embedded in the string are encoded using
Expand All @@ -102,8 +97,7 @@ its simplicity and to allow for using a single JSON file as an index. Better
performance might be achieved by using a different data structure, a binary
format, or multiple index files.

Performance
-----------
## Performance

jify is reasonably fast. It can index about 1M records (~700 MB) per minute and
supports parallel indexing of fields. Inserting (with indexes) has similar
Expand Down
153 changes: 78 additions & 75 deletions src/bin.ts
Original file line number Diff line number Diff line change
@@ -1,115 +1,118 @@
#!/usr/bin/env node

import { ArgumentParser } from 'argparse';
import { Database, predicate, Query } from './main';
import { ArgumentParser } from "argparse";
import { Database, predicate, Query } from "./main";

async function index(file: string, fields: string[]) {
const db = new Database(file);
await db.index(...fields.map(f => {
const [name, type] = f.split(':');
return { name, type };
}));
await db.index(
...fields.map((f) => {
const [name, type] = f.split(":");
return { name, type };
})
);
}

async function find(file: string, queries: string[]) {
const db = new Database(file);
const iter = db.find(...queries.map(q => {
const query: Query = {};
let name = '';
let ops: string[] = [];
let values: any[] = [];
let start = 0;
let i = 0;
const getValue = () => {
const value = q.slice(start, i).trim();
if (!value)
throw new Error('Empty value');
start = i;
return value;
};
const addValue = () => {
const value = getValue();
values.push(
value == 'null' ? null :
value == 'false' ? false :
value == 'true' ? true :
Number.isFinite(Number(value)) ? Number(value) : value
);
};
const addPredicate = () => {
query[name] = ops[0] == '=' ?
values[0] : predicate(ops as any, ...values);
name = '';
ops = [];
values = [];
};
for (; i < q.length; i++) {
const c = q[i];
if (['<', '>', '='].includes(c)) {
if (!name)
name = getValue();
else if (ops.length)
const iter = db.find(
...queries.map((q) => {
const query: Query = {};
let name = "";
let ops: string[] = [];
let values: any[] = [];
let start = 0;
let i = 0;
const getValue = () => {
const value = q.slice(start, i).trim();
if (!value) throw new Error("Empty value");
start = i;
return value;
};
const addValue = () => {
const value = getValue();
values.push(
value == "null"
? null
: value == "false"
? false
: value == "true"
? true
: Number.isFinite(Number(value))
? Number(value)
: value
);
};
const addPredicate = () => {
query[name] =
ops[0] == "=" ? values[0] : predicate(ops as any, ...values);
name = "";
ops = [];
values = [];
};
for (; i < q.length; i++) {
const c = q[i];
if (["<", ">", "="].includes(c)) {
if (!name) name = getValue();
else if (ops.length) addValue();
const op = q.slice(start, i + 1 + Number(q[i + 1] == "="));
ops.push(op);
i += op.length - 1;
start = i + 1;
} else if (c == ",") {
addValue();
const op = q.slice(start, i + 1 + Number(q[i + 1] == '='));
ops.push(op);
i += op.length - 1;
start = i + 1;
} else if (c == ',') {
addValue();
start += 1;
addPredicate();
start += 1;
addPredicate();
}
}
}
addValue();
addPredicate();
return query;
}));
addValue();
addPredicate();
return query;
})
);

for await (const record of iter)
console.log(JSON.stringify(record));
for await (const record of iter) console.log(JSON.stringify(record));
}

async function main() {
const parser = new ArgumentParser({
add_help: true,
description: 'query JSON files'
description: "query JSON files",
});

parser.add_argument('--version', {
action: 'version',
version: require('../package.json').version,
parser.add_argument("--version", {
action: "version",
version: require("../package.json").version,
});

const subparsers = parser.add_subparsers({ dest: 'command' });
const subparsers = parser.add_subparsers({ dest: "command" });

const commands = {
index: subparsers.add_parser(
'index', { help: 'index JSON file' },
),
find: subparsers.add_parser(
'find', { help: 'query JSON file' },
)
index: subparsers.add_parser("index", { help: "index JSON file" }),
find: subparsers.add_parser("find", { help: "query JSON file" }),
};

// Index
commands.index.add_argument('file');
commands.index.add_argument('--field', { action: 'append' });
commands.index.add_argument("file");
commands.index.add_argument("--field", { action: "append" });

// Find
commands.find.add_argument('file');
commands.find.add_argument('--query', { action: 'append' });
commands.find.add_argument("file");
commands.find.add_argument("--query", { action: "append" });

const args = parser.parse_args();
switch (args.command) {
case 'index':
case "index":
await index(args.file, args.field);
break;
case 'find':
case "find":
await find(args.file, args.query);
break;
}
}

process.once('unhandledRejection', err => { throw err; });
process.once("unhandledRejection", (err) => {
throw err;
});

main();
Loading

0 comments on commit 4e42b6c

Please sign in to comment.