Skip to content

Commit

Permalink
feat(parser): add float literal
Browse files Browse the repository at this point in the history
  • Loading branch information
RossComputerGuy committed Feb 3, 2024
1 parent 664d1d0 commit d1779c5
Show file tree
Hide file tree
Showing 7 changed files with 327 additions and 51 deletions.
55 changes: 9 additions & 46 deletions webidl/Parser.zig
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,13 @@ const Self = @This();
pub const Message = @import("Parser/Message.zig");
pub const Context = @import("Parser/Context.zig");
pub const productions = @import("Parser/productions.zig");
pub const matchers = @import("Parser/matchers.zig");

pub const TokenType = enum {
whitespace,
linefeed,
int,
float,
int,
symbol,
identifier,
string,
Expand All @@ -29,53 +30,14 @@ pub const TokenType = enum {
const Pattern = ptk.Pattern(TokenType);
pub const ruleset = ptk.RuleSet(TokenType);

/// Returns the length of the identifier-like prefix of `input`.
///
/// Letters, `_` and `-` are accepted at every position. Decimal digits are
/// accepted only after the first character, and only when the first
/// character is not `-`. Returns 0 when the first byte is not accepted.
fn identifier(input: []const u8) usize {
    const leading = "-_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
    const trailing = leading ++ "0123456789";

    var len: usize = 0;
    while (len < input.len) : (len += 1) {
        // Digits join the alphabet only past the first byte of a prefix
        // that does not start with '-'.
        const digits_allowed = len > 0 and input[0] != '-';
        const alphabet: []const u8 = if (digits_allowed) trailing else leading;
        if (std.mem.indexOfScalar(u8, alphabet, input[len]) == null) break;
    }
    return len;
}

pub const Tokenizer = ptk.Tokenizer(TokenType, &[_]Pattern{
Pattern.create(.whitespace, ptk.matchers.whitespace),
Pattern.create(.linefeed, ptk.matchers.linefeed),
Pattern.create(.int, ptk.matchers.decimalNumber),
Pattern.create(.float, ptk.matchers.sequenceOf(.{ ptk.matchers.decimalNumber, ptk.matchers.literal("."), ptk.matchers.decimalNumber })),
Pattern.create(.symbol, struct {
fn func(input: []const u8) ?usize {
const i = identifier(input);
return if (std.meta.stringToEnum(productions.Symbol.Type, input[0..i])) |_| i else null;
}
}.func),
Pattern.create(.identifier, struct {
fn func(input: []const u8) ?usize {
const i = identifier(input);
if (input[0] == '-') {
var digits: usize = 0;
for (input[1..i]) |ch| {
if (std.ascii.isDigit(ch)) {
digits += 1;
}
}
if (digits == (i - 1)) return null;
}
return if (std.meta.stringToEnum(productions.Symbol.Type, input[0..i])) |_| null else i;
}
}.func),
Pattern.create(.string, struct {
fn func(input: []const u8) ?usize {
if (input[0] == '"') {
if (std.mem.indexOf(u8, input[1..], "\"")) |i| {
return i + 2;
}
}
return null;
}
}.func),
Pattern.create(.float, matchers.float),
Pattern.create(.int, matchers.int),
Pattern.create(.symbol, matchers.symbol),
Pattern.create(.identifier, matchers.identifier),
Pattern.create(.string, matchers.string),
Pattern.create(.@"=", ptk.matchers.literal("=")),
Pattern.create(.@"+", ptk.matchers.literal("+")),
Pattern.create(.@"-", ptk.matchers.literal("-")),
Expand All @@ -88,7 +50,7 @@ pub const Tokenizer = ptk.Tokenizer(TokenType, &[_]Pattern{
});

pub const ParserCore = ptk.ParserCore(Tokenizer, .{ .whitespace, .linefeed });
pub const Error = ParserCore.Error || Allocator.Error || Message.Error || std.fmt.ParseIntError;
pub const Error = ParserCore.Error || Allocator.Error || Message.Error || std.fmt.ParseIntError || std.fmt.ParseFloatError;

allocator: Allocator,
core: ParserCore,
Expand Down Expand Up @@ -124,4 +86,5 @@ test {
_ = Context;
_ = Message;
_ = productions;
_ = matchers;
}
14 changes: 12 additions & 2 deletions webidl/Parser/Context.zig
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pub const ValueType = union(enum) {
tokens: []const Parser.TokenType,
symbol: Symbol.Type,
symbols: []const Symbol.Type,
text: []const u8,

pub fn format(self: ValueType, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
_ = fmt;
Expand All @@ -23,8 +24,14 @@ pub const ValueType = union(enum) {

if (@typeInfo(Value) == .Pointer or @typeInfo(Value) == .Array) {
for (value, 0..) |item, x| {
try writer.writeAll(@tagName(item));
if ((x + 1) < value.len) try writer.writeAll(", ");
const Item = @TypeOf(item);

if (Item == u8) {
try writer.writeByte(item);
} else {
try writer.writeAll(@tagName(item));
if ((x + 1) < value.len) try writer.writeAll(", ");
}
}
} else {
try writer.writeAll(@tagName(value));
Expand All @@ -47,6 +54,9 @@ pub fn reset(self: *Self) void {
}

/// Records `err` as a message via `Parser.Message.pushError`, passing a copy
/// of this context.
///
/// For `error.UnexpectedCharacter`, `got` is first set to the tokenizer's
/// source text from its current offset to the end, so the message can show
/// the text that could not be tokenized.
pub fn pushError(self: *Self, err: Parser.Error) !void {
    switch (err) {
        error.UnexpectedCharacter => {
            const source = self.core.tokenizer.source;
            const offset = self.core.tokenizer.offset;
            self.got = .{ .text = source[offset..] };
        },
        else => {},
    }
    return Parser.Message.pushError(self.messages, err, self.*);
}

Expand Down
2 changes: 1 addition & 1 deletion webidl/Parser/Message.zig
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ message: []const u8,

pub fn pushError(list: *std.ArrayList(Self), err: Parser.Error, ctx: Parser.Context) Allocator.Error!void {
const msg = try (switch (err) {
error.UnexpectedToken, error.UnexpectedSymbol => std.fmt.allocPrint(list.allocator, "Expected {?}, got {?}", .{ ctx.expected, ctx.got }),
error.UnexpectedToken, error.UnexpectedSymbol, error.UnexpectedCharacter => std.fmt.allocPrint(list.allocator, "Expected {?}, got {?}", .{ ctx.expected, ctx.got }),
else => std.fmt.allocPrint(list.allocator, "Internal error", .{}),
});
errdefer list.allocator.free(msg);
Expand Down
61 changes: 61 additions & 0 deletions webidl/Parser/matchers.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
const std = @import("std");
const ptk = @import("parser-toolkit");
const Symbol = @import("productions/Symbol.zig");

/// Returns the length of the identifier-like prefix of `input`.
///
/// Letters, `_` and `-` are accepted at every position. Decimal digits are
/// accepted only after the first character, and only when the first
/// character is not `-`. Returns 0 when the first byte is not accepted
/// (including for empty input).
pub fn basicIdentifier(input: []const u8) usize {
    var len: usize = 0;
    while (len < input.len) : (len += 1) {
        const accepted = switch (input[len]) {
            'a'...'z', 'A'...'Z', '_', '-' => true,
            // Digits never start a prefix and never follow a leading '-'.
            '0'...'9' => len > 0 and input[0] != '-',
            else => false,
        };
        if (!accepted) break;
    }
    return len;
}

/// Matcher for a float literal, built by sequencing three parser-toolkit
/// matchers: a decimal number, a literal `.`, and another decimal number.
pub const float = ptk.matchers.sequenceOf(.{
    ptk.matchers.decimalNumber,
    ptk.matchers.literal("."),
    ptk.matchers.decimalNumber,
});

/// Matches an integer literal at the start of `input`.
///
/// Accepts either a `0x`/`0X` prefix followed by at least one hexadecimal
/// digit, or a run of at least one decimal digit. Returns the number of
/// bytes matched (including the `0x` prefix), or `null` when `input` does
/// not start with an integer literal.
///
/// A zero-length result from the underlying matcher is treated as "no
/// match", so the outcome does not depend on whether that matcher signals
/// failure with `null` or with `0`.
/// NOTE(review): parser-toolkit's number matchers appear to return 0 rather
/// than null when no digit is present; confirm against the pinned version.
pub fn int(input: []const u8) ?usize {
    const prefix = "0x";
    if (std.ascii.startsWithIgnoreCase(input, prefix)) {
        const hex_len = ptk.matchers.hexadecimalNumber(input[prefix.len..]) orelse return null;
        // A bare "0x" with no digits is not an integer literal.
        return if (hex_len > 0) hex_len + prefix.len else null;
    }

    const dec_len = ptk.matchers.decimalNumber(input) orelse return null;
    return if (dec_len > 0) dec_len else null;
}

/// Matches a symbol at the start of `input`.
///
/// Takes the identifier-like prefix found by `basicIdentifier` and returns
/// its length when that text names a member of `Symbol.Type`; otherwise
/// returns `null`.
pub fn symbol(input: []const u8) ?usize {
    const len = basicIdentifier(input);
    const is_symbol = std.meta.stringToEnum(Symbol.Type, input[0..len]) != null;
    return if (is_symbol) len else null;
}

/// Matches a double-quoted string literal at the start of `input`.
///
/// Returns the length of the literal including both quotes, or `null` when
/// `input` is empty, does not begin with `"`, or has no closing `"`.
/// The literal ends at the first `"` after the opening one; no escape
/// sequences are interpreted.
pub fn string(input: []const u8) ?usize {
    // Guard the first-byte read so empty input is "no match", not a crash.
    if (input.len == 0 or input[0] != '"') return null;

    const closing = std.mem.indexOfScalar(u8, input[1..], '"') orelse return null;
    // `closing` is relative to input[1..]; add the opening and closing quote.
    return closing + 2;
}

/// Matches an identifier at the start of `input`.
///
/// Returns the length of the identifier-like prefix found by
/// `basicIdentifier`, or `null` when:
///   * `input` starts with an integer literal (non-empty match from `int`),
///   * the prefix is empty (this also covers empty `input`),
///   * the prefix is a `-` followed only by digits (including a lone `-`),
///   * the prefix names a member of `Symbol.Type`.
pub fn identifier(input: []const u8) ?usize {
    // Only a non-empty integer match disqualifies the input; a zero-length
    // result means nothing numeric was found.
    if (int(input)) |int_len| {
        if (int_len > 0) return null;
    }

    const len = basicIdentifier(input);
    // Never report a zero-length match; this also protects input[0] below.
    if (len == 0) return null;

    if (input[0] == '-') {
        // Vacuously true for a lone '-', which is left for the '-' operator.
        const only_digits = for (input[1..len]) |ch| {
            if (!std.ascii.isDigit(ch)) break false;
        } else true;
        if (only_digits) return null;
    }

    const is_symbol = std.meta.stringToEnum(Symbol.Type, input[0..len]) != null;
    return if (is_symbol) null else len;
}

// A hexadecimal literal is consumed whole by `int` and rejected by `identifier`.
test "Matching hexadecimals" {
    const testing = std.testing;
    const hex_literal = "0xFFFF";

    try testing.expectEqual(@as(?usize, 6), int(hex_literal));
    try testing.expectEqual(@as(?usize, null), identifier(hex_literal));
}
2 changes: 2 additions & 0 deletions webidl/Parser/productions.zig
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub const ConstType = @import("productions/ConstType.zig");
pub const FloatLiteral = @import("productions/FloatLiteral.zig");
pub const FloatType = @import("productions/FloatType.zig");
pub const Identifier = @import("productions/Identifier.zig");
pub const Integer = @import("productions/Integer.zig");
Expand All @@ -11,6 +12,7 @@ pub const UnsignedIntegerType = @import("productions/UnsignedIntegerType.zig");

test {
_ = ConstType;
_ = FloatLiteral;
_ = FloatType;
_ = Identifier;
_ = Integer;
Expand Down
Loading

0 comments on commit d1779c5

Please sign in to comment.