Kaynağa Gözat

Merge pull request #423 from AtkinsSJ/parsely

Tidy up "newparser" so it can be used properly
Eric Dubé 1 yıl önce
ebeveyn
işleme
895358e6e5

+ 0 - 46
packages/phoenix/packages/newparser/parsers/terminals.js

@@ -1,46 +0,0 @@
-import { Parser, UNRECOGNIZED, VALUE } from '../lib.js';
-
-export class Literal extends Parser {
-    _create (value) {
-        this.value = value;
-    }
-
-    _parse (stream) {
-        const subStream = stream.fork();
-        for ( let i=0 ; i < this.value.length ; i++ ) {
-            let { done, value } = subStream.next();
-            if ( done ) return UNRECOGNIZED;
-            if ( this.value[i] !== value ) return UNRECOGNIZED;
-        }
-
-        stream.join(subStream);
-        return { status: VALUE, $: 'literal', value: this.value };
-    }
-}
-
-export class StringOf extends Parser {
-    _create (values) {
-        this.values = values;
-    }
-
-    _parse (stream) {
-        const subStream = stream.fork();
-        let text = '';
-
-        while (true) {
-            let { done, value } = subStream.look();
-            if ( done ) break;
-            if ( ! this.values.includes(value) ) break;
-
-            subStream.next();
-            text += value;
-        }
-
-        if (text.length === 0) {
-            return UNRECOGNIZED;
-        }
-
-        stream.join(subStream);
-        return { status: VALUE, $: 'stringOf', value: text };
-    }
-}

+ 22 - 30
packages/phoenix/packages/newparser/exports.js → packages/phoenix/packages/parsely/exports.js

@@ -1,31 +1,6 @@
-import { adapt_parser, INVALID, Parser, UNRECOGNIZED, VALUE } from './lib.js';
-import { Discard, FirstMatch, None, Optional, Repeat, Sequence } from './parsers/combinators.js';
-import { Literal, StringOf } from './parsers/terminals.js';
-
-class Symbol extends Parser {
-    _create(symbolName) {
-        this.symbolName = symbolName;
-    }
-
-    _parse (stream) {
-        const parser = this.symbol_registry[this.symbolName];
-        if ( ! parser ) {
-            throw new Error(`No symbol defined named '${this.symbolName}'`);
-        }
-        const subStream = stream.fork();
-        const result = parser.parse(subStream);
-        console.log(`Result of parsing symbol('${this.symbolName}'):`, result);
-        if ( result.status === UNRECOGNIZED ) {
-            return UNRECOGNIZED;
-        }
-        if ( result.status === INVALID ) {
-            return { status: INVALID, value: result };
-        }
-        stream.join(subStream);
-        result.$ = this.symbolName;
-        return result;
-    }
-}
+import { adapt_parser, VALUE } from './parser.js';
+import { Discard, FirstMatch, Optional, Repeat, Sequence } from './parsers/combinators.js';
+import { Literal, None, StringOf, Symbol } from './parsers/terminals.js';
 
 class ParserWithAction {
     #parser;
@@ -55,6 +30,12 @@ export class GrammarContext {
         return new GrammarContext({...this.parsers, ...more_parsers});
     }
 
+    /**
+     * Construct a parsing function for the given grammar.
+     * @param grammar An object of symbol-names to a DSL for parsing that symbol.
+     * @param actions An object of symbol-names to a function run to process the symbol after it has been parsed.
+     * @returns {function(*, *, {must_consume_all_input?: boolean}=): *} A function to run the parser. Throws if parsing fails.
+     */
     define_parser (grammar, actions) {
         const symbol_registry = {};
         const api = {};
@@ -76,12 +57,23 @@ export class GrammarContext {
             }
         }
 
-        return (stream, entry_symbol) => {
+        return (stream, entry_symbol, { must_consume_all_input = true } = {}) => {
             const entry_parser = symbol_registry[entry_symbol];
             if (!entry_parser) {
                 throw new Error(`Entry symbol '${entry_symbol}' not found in grammar.`);
             }
-            return entry_parser.parse(stream);
+            const result = entry_parser.parse(stream);
+
+            if (result.status !== VALUE) {
+                throw new Error('Failed to parse input against grammar.');
+            }
+
+            // Ensure the entire stream is consumed.
+            if (must_consume_all_input && !stream.is_eof()) {
+                throw new Error('Parsing did not consume all input.');
+            }
+
+            return result;
         };
     }
 }

+ 6 - 0
packages/phoenix/packages/newparser/lib.js → packages/phoenix/packages/parsely/parser.js

@@ -4,6 +4,12 @@ export const UNRECOGNIZED = Symbol('unrecognized');
 export const INVALID = Symbol('invalid');
 export const VALUE = Symbol('value');
 
+/**
+ * Base class for parsers.
+ * To implement your own, subclass it and define these methods:
+ * - _create(): Acts as the constructor
+ * - _parse(stream): Performs the parsing on the stream, and returns either UNRECOGNIZED, INVALID, or a result object.
+ */
 export class Parser {
     result (o) {
         if (o.value && o.value.$discard) {

+ 60 - 37
packages/phoenix/packages/newparser/parsers/combinators.js → packages/phoenix/packages/parsely/parsers/combinators.js

@@ -1,5 +1,9 @@
-import { INVALID, UNRECOGNIZED, VALUE, adapt_parser, Parser } from '../lib.js';
+import { adapt_parser, INVALID, Parser, UNRECOGNIZED, VALUE } from '../parser.js';
 
+/**
+ * Runs its child parser, and discards its result.
+ * @param parser Child parser
+ */
 export class Discard extends Parser {
     _create (parser) {
         this.parser = adapt_parser(parser);
@@ -19,6 +23,10 @@ export class Discard extends Parser {
     }
 }
 
+/**
+ * Runs its child parsers in order, and returns the first successful result.
+ * @param parsers Child parsers
+ */
 export class FirstMatch extends Parser {
     _create (...parsers) {
         this.parsers = parsers.map(adapt_parser);
@@ -42,14 +50,10 @@ export class FirstMatch extends Parser {
     }
 }
 
-export class None extends Parser {
-    _create () {}
-
-    _parse (stream) {
-        return { status: VALUE, $: 'none', $discard: true };
-    }
-}
-
+/**
+ * Runs its child parser, and then returns its result, or nothing.
+ * @param parser Child parser
+ */
 export class Optional extends Parser {
     _create (parser) {
         this.parser = adapt_parser(parser);
@@ -66,6 +70,12 @@ export class Optional extends Parser {
     }
 }
 
+/**
+ * Parses a repeated sequence of values with separators between them.
+ * @param value_parser Parser for the value
+ * @param separator_parser Parser for the separator, optional
+ * @param trailing Whether to allow a trailing separator
+ */
 export class Repeat extends Parser {
     _create (value_parser, separator_parser, { trailing = false } = {}) {
         this.value_parser = adapt_parser(value_parser);
@@ -75,45 +85,58 @@ export class Repeat extends Parser {
 
     _parse (stream) {
         const results = [];
-        for ( ;; ) {
-            const subStream = stream.fork();
+        const subStream = stream.fork(); // one fork is reused for every attempt; stream.join() is called after each successful step
 
-            // Value
-            const result = this.value_parser.parse(subStream);
-            if ( result.status === UNRECOGNIZED ) {
-                break;
-            }
-            if ( result.status === INVALID ) {
-                return { status: INVALID, value: result };
-            }
-            stream.join(subStream);
-            if ( ! result.$discard ) results.push(result);
+        // Parse first value. Any status other than INVALID or VALUE skips the block below, leaving `results` empty so UNRECOGNIZED is returned.
+        const result = this.value_parser.parse(subStream);
+        if ( result.status === INVALID )
+            return { status: INVALID, value: result };
 
-            // Separator
-            if ( ! this.separator_parser ) {
-                continue;
-            }
-            const separatorResult = this.separator_parser.parse(subStream);
-            if ( separatorResult.status === UNRECOGNIZED ) {
-                break;
-            }
-            if ( separatorResult.status === INVALID ) {
-                return { status: INVALID, value: separatorResult };
-            }
+        if ( result.status === VALUE ) {
             stream.join(subStream);
-            if ( ! result.$discard ) results.push(separatorResult);
-
-            // TODO: Detect trailing separator and reject it if trailing==false
+            if (!result.$discard) results.push(result);
+
+            // Repeatedly parse <separator> <value>; non-discarded separators are pushed into `results` alongside the values.
+            for (;;) {
+                // Separator.
+                // NOTE(review): when separator_parser is falsy, the `continue` below restarts this for(;;) without
+                // consuming input or reaching any break/return, so a separator-less Repeat never terminates once
+                // its first value has parsed. Needs a fix (e.g. skip only the separator step) - confirm intended behavior.
+                if (!this.separator_parser)
+                    continue;
+
+                const separatorResult = this.separator_parser.parse(subStream);
+                if (separatorResult.status === UNRECOGNIZED)
+                    break;
+                if (separatorResult.status === INVALID)
+                    return { status: INVALID, value: separatorResult };
+                stream.join(subStream); // the separator is joined into `stream` before the following value is attempted
+                if (!separatorResult.$discard) results.push(separatorResult);
+
+                // Value
+                const result = this.value_parser.parse(subStream);
+                if (result.status === UNRECOGNIZED) {
+                    // If we failed to parse a value, we have a trailing separator.
+                    // `this.trailing` is presumably set from the `trailing` option in _create (assignment not visible in this hunk - confirm).
+                    if (this.trailing === false)
+                        return { status: INVALID, value: result };
+                    break;
+                }
+                if (result.status === INVALID)
+                    return { status: INVALID, value: result };
+
+                stream.join(subStream);
+                if (!result.$discard) results.push(result);
+            }
         }
 
-        if ( results.length === 0 ) {
+        if ( results.length === 0 )
             return UNRECOGNIZED;
-        }
 
         return { status: VALUE, value: results };
     }
 }
 
+/**
+ * Runs a sequence of child parsers, and returns their result as an array if they all succeed.
+ * @param parsers Child parsers
+ */
 export class Sequence extends Parser {
     _create (...parsers) {
         this.parsers = parsers.map(adapt_parser);

+ 93 - 0
packages/phoenix/packages/parsely/parsers/terminals.js

@@ -0,0 +1,93 @@
+import { INVALID, Parser, UNRECOGNIZED, VALUE } from '../parser.js';
+
+/**
+ * Parses a literal value.
+ * Succeeds only when the next `value.length` items read from the stream are
+ * strictly equal (`!==` check), in order, to `value[0]`, `value[1]`, ...
+ * Reading happens on a fork of the stream; the fork is joined back only after
+ * the whole literal has matched. On a mismatch, or if the stream reports `done`
+ * first, `_parse` returns UNRECOGNIZED without joining the fork.
+ * A zero-length `value` matches immediately without reading anything.
+ * On success `_parse` returns `{ status: VALUE, $: 'literal', value }`.
+ * @param value The value to parse. It is accessed through `.length` and `[i]`;
+ *     presumably a string when paired with a character stream - confirm with callers.
+ */
+export class Literal extends Parser {
+    _create (value) {
+        this.value = value;
+    }
+
+    _parse (stream) {
+        const subStream = stream.fork(); // read ahead on a fork; nothing is joined until the full literal matches
+        for ( let i=0 ; i < this.value.length ; i++ ) {
+            let { done, value } = subStream.next();
+            if ( done ) return UNRECOGNIZED; // input ended before the literal did
+            if ( this.value[i] !== value ) return UNRECOGNIZED;
+        }
+
+        stream.join(subStream);
+        return { status: VALUE, $: 'literal', value: this.value };
+    }
+}
+
+/**
+ * Parses matching characters as a string.
+ * Each upcoming value is peeked with `look()`; while the stream is not `done`
+ * and `test(value)` is truthy, the value is consumed with `next()` and appended
+ * to the result text with `+=`.
+ * If nothing matched, `_parse` returns UNRECOGNIZED without joining the fork.
+ * Otherwise it joins the fork and returns `{ status: VALUE, $: 'stringOf', value: text }`.
+ * @param test Function that takes a character, and returns whether to include it.
+ */
+export class StringOf extends Parser {
+    _create (test) {
+        this.test = test;
+    }
+
+    _parse (stream) {
+        const subStream = stream.fork();
+        let text = '';
+
+        while (true) {
+            let { done, value } = subStream.look(); // peek first so the first non-matching value is not consumed
+            if ( done ) break;
+            if ( ! this.test(value) ) break;
+
+            subStream.next();
+            text += value;
+        }
+
+        if (text.length === 0) {
+            return UNRECOGNIZED;
+        }
+
+        stream.join(subStream);
+        return { status: VALUE, $: 'stringOf', value: text };
+    }
+}
+
+/**
+ * Parses an object defined by the symbol registry.
+ * At parse time, looks up `symbolName` in `this.symbol_registry` and runs that
+ * parser's `parse()` on a fork of the stream.
+ * `symbol_registry` is not assigned anywhere in this class; presumably it is
+ * attached by the code that constructs the parser - confirm.
+ * Results by delegate status:
+ * - UNRECOGNIZED: returns UNRECOGNIZED, fork not joined.
+ * - INVALID: returns `{ status: INVALID, value: result }` wrapping the delegate's result, fork not joined.
+ * - otherwise: joins the fork, overwrites `result.$` with the symbol name (the
+ *   delegate's result object is mutated in place), and returns that same object.
+ * Note: inside this module the class name shadows the global `Symbol`.
+ * @param symbolName The name of the symbol to parse.
+ * @throws {Error} If no parser is found in the registry under `symbolName`.
+ */
+export class Symbol extends Parser {
+    _create(symbolName) {
+        this.symbolName = symbolName;
+    }
+
+    _parse (stream) {
+        const parser = this.symbol_registry[this.symbolName]; // resolved on every parse, not at construction
+        if ( ! parser ) {
+            throw new Error(`No symbol defined named '${this.symbolName}'`);
+        }
+        const subStream = stream.fork();
+        const result = parser.parse(subStream);
+        if ( result.status === UNRECOGNIZED ) {
+            return UNRECOGNIZED;
+        }
+        if ( result.status === INVALID ) {
+            return { status: INVALID, value: result };
+        }
+        stream.join(subStream);
+        result.$ = this.symbolName; // tag the delegate's result with this symbol's name
+        return result;
+    }
+}
+
+/**
+ * Does no parsing and returns a discarded result.
+ * `_parse` never reads from the stream and always returns
+ * `{ status: VALUE, $: 'none', $discard: true }`.
+ * `_create` takes no arguments and stores nothing.
+ */
+export class None extends Parser {
+    _create () {}
+
+    _parse (stream) {
+        return { status: VALUE, $: 'none', $discard: true }; // `stream` is intentionally untouched
+    }
+}

+ 52 - 0
packages/phoenix/packages/parsely/streams.js

@@ -0,0 +1,52 @@
+/**
+ * Base class for input streams.
+ * Defines which methods are expected for any stream implementations.
+ * `value_at`, `look`, `next`, `fork` and `join` each throw an Error naming the
+ * concrete class (`this.constructor.name`) until a subclass overrides them.
+ * `is_eof()` is implemented here on top of `look()`, so subclasses get it for
+ * free once `look()` returns an object with a `done` property.
+ * Note: `join` is declared here without parameters, while the StringStream
+ * override in this file takes the forked stream as its argument.
+ */
+export class ParserStream {
+    value_at (index) { throw new Error(`${this.constructor.name}.value_at() not implemented`); }
+    look () { throw new Error(`${this.constructor.name}.look() not implemented`); }
+    next () { throw new Error(`${this.constructor.name}.next() not implemented`); }
+    fork () { throw new Error(`${this.constructor.name}.fork() not implemented`); }
+    join () { throw new Error(`${this.constructor.name}.join() not implemented`); }
+
+    is_eof () {
+        return this.look().done; // end of input is whatever look() reports as `done`
+    }
+}
+
+/**
+ * ParserStream that takes a string, and processes it character by character.
+ * State is the source string `str` plus a cursor index `i`.
+ * - `value_at(index)` returns `{ done: true, value: undefined }` once `index`
+ *   reaches `str.length`, otherwise `{ done: false, value: str[index] }`
+ *   (one UTF-16 code unit, since it uses plain string indexing).
+ * - `look()` reads at the cursor without moving it.
+ * - `next()` reads at the cursor and then always increments it, even when the
+ *   read was already `done`.
+ * - `fork()` returns a new StringStream over the same string at the current cursor.
+ * - `join(forked)` copies the fork's cursor into this stream.
+ * @param str The string to read from.
+ * @param startIndex Initial cursor position; defaults to 0.
+ */
+export class StringStream extends ParserStream {
+    constructor (str, startIndex = 0) {
+        super();
+        this.str = str;
+        this.i = startIndex;
+    }
+
+    value_at (index) {
+        if ( index >= this.str.length ) {
+            return { done: true, value: undefined };
+        }
+
+        return { done: false, value: this.str[index] };
+    }
+
+    look () {
+        return this.value_at(this.i);
+    }
+
+    next () {
+        const result = this.value_at(this.i);
+        this.i++; // advances unconditionally, including past the end of the string
+        return result;
+    }
+
+    fork () {
+        return new StringStream(this.str, this.i);
+    }
+
+    join (forked) {
+        this.i = forked.i; // adopt the fork's position; only the cursor is copied
+    }
+}

+ 20 - 65
packages/phoenix/src/puter-shell/coreutils/concept-parser.js

@@ -1,5 +1,6 @@
-import { GrammarContext, standard_parsers } from '../../../packages/newparser/exports.js';
-import { Parser, UNRECOGNIZED, VALUE } from '../../../packages/newparser/lib.js';
+import { GrammarContext, standard_parsers } from '../../../packages/parsely/exports.js';
+import { Parser, UNRECOGNIZED, VALUE } from '../../../packages/parsely/parser.js';
+import { StringStream } from '../../../packages/parsely/streams.js';
 
 class NumberParser extends Parser {
     static data = {
@@ -163,39 +164,6 @@ class StringParser extends Parser {
     }
 }
 
-class StringStream {
-    constructor (str, startIndex = 0) {
-        this.str = str;
-        this.i = startIndex;
-    }
-
-    value_at (index) {
-        if ( index >= this.str.length ) {
-            return { done: true, value: undefined };
-        }
-
-        return { done: false, value: this.str[index] };
-    }
-
-    look () {
-        return this.value_at(this.i);
-    }
-
-    next () {
-        const result = this.value_at(this.i);
-        this.i++;
-        return result;
-    }
-
-    fork () {
-        return new StringStream(this.str, this.i);
-    }
-
-    join (forked) {
-        this.i = forked.i;
-    }
-}
-
 export default {
     name: 'concept-parser',
     args: {
@@ -204,15 +172,13 @@ export default {
     },
     execute: async ctx => {
         const { in_, out, err } = ctx.externs;
-        await out.write("STARTING CONCEPT PARSER\n");
         const grammar_context = new GrammarContext(standard_parsers());
-        await out.write("Constructed a grammar context\n");
 
         const parser = grammar_context.define_parser({
             element: a => a.sequence(
-                a.symbol('whitespace'),
+                a.optional(a.symbol('whitespace')),
                 a.symbol('value'),
-                a.symbol('whitespace'),
+                a.optional(a.symbol('whitespace')),
             ),
             value: a => a.firstMatch(
                 a.symbol('object'),
@@ -225,37 +191,33 @@ export default {
             ),
             array: a => a.sequence(
                 a.literal('['),
-                a.symbol('whitespace'),
-                a.optional(
+                a.firstMatch(
                     a.repeat(
                         a.symbol('element'),
                         a.literal(','),
-                        { trailing: true },
+                        { trailing: false },
                     ),
+                    a.optional(a.symbol('whitespace')),
                 ),
-                a.symbol('whitespace'),
                 a.literal(']'),
             ),
             member: a => a.sequence(
-                a.symbol('whitespace'),
+                a.optional(a.symbol('whitespace')),
                 a.symbol('string'),
-                a.symbol('whitespace'),
+                a.optional(a.symbol('whitespace')),
                 a.literal(':'),
-                a.symbol('whitespace'),
-                a.symbol('value'),
-                a.symbol('whitespace'),
+                a.symbol('element'),
             ),
             object: a => a.sequence(
                 a.literal('{'),
-                a.symbol('whitespace'),
-                a.optional(
+                a.firstMatch(
                     a.repeat(
                         a.symbol('member'),
                         a.literal(','),
-                        { trailing: true },
+                        { trailing: false },
                     ),
+                    a.optional(a.symbol('whitespace')),
                 ),
-                a.symbol('whitespace'),
                 a.literal('}'),
             ),
             true: a => a.literal('true'),
@@ -263,37 +225,31 @@ export default {
             null: a => a.literal('null'),
             number: a => new NumberParser(),
             string: a => new StringParser(),
-            whitespace: a => a.optional(
-                a.stringOf(' \r\n\t'.split('')),
-            ),
+            whitespace: a => a.stringOf(c => ' \r\n\t'.includes(c)),
         }, {
-            element: it => it[0].value,
+            element: it => it.filter(it => it.$ === 'value')[0].value,
             value: it => it,
             array: it => {
                 // A parsed array contains 3 values: `[`, the entries array, and `]`, so we only care about index 1.
                 // If it's less than 3, there were no entries.
                 if (it.length < 3) return [];
                 return (it[1].value || [])
-                    .filter(it => it.$ !== 'literal')
+                    .filter(it => it.$ === 'element')
                     .map(it => it.value);
             },
             member: it => {
-                // A parsed member contains 3 values: a name, `:`, and a value.
-                const [ name_part, colon, value_part ] = it;
+                const [ name_part, value_part ] = it.filter(it => it.$ === 'string' || it.$ === 'element');
                 return { name: name_part.value, value: value_part.value };
             },
             object: it => {
-                console.log('OBJECT!!!!');
-                console.log(it[1]);
                 // A parsed object contains 3 values: `{`, the members array, and `}`, so we only care about index 1.
                 // If it's less than 3, there were no members.
                 if (it.length < 3) return {};
                 const result = {};
-                // FIXME: This is all wrong!!!
                 (it[1].value || [])
                     .filter(it => it.$ === 'member')
                     .forEach(it => {
-                        result[it.name] = it.value;
+                        result[it.value.name] = it.value.value;
                     });
                 return result;
             },
@@ -305,7 +261,6 @@ export default {
             whitespace: _ => {},
         });
 
-        // TODO: What do we want our streams to be like?
         const input = ctx.locals.positionals.shift();
         const stream = new StringStream(input);
         try {
@@ -317,4 +272,4 @@ export default {
             await err.write(e.stack + '\n');
         }
     }
-}
+}