Explorar el Código

Merge pull request #424 from AtkinsSJ/sed

Actually parse sed scripts
Eric Dubé hace 1 año
padre
commit
2a2a42c153

+ 3 - 1
packages/phoenix/packages/parsely/exports.js

@@ -1,6 +1,6 @@
 import { adapt_parser, VALUE } from './parser.js';
 import { Discard, FirstMatch, Optional, Repeat, Sequence } from './parsers/combinators.js';
-import { Literal, None, StringOf, Symbol } from './parsers/terminals.js';
+import { Fail, Literal, None, StringOf, StringUntil, Symbol } from './parsers/terminals.js';
 
 class ParserWithAction {
     #parser;
@@ -81,6 +81,7 @@ export class GrammarContext {
 export const standard_parsers = () => {
     return {
         discard: Discard,
+        fail: Fail,
         firstMatch: FirstMatch,
         literal: Literal,
         none: None,
@@ -88,6 +89,7 @@ export const standard_parsers = () => {
         repeat: Repeat,
         sequence: Sequence,
         stringOf: StringOf,
+        stringUntil: StringUntil,
         symbol: Symbol,
     }
 }

+ 13 - 12
packages/phoenix/packages/parsely/parsers/combinators.js

@@ -79,7 +79,7 @@ export class Optional extends Parser {
 export class Repeat extends Parser {
     _create (value_parser, separator_parser, { trailing = false } = {}) {
         this.value_parser = adapt_parser(value_parser);
-        this.separator_parser = adapt_parser(separator_parser);
+        this.separator_parser = separator_parser ? adapt_parser(separator_parser) : null;
         this.trailing = trailing;
     }
 
@@ -99,22 +99,23 @@ export class Repeat extends Parser {
             // Repeatedly parse <separator> <value>
             for (;;) {
                 // Separator
-                if (!this.separator_parser)
-                    continue;
-
-                const separatorResult = this.separator_parser.parse(subStream);
-                if (separatorResult.status === UNRECOGNIZED)
-                    break;
-                if (separatorResult.status === INVALID)
-                    return { status: INVALID, value: separatorResult };
-                stream.join(subStream);
-                if (!separatorResult.$discard) results.push(separatorResult);
+                let parsed_separator = false;
+                if (this.separator_parser) {
+                    const separatorResult = this.separator_parser.parse(subStream);
+                    if (separatorResult.status === UNRECOGNIZED)
+                        break;
+                    if (separatorResult.status === INVALID)
+                        return { status: INVALID, value: separatorResult };
+                    stream.join(subStream);
+                    if (!separatorResult.$discard) results.push(separatorResult);
+                    parsed_separator = true;
+                }
 
                 // Value
                 const result = this.value_parser.parse(subStream);
                 if (result.status === UNRECOGNIZED) {
                     // If we failed to parse a value, we have a trailing separator
-                    if (this.trailing === false)
+                    if (parsed_separator && this.trailing === false)
                         return { status: INVALID, value: result };
                     break;
                 }

+ 58 - 0
packages/phoenix/packages/parsely/parsers/terminals.js

@@ -53,6 +53,53 @@ export class StringOf extends Parser {
     }
 }
 
+/**
+ * Parses characters into a string, until it encounters the given character, unescaped.
+ *
+ * The escape character itself is dropped from the resulting text and the character
+ * that follows it is taken literally, so it can neither end the string nor start
+ * another escape. In particular, an escaped escape character produces one literal
+ * escape character in the output.
+ *
+ * Results:
+ * - INVALID when the input ends directly after an unescaped escape character.
+ * - UNRECOGNIZED when no text was collected.
+ * - Otherwise a `stringUntil` value holding the collected text.
+ *
+ * @param testOrCharacter End of the string. Either a character, or a function that takes a character,
+ *                        and returns whether it ends the string.
+ * @param escapeCharacter Character to use as the escape character. By default, is '\'.
+ */
+export class StringUntil extends Parser {
+    _create(testOrCharacter, { escapeCharacter = '\\' } = {}) {
+        if (typeof testOrCharacter === 'string') {
+            this.test = (c => c === testOrCharacter);
+        } else {
+            this.test = testOrCharacter;
+        }
+        this.escapeCharacter = escapeCharacter;
+    }
+
+    _parse(stream) {
+        // Work on a fork so the caller's stream only advances on success.
+        const subStream = stream.fork();
+        let text = '';
+        let lastWasEscape = false;
+
+        while (true) {
+            const { done, value } = subStream.look();
+            if ( done ) break;
+            // An escaped character never terminates the string.
+            if ( !lastWasEscape && this.test(value) )
+                break;
+
+            subStream.next();
+            // Only an unescaped escape character starts an escape sequence.
+            // An escaped one falls through and is collected as literal text.
+            if (!lastWasEscape && value === this.escapeCharacter) {
+                lastWasEscape = true;
+                continue;
+            }
+            lastWasEscape = false;
+            text += value;
+        }
+
+        // The input ended while an escape was still pending.
+        if (lastWasEscape)
+            return INVALID;
+
+        if (text.length === 0)
+            return UNRECOGNIZED;
+
+        stream.join(subStream);
+        return { status: VALUE, $: 'stringUntil', value: text };
+    }
+}
+
 /**
  * Parses an object defined by the symbol registry.
  * @param symbolName The name of the symbol to parse.
@@ -91,3 +138,14 @@ export class None extends Parser {
         return { status: VALUE, $: 'none', $discard: true };
     }
 }
+
+/**
+ * Always fails parsing.
+ * Takes no configuration and reports UNRECOGNIZED for every input, without
+ * reading anything from the stream it is given.
+ */
+export class Fail extends Parser {
+    // Nothing to configure.
+    _create () {}
+
+    // The stream is never touched; the result is unconditionally UNRECOGNIZED.
+    _parse (stream) {
+        return UNRECOGNIZED;
+    }
+}

+ 2 - 0
packages/phoenix/src/ansi-shell/arg-parsers/simple-parser.js

@@ -43,5 +43,7 @@ export default {
 
         ctx.locals.values = result.values;
         ctx.locals.positionals = result.positionals;
+        if (result.tokens)
+            ctx.locals.tokens = result.tokens;
     }
 }

+ 38 - 655
packages/phoenix/src/puter-shell/coreutils/sed.js

@@ -18,620 +18,7 @@
  */
 import { Exit } from './coreutil_lib/exit.js';
 import { fileLines } from '../../util/file.js';
-
-function makeIndent(size) {
-    return '  '.repeat(size);
-}
-
-// Either a line number or a regex
-class Address {
-    constructor(value) {
-        this.value = value;
-    }
-
-    matches(lineNumber, line) {
-        if (this.value instanceof RegExp) {
-            return this.value.test(line);
-        }
-        return this.value === lineNumber;
-    }
-
-    isLineNumberBefore(lineNumber) {
-        return (typeof this.value === 'number') && this.value < lineNumber;
-    }
-
-    dump(indent) {
-        if (this.value instanceof RegExp) {
-            return `${makeIndent(indent)}REGEX: ${this.value}\n`;
-        }
-        return `${makeIndent(indent)}LINE: ${this.value}\n`;
-    }
-}
-
-class AddressRange {
-    // Three kinds of AddressRange:
-    // - Empty (includes everything)
-    // - Single (matches individual line)
-    // - Range (matches lines between start and end, inclusive)
-    constructor({ start, end, inverted = false } = {}) {
-        this.start = start;
-        this.end = end;
-        this.inverted = inverted;
-        this.insideRange = false;
-        this.leaveRangeNextLine = false;
-    }
-
-    updateMatchState(lineNumber, line) {
-        // Only ranges have a state to update
-        if (!(this.start && this.end)) {
-            return;
-        }
-
-        // Reset our state each time we start a new file.
-        if (lineNumber === 1) {
-            this.insideRange = false;
-            this.leaveRangeNextLine = false;
-        }
-
-        // Leave the range if the previous line matched the end.
-        if (this.leaveRangeNextLine) {
-            this.insideRange = false;
-            this.leaveRangeNextLine = false;
-        }
-
-        if (this.insideRange) {
-            // We're inside the range, does this line end it?
-            // If the end address is a line number in the past, yes, immediately.
-            if (this.end.isLineNumberBefore(lineNumber)) {
-                this.insideRange = false;
-                return;
-            }
-            // If the line matches the end address, include it but leave the range on the next line.
-            this.leaveRangeNextLine = this.end.matches(lineNumber, line);
-        } else {
-            // Does this line start the range?
-            this.insideRange = this.start.matches(lineNumber, line);
-        }
-    }
-
-    matches(lineNumber, line) {
-        const invertIfNeeded = (value) => {
-            return this.inverted ? !value : value;
-        };
-
-        // Empty - matches all lines
-        if (!this.start) {
-            return invertIfNeeded(true);
-        }
-
-        // Range
-        if (this.end) {
-            return invertIfNeeded(this.insideRange);
-        }
-
-        // Single
-        return invertIfNeeded(this.start.matches(lineNumber, line));
-    }
-
-    dump(indent) {
-        const inverted = this.inverted ? `${makeIndent(indent+1)}(INVERTED)\n` : '';
-
-        if (!this.start) {
-            return `${makeIndent(indent)}ADDRESS RANGE (EMPTY)\n`
-                + inverted;
-        }
-
-        if (this.end) {
-            return `${makeIndent(indent)}ADDRESS RANGE (RANGE):\n`
-                + inverted
-                + this.start.dump(indent+1)
-                + this.end.dump(indent+1);
-        }
-
-        return `${makeIndent(indent)}ADDRESS RANGE (SINGLE):\n`
-            + this.start.dump(indent+1)
-            + inverted;
-    }
-}
-
-const JumpLocation = {
-    None: Symbol('None'),
-    EndOfCycle: Symbol('EndOfCycle'),
-    StartOfCycle: Symbol('StartOfCycle'),
-    Label: Symbol('Label'),
-    Quit: Symbol('Quit'),
-    QuitSilent: Symbol('QuitSilent'),
-};
-
-class Command {
-    constructor(addressRange) {
-        this.addressRange = addressRange ?? new AddressRange();
-    }
-
-    updateMatchState(context) {
-        this.addressRange.updateMatchState(context.lineNumber, context.patternSpace);
-    }
-
-    async runCommand(context) {
-        if (this.addressRange.matches(context.lineNumber, context.patternSpace)) {
-            return await this.run(context);
-        }
-        return JumpLocation.None;
-    }
-
-    async run(context) {
-        throw new Error('run() not implemented for ' + this.constructor.name);
-    }
-
-    dump(indent) {
-        throw new Error('dump() not implemented for ' + this.constructor.name);
-    }
-}
-
-// '{}' - Group other commands
-class GroupCommand extends Command {
-    constructor(addressRange, subCommands) {
-        super(addressRange);
-        this.subCommands = subCommands;
-    }
-
-    updateMatchState(context) {
-        super.updateMatchState(context);
-        for (const command of this.subCommands) {
-            command.updateMatchState(context);
-        }
-    }
-
-    async run(context) {
-        for (const command of this.subCommands) {
-            const result = await command.runCommand(context);
-            if (result !== JumpLocation.None) {
-                return result;
-            }
-        }
-        return JumpLocation.None;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}GROUP:\n`
-            + this.addressRange.dump(indent+1)
-            + `${makeIndent(indent+1)}CHILDREN:\n`
-            + this.subCommands.map(command => command.dump(indent+2)).join('');
-    }
-}
-
-// '=' - Output line number
-class LineNumberCommand extends Command {
-    constructor(addressRange) {
-        super(addressRange);
-    }
-
-    async run(context) {
-        await context.out.write(`${context.lineNumber}\n`);
-        return JumpLocation.None;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}LINE-NUMBER:\n`
-            + this.addressRange.dump(indent+1);
-    }
-}
-
-// 'a' - Append text
-class AppendTextCommand extends Command {
-    constructor(addressRange, text) {
-        super(addressRange);
-        this.text = text;
-    }
-
-    async run(context) {
-        context.queuedOutput += this.text + '\n';
-        return JumpLocation.None;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}APPEND-TEXT:\n`
-            + this.addressRange.dump(indent+1)
-            + `${makeIndent(indent+1)}CONTENTS: '${this.text}'\n`;
-    }
-}
-
-// 'c' - Replace line with text
-class ReplaceCommand extends Command {
-    constructor(addressRange, text) {
-        super(addressRange);
-        this.text = text;
-    }
-
-    async run(context) {
-        context.patternSpace = '';
-        // Output if we're either a 0-address range, 1-address range, or 2-address on the last line.
-        if (this.addressRange.leaveRangeNextLine || !this.addressRange.end) {
-            await context.out.write(this.text + '\n');
-        }
-        return JumpLocation.EndOfCycle;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}REPLACE-TEXT:\n`
-            + this.addressRange.dump(indent+1)
-            + `${makeIndent(indent+1)}CONTENTS: '${this.text}'\n`;
-    }
-}
-
-// 'd' - Delete pattern
-class DeleteCommand extends Command {
-    constructor(addressRange) {
-        super(addressRange);
-    }
-
-    async run(context) {
-        context.patternSpace = '';
-        return JumpLocation.EndOfCycle;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}DELETE:\n`
-            + this.addressRange.dump(indent+1);
-    }
-}
-
-// 'D' - Delete first line of pattern
-class DeleteLineCommand extends Command {
-    constructor(addressRange) {
-        super(addressRange);
-    }
-
-    async run(context) {
-        const [ firstLine, rest ] = context.patternSpace.split('\n', 2);
-        context.patternSpace = rest ?? '';
-        if (rest === undefined) {
-            return JumpLocation.EndOfCycle;
-        }
-        return JumpLocation.StartOfCycle;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}DELETE-LINE:\n`
-            + this.addressRange.dump(indent+1);
-    }
-}
-
-// 'g' - Get the held line into the pattern
-class GetCommand extends Command {
-    constructor(addressRange) {
-        super(addressRange);
-    }
-
-    async run(context) {
-        context.patternSpace = context.holdSpace;
-        return JumpLocation.None;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}GET-HELD:\n`
-            + this.addressRange.dump(indent+1);
-    }
-}
-
-// 'G' - Get the held line and append it to the pattern
-class GetAppendCommand extends Command {
-    constructor(addressRange) {
-        super(addressRange);
-    }
-
-    async run(context) {
-        context.patternSpace += '\n' + context.holdSpace;
-        return JumpLocation.None;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}GET-HELD-APPEND:\n`
-            + this.addressRange.dump(indent+1);
-    }
-}
-
-// 'h' - Hold the pattern
-class HoldCommand extends Command {
-    constructor(addressRange) {
-        super(addressRange);
-    }
-
-    async run(context) {
-        context.holdSpace = context.patternSpace;
-        return JumpLocation.None;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}HOLD:\n`
-            + this.addressRange.dump(indent+1);
-    }
-}
-
-// 'H' - Hold append the pattern
-class HoldAppendCommand extends Command {
-    constructor(addressRange) {
-        super(addressRange);
-    }
-
-    async run(context) {
-        context.holdSpace += '\n' + context.patternSpace;
-        return JumpLocation.None;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}HOLD-APPEND:\n`
-            + this.addressRange.dump(indent+1);
-    }
-}
-
-// 'i' - Insert text
-class InsertTextCommand extends Command {
-    constructor(addressRange, text) {
-        super(addressRange);
-        this.text = text;
-    }
-
-    async run(context) {
-        await context.out.write(this.text + '\n');
-        return JumpLocation.None;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}INSERT-TEXT:\n`
-            + this.addressRange.dump(indent+1)
-            + `${makeIndent(indent+1)}CONTENTS: '${this.text}'\n`;
-    }
-}
-
-// 'l' - Print pattern in debug format
-class DebugPrintCommand extends Command {
-    constructor(addressRange) {
-        super(addressRange);
-    }
-
-    async run(context) {
-        let output = '';
-        for (const c of context.patternSpace) {
-            if (c < ' ') {
-                const charCode = c.charCodeAt(0);
-                switch (charCode) {
-                    case 0x07: output += '\\a'; break;
-                    case 0x08: output += '\\b'; break;
-                    case 0x0C: output += '\\f'; break;
-                    case 0x0A: output += '$\n'; break;
-                    case 0x0D: output += '\\r'; break;
-                    case 0x09: output += '\\t'; break;
-                    case 0x0B: output += '\\v'; break;
-                    default: {
-                        const octal = charCode.toString(8);
-                        output += '\\' + '0'.repeat(3 - octal.length) + octal;
-                    }
-                }
-            } else if (c === '\\') {
-                output += '\\\\';
-            }  else {
-                output += c;
-            }
-        }
-        await context.out.write(output);
-        return JumpLocation.None;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}DEBUG-PRINT:\n`
-            + this.addressRange.dump(indent+1);
-    }
-}
-
-// 'p' - Print pattern
-class PrintCommand extends Command {
-    constructor(addressRange) {
-        super(addressRange);
-    }
-
-    async run(context) {
-        await context.out.write(context.patternSpace);
-        return JumpLocation.None;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}PRINT:\n`
-            + this.addressRange.dump(indent+1);
-    }
-}
-
-// 'P' - Print first line of pattern
-class PrintLineCommand extends Command {
-    constructor(addressRange) {
-        super(addressRange);
-    }
-
-    async run(context) {
-        const firstLine = context.patternSpace.split('\n', 2)[0];
-        await context.out.write(firstLine);
-        return JumpLocation.None;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}PRINT-LINE:\n`
-            + this.addressRange.dump(indent+1);
-    }
-}
-
-// 'q' - Quit
-class QuitCommand extends Command {
-    constructor(addressRange) {
-        super(addressRange);
-    }
-
-    async run(context) {
-        return JumpLocation.Quit;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}QUIT:\n`
-            + this.addressRange.dump(indent+1);
-    }
-}
-
-// 'Q' - Quit, suppressing the default output
-class QuitSilentCommand extends Command {
-    constructor(addressRange) {
-        super(addressRange);
-    }
-
-    async run(context) {
-        return JumpLocation.QuitSilent;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}QUIT-SILENT:\n`
-            + this.addressRange.dump(indent+1);
-    }
-}
-
-// 'x' - Exchange hold and pattern
-class ExchangeCommand extends Command {
-    constructor(addressRange) {
-        super(addressRange);
-    }
-
-    async run(context) {
-        const oldPattern = context.patternSpace;
-        context.patternSpace = context.holdSpace;
-        context.holdSpace = oldPattern;
-        return JumpLocation.None;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}EXCHANGE:\n`
-            + this.addressRange.dump(indent+1);
-    }
-}
-
-// 'y' - Transliterate characters
-class TransliterateCommand extends Command {
-    constructor(addressRange, inputCharacters, replacementCharacters) {
-        super(addressRange);
-        this.inputCharacters = inputCharacters;
-        this.replacementCharacters = replacementCharacters;
-
-        if (inputCharacters.length !== replacementCharacters.length) {
-            throw new Error('inputCharacters and replacementCharacters must be the same length!');
-        }
-    }
-
-    async run(context) {
-        let newPatternSpace = '';
-        for (let i = 0; i < context.patternSpace.length; ++i) {
-            const char = context.patternSpace[i];
-            const replacementIndex = this.inputCharacters.indexOf(char);
-            if (replacementIndex !== -1) {
-                newPatternSpace += this.replacementCharacters[replacementIndex];
-                continue;
-            }
-            newPatternSpace += char;
-        }
-        context.patternSpace = newPatternSpace;
-        return JumpLocation.None;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}TRANSLITERATE:\n`
-            + this.addressRange.dump(indent+1)
-            + `${makeIndent(indent+1)}FROM '${this.inputCharacters}'\n`
-            + `${makeIndent(indent+1)}TO   '${this.replacementCharacters}'\n`;
-    }
-}
-
-// 'z' - Zap, delete the pattern without ending cycle
-class ZapCommand extends Command {
-    constructor(addressRange) {
-        super(addressRange);
-    }
-
-    async run(context) {
-        context.patternSpace = '';
-        return JumpLocation.None;
-    }
-
-    dump(indent) {
-        return `${makeIndent(indent)}ZAP:\n`
-            + this.addressRange.dump(indent+1);
-    }
-}
-
-const CycleResult = {
-    Continue: Symbol('Continue'),
-    Quit: Symbol('Quit'),
-    QuitSilent: Symbol('QuitSilent'),
-};
-
-class Script {
-    constructor(commands) {
-        this.commands = commands;
-    }
-
-    async runCycle(context) {
-        for (let i = 0; i < this.commands.length; i++) {
-            const command = this.commands[i];
-            command.updateMatchState(context);
-            const result = await command.runCommand(context);
-            switch (result) {
-                case JumpLocation.Label:
-                    // TODO: Implement labels
-                    break;
-                case JumpLocation.Quit:
-                    return CycleResult.Quit;
-                case JumpLocation.QuitSilent:
-                    return CycleResult.QuitSilent;
-                case JumpLocation.StartOfCycle:
-                    i = -1; // To start at 0 after the loop increment.
-                    continue;
-                case JumpLocation.EndOfCycle:
-                    return CycleResult.Continue;
-                case JumpLocation.None:
-                    continue;
-            }
-        }
-    }
-
-    dump() {
-        return `SCRIPT:\n`
-            + this.commands.map(command => command.dump(1)).join('');
-    }
-}
-
-function parseScript(scriptString) {
-    const commands = [];
-
-    // Generate a hard-coded script for now.
-    // TODO: Actually parse input!
-
-    commands.push(new TransliterateCommand(new AddressRange(), 'abcdefABCDEF', 'ABCDEFabcdef'));
-    // commands.push(new ZapCommand(new AddressRange({start: new Address(1), end: new Address(10)})));
-    // commands.push(new HoldAppendCommand(new AddressRange({start: new Address(1), end: new Address(10)})));
-    // commands.push(new GetCommand(new AddressRange({start: new Address(11)})));
-    // commands.push(new DebugPrintCommand(new AddressRange()));
-
-    // commands.push(new ReplaceCommand(new AddressRange({start: new Address(3), end: new Address(30)}), "LOL"));
-
-    // commands.push(new GroupCommand(new AddressRange({ start: new Address(5), end: new Address(10) }), [
-    //     // new LineNumberCommand(),
-    //     // new TextCommand(new AddressRange({ start: new Address(8) }), "Well hello friends! :^)"),
-    //     new QuitCommand(new AddressRange({ start: new Address(8) })),
-    //     new NoopCommand(new AddressRange()),
-    //     new PrintCommand(new AddressRange({ start: new Address(2), end: new Address(14) })),
-    // ]));
-
-    // commands.push(new LineNumberCommand(new AddressRange({ start: new Address(5), end: new Address(10) })));
-    // commands.push(new PrintCommand());
-    // commands.push(new NoopCommand());
-    // commands.push(new PrintCommand());
-
-    return new Script(commands);
-}
+import { parseScript } from './sed/parser.js';
 
 export default {
     name: 'sed',
@@ -645,7 +32,13 @@ export default {
     args: {
         $: 'simple-parser',
         allowPositionals: true,
+        tokens: true,
         options: {
+            dump: {
+                description: 'Dump a representation of the parsed script, for debugging.',
+                type: 'boolean',
+                default: false,
+            },
             expression: {
                 description: 'Specify an additional script to execute. May be specified multiple times.',
                 type: 'string',
@@ -653,6 +46,13 @@ export default {
                 multiple: true,
                 default: [],
             },
+            file: {
+                description: 'Specify a script file to execute. May be specified multiple times.',
+                type: 'string',
+                short: 'f',
+                multiple: true,
+                default: [],
+            },
             quiet: {
                 description: 'Suppress default printing of selected lines.',
                 type: 'boolean',
@@ -663,7 +63,7 @@ export default {
     },
     execute: async ctx => {
         const { out, err } = ctx.externs;
-        const { positionals, values } = ctx.locals;
+        const { positionals, values, tokens } = ctx.locals;
 
         if (positionals.length < 1) {
             await err.write('sed: No inputs given\n');
@@ -675,51 +75,34 @@ export default {
         // made, if the previous addition (if any) was from a -e option, a <newline> shall be inserted before the new
         // addition. The resulting script shall have the same properties as the script operand, described in the
         // OPERANDS section."
-        // TODO: -f loads scripts from a file
         let scriptString = '';
-        if (values.expression.length > 0) {
-            scriptString = values.expression.join('\n');
+        if (values.expression.length + values.file.length > 0) {
+            // These have to be in order, and -e and -f could be intermixed, so iterate the tokens
+            for (let token of tokens) {
+                if (token.kind !== 'option') continue;
+                if (token.name === 'expression') {
+                    scriptString += token.value + '\n';
+                    continue;
+                }
+                if (token.name === 'file') {
+                    for await (const line of fileLines(ctx, token.value)) {
+                        scriptString += line;
+                    }
+                    continue;
+                }
+            }
         } else {
             scriptString = positionals.shift();
         }
 
-        const script = parseScript(scriptString);
-        await out.write(script.dump());
-
-        const context = {
-            out: out,
-            patternSpace: '',
-            holdSpace: '\n',
-            lineNumber: 1,
-            queuedOutput: '',
-        }
-
-        // All remaining positionals are file paths to process.
-        for (const relPath of positionals) {
-            context.lineNumber = 1;
-            for await (const line of fileLines(ctx, relPath)) {
-                context.patternSpace = line.replace(/\n$/, '');
-                const result = await script.runCycle(context);
-                switch (result) {
-                    case CycleResult.Quit: {
-                        if (!values.quiet) {
-                            await out.write(context.patternSpace + '\n');
-                        }
-                        return;
-                    }
-                    case CycleResult.QuitSilent: {
-                        return;
-                    }
-                }
-                if (!values.quiet) {
-                    await out.write(context.patternSpace + '\n');
-                }
-                if (context.queuedOutput) {
-                    await out.write(context.queuedOutput + '\n');
-                    context.queuedOutput = '';
-                }
-                context.lineNumber++;
-            }
+        try {
+            const script = parseScript(scriptString, values);
+            if (values.dump)
+                await out.write(script.dump());
+            await script.run(ctx);
+        } catch (e) {
+            console.error(e);
+            await err.write(`sed: ${e.message}\n`);
         }
     }
 };

+ 134 - 0
packages/phoenix/src/puter-shell/coreutils/sed/address.js

@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2024  Puter Technologies Inc.
+ *
+ * This file is part of Phoenix Shell.
+ *
+ * Phoenix Shell is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+import { makeIndent } from './utils.js';
+
+/**
+ * A single sed address: either a line number or a regex.
+ * The wrapped value is exposed as `value`.
+ */
+export class Address {
+    /**
+     * @param value A line number, or a RegExp to test lines against.
+     */
+    constructor(value) {
+        this.value = value;
+    }
+
+    /**
+     * Checks whether this address selects the given line.
+     * @param lineNumber Number of the line being checked.
+     * @param line Text of the line being checked.
+     * @returns {boolean} For a regex address, whether the regex matches `line`;
+     *                    otherwise whether the stored value equals `lineNumber`.
+     */
+    matches(lineNumber, line) {
+        if (this.value instanceof RegExp) {
+            // test() on a global or sticky regex resumes from lastIndex, which would make
+            // repeated checks depend on the previous line. Always search from the start.
+            this.value.lastIndex = 0;
+            return this.value.test(line);
+        }
+        return this.value === lineNumber;
+    }
+
+    /**
+     * @param lineNumber Line number to compare against.
+     * @returns {boolean} True only when this is a numeric address that lies before `lineNumber`.
+     */
+    isLineNumberBefore(lineNumber) {
+        return (typeof this.value === 'number') && this.value < lineNumber;
+    }
+
+    /**
+     * @param indent Indentation level passed to makeIndent().
+     * @returns {string} One-line description of this address, ending in a newline.
+     */
+    dump(indent) {
+        if (this.value instanceof RegExp) {
+            return `${makeIndent(indent)}REGEX: ${this.value}\n`;
+        }
+        return `${makeIndent(indent)}LINE: ${this.value}\n`;
+    }
+}
+
+/**
+ * Selects the lines a command applies to.
+ *
+ * Three kinds of AddressRange:
+ * - Empty (includes everything)
+ * - Single (matches individual line)
+ * - Range (matches lines between start and end, inclusive)
+ *
+ * Only the Range kind is stateful: updateMatchState() must be called for each
+ * line before matches() so that `insideRange` reflects the current line.
+ */
+export class AddressRange {
+    /**
+     * @param start Optional address that selects the line (Single) or opens the range (Range).
+     * @param end Optional address that closes the range.
+     * @param inverted When true, matches() returns the opposite of the plain result.
+     */
+    constructor({ start, end, inverted = false } = {}) {
+        this.start = start;
+        this.end = end;
+        this.inverted = inverted;
+        this.insideRange = false;
+        this.leaveRangeNextLine = false;
+    }
+
+    /** Number of addresses present: 0 (Empty), 1 (Single) or 2 (Range). */
+    get addressCount() {
+        return (this.start ? 1 : 0) + (this.end ? 1 : 0);
+    }
+
+    /**
+     * Advances the range state to the given line.
+     * @param lineNumber Number of the line about to be processed.
+     * @param line Text of that line.
+     */
+    updateMatchState(lineNumber, line) {
+        // Only ranges have a state to update
+        if (!(this.start && this.end)) {
+            return;
+        }
+
+        // Reset our state each time we start a new file.
+        if (lineNumber === 1) {
+            this.insideRange = false;
+            this.leaveRangeNextLine = false;
+        }
+
+        // Leave the range if the previous line was its last one.
+        if (this.leaveRangeNextLine) {
+            this.insideRange = false;
+            this.leaveRangeNextLine = false;
+        }
+
+        if (this.insideRange) {
+            if (!this.end.isLineNumberBefore(lineNumber)) {
+                // If the line matches the end address, include it but leave the range on the next line.
+                this.leaveRangeNextLine = this.end.matches(lineNumber, line);
+                return;
+            }
+            // The end line number is already behind us: drop out of the range, then fall
+            // through so this line still gets a chance to open a new one.
+            this.insideRange = false;
+        }
+
+        // Does this line start the range?
+        this.insideRange = this.start.matches(lineNumber, line);
+        if (this.insideRange) {
+            // An end line number at or before the opening line selects just this one line,
+            // so the range must be closed before the next line is examined.
+            this.leaveRangeNextLine = this.end.isLineNumberBefore(lineNumber + 1);
+        }
+    }
+
+    /**
+     * Reports whether the given line is selected, honouring `inverted`.
+     * For the Range kind this reflects the state left by the last updateMatchState() call.
+     * @param lineNumber Number of the line being checked.
+     * @param line Text of the line being checked.
+     * @returns {boolean}
+     */
+    matches(lineNumber, line) {
+        const invertIfNeeded = (value) => {
+            return this.inverted ? !value : value;
+        };
+
+        // Empty - matches all lines
+        if (!this.start) {
+            return invertIfNeeded(true);
+        }
+
+        // Range
+        if (this.end) {
+            return invertIfNeeded(this.insideRange);
+        }
+
+        // Single
+        return invertIfNeeded(this.start.matches(lineNumber, line));
+    }
+
+    /**
+     * @param indent Indentation level passed to makeIndent().
+     * @returns {string} Multi-line description of this range, for debugging output.
+     */
+    dump(indent) {
+        const inverted = this.inverted ? `${makeIndent(indent+1)}(INVERTED)\n` : '';
+
+        if (!this.start) {
+            return `${makeIndent(indent)}ADDRESS RANGE (EMPTY)\n`
+                + inverted;
+        }
+
+        if (this.end) {
+            return `${makeIndent(indent)}ADDRESS RANGE (RANGE):\n`
+                + inverted
+                + this.start.dump(indent+1)
+                + this.end.dump(indent+1);
+        }
+
+        return `${makeIndent(indent)}ADDRESS RANGE (SINGLE):\n`
+            + this.start.dump(indent+1)
+            + inverted;
+    }
+}

+ 514 - 0
packages/phoenix/src/puter-shell/coreutils/sed/command.js

@@ -0,0 +1,514 @@
+/*
+ * Copyright (C) 2024  Puter Technologies Inc.
+ *
+ * This file is part of Phoenix Shell.
+ *
+ * Phoenix Shell is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+import { AddressRange } from './address.js';
+import { makeIndent } from './utils.js';
+
+/**
+ * Values a command's `runCommand()`/`run()` returns to tell the caller where
+ * execution should continue.
+ */
+export const JumpLocation = {
+    // Returned by commands that do not redirect execution.
+    None: Symbol('None'),
+    // Returned by 'c', 'd', 'D' (no newline in the pattern space) and by branches without a label.
+    EndOfCycle: Symbol('EndOfCycle'),
+    // Returned by 'D' when text remains in the pattern space.
+    StartOfCycle: Symbol('StartOfCycle'),
+    // Returned by branches to a label; the label name is placed in `context.jumpParameter`.
+    Label: Symbol('Label'),
+    // Returned by '{' when its address does not match; the group id is placed in `context.jumpParameter`.
+    GroupEnd: Symbol('GroupEnd'),
+    // Returned by 'q'.
+    Quit: Symbol('Quit'),
+    // Returned by 'Q'.
+    QuitSilent: Symbol('QuitSilent'),
+};
+
+export class Command {
+    constructor(addressRange) {
+        this.addressRange = addressRange ?? new AddressRange();
+    }
+
+    updateMatchState(context) {
+        this.addressRange.updateMatchState(context.lineNumber, context.patternSpace);
+    }
+
+    async runCommand(context) {
+        if (this.addressRange.matches(context.lineNumber, context.patternSpace)) {
+            return await this.run(context);
+        }
+        return JumpLocation.None;
+    }
+
+    async run(context) {
+        throw new Error('run() not implemented for ' + this.constructor.name);
+    }
+
+    dump(indent) {
+        throw new Error('dump() not implemented for ' + this.constructor.name);
+    }
+}
+
+// '{}' - Group other commands
+export class GroupStartCommand extends Command {
+    constructor(addressRange, id) {
+        super(addressRange);
+        this.id = id;
+    }
+
+    async runCommand(context) {
+        if (!this.addressRange.matches(context.lineNumber, context.patternSpace)) {
+            context.jumpParameter = this.id;
+            return JumpLocation.GroupEnd;
+        }
+        return JumpLocation.None;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}GROUP-START: #${this.id}\n`
+            + this.addressRange.dump(indent+1);
+    }
+}
+export class GroupEndCommand extends Command {
+    constructor(id) {
+        super();
+        this.id = id;
+    }
+
+    async run(context) {
+        return JumpLocation.None;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}GROUP-END: #${this.id}\n`;
+    }
+}
+
+// ':' - Label
+export class LabelCommand extends Command {
+    constructor(label) {
+        super();
+        this.label = label;
+    }
+
+    async run(context) {
+        return JumpLocation.None;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}LABEL:\n`
+            + this.addressRange.dump(indent+1)
+            + `${makeIndent(indent+1)}NAME: ${this.label}\n`;
+    }
+}
+
+// '=' - Output line number
+export class LineNumberCommand extends Command {
+    constructor(addressRange) {
+        super(addressRange);
+    }
+
+    async run(context) {
+        await context.out.write(`${context.lineNumber}\n`);
+        return JumpLocation.None;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}LINE-NUMBER:\n`
+            + this.addressRange.dump(indent+1);
+    }
+}
+
+// 'a' - Append text
+export class AppendTextCommand extends Command {
+    constructor(addressRange, text) {
+        super(addressRange);
+        this.text = text;
+    }
+
+    async run(context) {
+        context.queuedOutput += this.text + '\n';
+        return JumpLocation.None;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}APPEND-TEXT:\n`
+            + this.addressRange.dump(indent+1)
+            + `${makeIndent(indent+1)}CONTENTS: '${this.text}'\n`;
+    }
+}
+
+// 'b' - Branch to label
+// 't' - Branch if substitution successful
+// 'T' - Branch if substitution unsuccessful
+export class BranchCommand extends Command {
+    constructor(addressRange, label, substitutionCondition) {
+        super(addressRange);
+        this.label = label;
+        this.substitutionCondition = substitutionCondition;
+    }
+
+    async run(context) {
+        if (typeof this.substitutionCondition === 'boolean') {
+            if (context.substitutionResult !== this.substitutionCondition)
+                return JumpLocation.None;
+        }
+
+        if (this.label) {
+            context.jumpParameter = this.label;
+            return JumpLocation.Label;
+        }
+        return JumpLocation.EndOfCycle;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}BRANCH:\n`
+            + `${makeIndent(indent+1)}CONDITION: ${this.substitutionCondition ?? 'ALWAYS'}\n`
+            + this.addressRange.dump(indent+1)
+            + `${makeIndent(indent+1)}LABEL: ${this.label ? `'${this.label}'` : 'END'}\n`;
+    }
+}
+
+// 'c' - Replace line with text
+export class ReplaceCommand extends Command {
+    constructor(addressRange, text) {
+        super(addressRange);
+        this.text = text;
+    }
+
+    async run(context) {
+        context.patternSpace = '';
+        // Output if we're either a 0-address range, 1-address range, or 2-address on the last line.
+        if (this.addressRange.leaveRangeNextLine || !this.addressRange.end) {
+            await context.out.write(this.text + '\n');
+        }
+        return JumpLocation.EndOfCycle;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}REPLACE-TEXT:\n`
+            + this.addressRange.dump(indent+1)
+            + `${makeIndent(indent+1)}CONTENTS: '${this.text}'\n`;
+    }
+}
+
+// 'd' - Delete pattern
+// 'D' - Delete first line of pattern
+export class DeleteCommand extends Command {
+    constructor(addressRange, firstLine = false) {
+        super(addressRange);
+        this.firstLine = firstLine;
+    }
+
+    async run(context) {
+        if (this.firstLine) {
+            const [ first, rest ] = context.patternSpace.split('\n', 2);
+            context.patternSpace = rest ?? '';
+            if (rest === undefined)
+                return JumpLocation.EndOfCycle;
+            return JumpLocation.StartOfCycle;
+        }
+        context.patternSpace = '';
+        return JumpLocation.EndOfCycle;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}DELETE: ${this.firstLine ? 'LINE' : 'ALL'}\n`
+            + this.addressRange.dump(indent+1);
+    }
+}
+
+// 'g' - Get the held line into the pattern
+// 'G' - Get the held line and append it to the pattern
+export class GetCommand extends Command {
+    constructor(addressRange, append = false) {
+        super(addressRange);
+        this.append = append;
+    }
+
+    async run(context) {
+        if (this.append) {
+            context.patternSpace += '\n' + context.holdSpace;
+        } else {
+            context.patternSpace = context.holdSpace;
+        }
+        return JumpLocation.None;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}GET-HELD: ${this.append ? 'APPEND' : 'ALL'}\n`
+            + this.addressRange.dump(indent+1);
+    }
+}
+
+// 'h' - Hold the pattern
+// 'H' - Hold append the pattern
+export class HoldCommand extends Command {
+    constructor(addressRange, append = false) {
+        super(addressRange);
+        this.append = append;
+    }
+
+    async run(context) {
+        if (this.append) {
+            context.holdSpace += '\n' + context.patternSpace;
+        } else {
+            context.holdSpace = context.patternSpace;
+        }
+        return JumpLocation.None;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}HOLD: ${this.append ? 'APPEND' : 'ALL'}\n`
+            + this.addressRange.dump(indent+1);
+    }
+}
+
+// 'i' - Insert text
+export class InsertTextCommand extends Command {
+    constructor(addressRange, text) {
+        super(addressRange);
+        this.text = text;
+    }
+
+    async run(context) {
+        await context.out.write(this.text + '\n');
+        return JumpLocation.None;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}INSERT-TEXT:\n`
+            + this.addressRange.dump(indent+1)
+            + `${makeIndent(indent+1)}CONTENTS: '${this.text}'\n`;
+    }
+}
+
+// 'l' - Print pattern in debug format
+export class DebugPrintCommand extends Command {
+    constructor(addressRange) {
+        super(addressRange);
+    }
+
+    async run(context) {
+        let output = '';
+        for (const c of context.patternSpace) {
+            if (c < ' ') {
+                const charCode = c.charCodeAt(0);
+                switch (charCode) {
+                    case 0x07: output += '\\a'; break;
+                    case 0x08: output += '\\b'; break;
+                    case 0x0C: output += '\\f'; break;
+                    case 0x0A: output += '$\n'; break;
+                    case 0x0D: output += '\\r'; break;
+                    case 0x09: output += '\\t'; break;
+                    case 0x0B: output += '\\v'; break;
+                    default: {
+                        const octal = charCode.toString(8);
+                        output += '\\' + '0'.repeat(3 - octal.length) + octal;
+                    }
+                }
+            } else if (c === '\\') {
+                output += '\\\\';
+            }  else {
+                output += c;
+            }
+        }
+        await context.out.write(output + '\n');
+        return JumpLocation.None;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}DEBUG-PRINT:\n`
+            + this.addressRange.dump(indent+1);
+    }
+}
+
+// 'p' - Print pattern
+// 'P' - Print first line of pattern
+export class PrintCommand extends Command {
+    constructor(addressRange, firstLine = false) {
+        super(addressRange);
+        this.firstLine = firstLine;
+    }
+
+    async run(context) {
+        if (this.firstLine) {
+            const firstLine = context.patternSpace.split('\n', 2)[0];
+            await context.out.write(firstLine + '\n');
+        } else {
+            await context.out.write(context.patternSpace + '\n');
+        }
+        return JumpLocation.None;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}PRINT: ${this.firstLine ? 'LINE' : 'ALL'}\n`
+            + this.addressRange.dump(indent+1);
+    }
+}
+
+// 'q' - Quit
+// 'Q' - Quit, suppressing the default output
+export class QuitCommand extends Command {
+    constructor(addressRange, silent) {
+        super(addressRange);
+        this.silent = silent;
+    }
+
+    async run(context) {
+        return this.silent ? JumpLocation.QuitSilent : JumpLocation.Quit;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}QUIT:\n`
+            + this.addressRange.dump(indent+1)
+            + `${makeIndent(indent+1)}SILENT = '${this.silent}'\n`;
+    }
+}
+
+// 's' - Substitute
+export class SubstituteFlags {
+    constructor({ global = false, nthOccurrence = null, print = false, writeToFile = null } = {}) {
+        this.global = global;
+        this.nthOccurrence = nthOccurrence;
+        this.print = print;
+        this.writeToFile = writeToFile;
+    }
+}
+export class SubstituteCommand extends Command {
+    constructor(addressRange, regex, replacement, flags = new SubstituteFlags()) {
+        if (!(flags instanceof SubstituteFlags)) {
+            throw new Error('flags provided to SubstituteCommand must be an instance of SubstituteFlags');
+        }
+        super(addressRange);
+        this.regex = regex;
+        this.replacement = replacement;
+        this.flags = flags;
+    }
+
+    async run(context) {
+        if (this.flags.global) {
+            // replaceAll() requires that the regex have the g flag
+            const regex = new RegExp(this.regex, 'g');
+            context.substitutionResult = regex.test(context.patternSpace);
+            context.patternSpace = context.patternSpace.replaceAll(regex, this.replacement);
+        } else if (this.flags.nthOccurrence && this.flags.nthOccurrence !== 1) {
+            // Note: For n=1, it's easier to use the "replace first match" path below instead.
+
+            // matchAll() requires that the regex have the g flag
+            const matches = [...context.patternSpace.matchAll(new RegExp(this.regex, 'g'))];
+            const nthMatch = matches[this.flags.nthOccurrence - 1]; // n is 1-indexed
+            if (nthMatch !== undefined) {
+                // To only replace the Nth match:
+                // - Split the string in two, at the match position
+                // - Run the replacement on the second half
+                // - Combine that with the first half again
+                const firstHalf = context.patternSpace.substring(0, nthMatch.index);
+                const secondHalf = context.patternSpace.substring(nthMatch.index);
+                context.patternSpace = firstHalf + secondHalf.replace(this.regex, this.replacement);
+                context.substitutionResult = true;
+            } else {
+                context.substitutionResult = false;
+            }
+        } else {
+            context.substitutionResult = this.regex.test(context.patternSpace);
+            context.patternSpace = context.patternSpace.replace(this.regex, this.replacement);
+        }
+
+        if (context.substitutionResult) {
+            if  (this.flags.print) {
+                await context.out.write(context.patternSpace + '\n');
+            }
+
+            if (this.flags.writeToFile) {
+                // TODO: Implement this.
+            }
+        }
+
+        return JumpLocation.None;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}SUBSTITUTE:\n`
+            + this.addressRange.dump(indent+1)
+            + `${makeIndent(indent+1)}REGEX       '${this.regex}'\n`
+            + `${makeIndent(indent+1)}REPLACEMENT '${this.replacement}'\n`
+            + `${makeIndent(indent+1)}FLAGS       ${JSON.stringify(this.flags)}\n`;
+    }
+}
+
+// 'x' - Exchange hold and pattern
+export class ExchangeCommand extends Command {
+    constructor(addressRange) {
+        super(addressRange);
+    }
+
+    async run(context) {
+        const oldPattern = context.patternSpace;
+        context.patternSpace = context.holdSpace;
+        context.holdSpace = oldPattern;
+        return JumpLocation.None;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}EXCHANGE:\n`
+            + this.addressRange.dump(indent+1);
+    }
+}
+
+// 'y' - Transliterate characters
+export class TransliterateCommand extends Command {
+    constructor(addressRange, inputCharacters, replacementCharacters) {
+        super(addressRange);
+        this.inputCharacters = inputCharacters;
+        this.replacementCharacters = replacementCharacters;
+
+        if (inputCharacters.length !== replacementCharacters.length) {
+            throw new Error('inputCharacters and replacementCharacters must be the same length!');
+        }
+    }
+
+    async run(context) {
+        let newPatternSpace = '';
+        for (let i = 0; i < context.patternSpace.length; ++i) {
+            const char = context.patternSpace[i];
+            const replacementIndex = this.inputCharacters.indexOf(char);
+            if (replacementIndex !== -1) {
+                newPatternSpace += this.replacementCharacters[replacementIndex];
+                continue;
+            }
+            newPatternSpace += char;
+        }
+        context.patternSpace = newPatternSpace;
+        return JumpLocation.None;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}TRANSLITERATE:\n`
+            + this.addressRange.dump(indent+1)
+            + `${makeIndent(indent+1)}FROM '${this.inputCharacters}'\n`
+            + `${makeIndent(indent+1)}TO   '${this.replacementCharacters}'\n`;
+    }
+}
+
+// 'z' - Zap, delete the pattern without ending cycle
+export class ZapCommand extends Command {
+    constructor(addressRange) {
+        super(addressRange);
+    }
+
+    async run(context) {
+        context.patternSpace = '';
+        return JumpLocation.None;
+    }
+
+    dump(indent) {
+        return `${makeIndent(indent)}ZAP:\n`
+            + this.addressRange.dump(indent+1);
+    }
+}

+ 529 - 0
packages/phoenix/src/puter-shell/coreutils/sed/parser.js

@@ -0,0 +1,529 @@
+/*
+ * Copyright (C) 2024  Puter Technologies Inc.
+ *
+ * This file is part of Phoenix Shell.
+ *
+ * Phoenix Shell is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+import { Address, AddressRange } from './address.js';
+import {
+    AppendTextCommand,
+    BranchCommand,
+    DebugPrintCommand,
+    DeleteCommand,
+    ExchangeCommand,
+    GetCommand,
+    GroupEndCommand,
+    GroupStartCommand,
+    HoldCommand,
+    InsertTextCommand,
+    LabelCommand,
+    LineNumberCommand,
+    PrintCommand,
+    QuitCommand,
+    ReplaceCommand,
+    SubstituteCommand,
+    SubstituteFlags,
+    TransliterateCommand,
+    ZapCommand,
+} from './command.js';
+import { Script } from './script.js';
+import { GrammarContext, standard_parsers } from '../../../../packages/parsely/exports.js';
+import { StringStream } from '../../../../packages/parsely/streams.js';
+import { INVALID, Parser, UNRECOGNIZED, VALUE } from '../../../../packages/parsely/parser.js';
+
+/**
+ * A slight hack: Parsely doesn't yet have an equivalent of backreferences.
+ * So, while parsing /foo/bar/, where the `/` can be any character, we set the current_delimiter variable
+ * to that delimiter character temporarily, so we can refer to it in the subsequent delimiters.
+ */
+class DelimiterParser extends Parser {
+    static current_delimiter;
+
+    _create({ first = false, character = null } = {}) {
+        this.character = character;
+        this.first = first;
+    }
+
+    _parse(stream) {
+        const sub_stream = stream.fork();
+
+        let { done, value } = sub_stream.next();
+        if (done) return UNRECOGNIZED;
+
+        if (this.first) {
+            if (this.character && this.character !== value)
+                return UNRECOGNIZED;
+            // Backslash and newline are disallowed as delimiters.
+            if (value === '\n' || value === '\\')
+                return UNRECOGNIZED;
+            DelimiterParser.current_delimiter = value;
+        } else if (DelimiterParser.current_delimiter !== value) {
+            return UNRECOGNIZED;
+        }
+
+        stream.join(sub_stream);
+        return { status: VALUE, $: 'delimiter', value };
+    }
+}
+
+export const parseScript = (script_string, options) => {
+
+    const grammar_context = new GrammarContext({
+        ...standard_parsers(),
+        delimiter: DelimiterParser,
+    });
+
+    let group_start_id = 0;
+    let group_end_id = 0;
+
+    const parser = grammar_context.define_parser({
+        script: a => a.repeat(
+            a.optional(a.symbol('command')),
+            a.firstMatch(
+                a.literal('\n'),
+                a.literal(';'),
+            ),
+        ),
+        command: a => a.sequence(
+            a.symbol('whitespace'),
+            a.optional(a.symbol('address_range')),
+            a.symbol('whitespace'),
+            a.firstMatch(
+                a.discard(a.symbol('comment')),
+                a.symbol('{'),
+                a.symbol('}'),
+                a.symbol(':'),
+                a.symbol('='),
+                a.symbol('a'),
+                a.symbol('b'),
+                a.symbol('c'),
+                a.symbol('d'),
+                a.symbol('D'),
+                a.symbol('g'),
+                a.symbol('G'),
+                a.symbol('h'),
+                a.symbol('H'),
+                a.symbol('i'),
+                a.symbol('l'),
+                a.symbol('p'),
+                a.symbol('P'),
+                a.symbol('q'),
+                a.symbol('Q'),
+                a.symbol('s'),
+                a.symbol('t'),
+                a.symbol('T'),
+                a.symbol('x'),
+                a.symbol('y'),
+                a.symbol('z'),
+            ),
+        ),
+        address_range: a => a.sequence(
+            a.optional(
+                a.sequence(
+                    a.symbol('address'),
+                    a.optional(a.sequence(
+                        a.literal(','),
+                        a.symbol('address'),
+                    )),
+                ),
+            ),
+            a.optional(
+                a.sequence(
+                    a.symbol('whitespace'),
+                    a.literal('!'),
+                ),
+            ),
+        ),
+        address: a => a.firstMatch(
+            // TODO: A dollar sign, for "final line"
+            a.symbol('decimal_number'),
+            a.symbol('regex'),
+        ),
+        decimal_number: a => a.stringOf(c => /\d/.test(c)),
+        regex: a => a.sequence(
+            a.firstMatch(
+                a.delimiter({ first: true, character: '/' }),
+                a.sequence(
+                    a.literal('\\'),
+                    a.delimiter({ first: true }),
+                ),
+            ),
+            a.stringUntil(c => c === DelimiterParser.current_delimiter),
+            a.delimiter(),
+        ),
+        whitespace: a => a.discard(
+            a.optional(
+                a.stringOf(c => /[ \t]/.test(c)),
+            ),
+        ),
+        label: a => a.stringOf(c => {
+            // POSIX defines this as being characters within "the portable filename character set".
+            return /[A-Za-z0-9.\-_]/.test(c);
+        }),
+        filename: a => a.stringOf(c => {
+            return /[A-Za-z0-9.\-_]/.test(c);
+        }),
+        text: a => a.stringUntil('\n'),
+        comment: a => a.sequence(
+            a.literal('#'),
+            a.stringOf(c => c !== '\n'),
+        ),
+        '{': a => a.literal('{'),
+        '}': a => a.literal('}'),
+        ':': a => a.sequence(
+            a.literal(':'),
+            a.symbol('label'),
+        ),
+        '=': a => a.literal('='),
+        a: a => a.sequence(
+            a.literal('a\\\n'),
+            a.symbol('text'),
+        ),
+        b: a => a.sequence(
+            a.literal('b'),
+            a.optional(
+                a.sequence(
+                    a.symbol('whitespace'),
+                    a.symbol('label'),
+                ),
+            ),
+        ),
+        c: a => a.sequence(
+            a.literal('c\\\n'),
+            a.symbol('text'),
+        ),
+        d: a => a.literal('d'),
+        D: a => a.literal('D'),
+        g: a => a.literal('g'),
+        G: a => a.literal('G'),
+        h: a => a.literal('h'),
+        H: a => a.literal('H'),
+        i: a => a.sequence(
+            a.literal('i\\\n'),
+            a.symbol('text'),
+        ),
+        l: a => a.literal('l'),
+        p: a => a.literal('p'),
+        P: a => a.literal('P'),
+        q: a => a.literal('q'),
+        Q: a => a.literal('Q'),
+        s: a => a.sequence(
+            a.literal('s'),
+            a.delimiter({ first: true }),
+            a.stringUntil(c => c === DelimiterParser.current_delimiter),
+            a.delimiter(),
+            a.stringUntil(c => c === DelimiterParser.current_delimiter),
+            a.delimiter(),
+            a.optional(
+                a.repeat(
+                    a.firstMatch(
+                        a.literal('g'),
+                        a.literal('p'),
+                        a.symbol('decimal_number'),
+                        a.sequence(
+                            a.literal('w'),
+                            a.symbol('whitespace'),
+                            a.symbol('filename'),
+                        ),
+                    ),
+                ),
+            ),
+        ),
+        t: a => a.sequence(
+            a.literal('t'),
+            a.optional(
+                a.sequence(
+                    a.symbol('whitespace'),
+                    a.symbol('label'),
+                ),
+            ),
+        ),
+        T: a => a.sequence(
+            a.literal('T'),
+            a.optional(
+                a.sequence(
+                    a.symbol('whitespace'),
+                    a.symbol('label'),
+                ),
+            ),
+        ),
+        x: a => a.literal('x'),
+        y: a => a.sequence(
+            a.literal('y'),
+            a.delimiter({ first: true }),
+            a.stringUntil(c => c === DelimiterParser.current_delimiter),
+            a.delimiter(),
+            a.stringUntil(c => c === DelimiterParser.current_delimiter),
+            a.delimiter(),
+        ),
+        z: a => a.literal('z'),
+    }, {
+        script: script => {
+            const commands = script
+                .filter(it => {
+                    return it.$ === 'command' && it.value;
+                }).map(it => {
+                    return it.value;
+                });
+
+            // Record all labels that exist in the script, so we can validate branch commands.
+            const labels = new Set();
+            for (const command of commands) {
+                if (command instanceof LabelCommand) {
+                    labels.add(command.label);
+                }
+            }
+
+            // Validate commands
+            let group_depth = 0;
+            for (const command of commands) {
+                // Ensure branches all go to labels that exist
+                if (command instanceof BranchCommand) {
+                    // Note: Branches to the end of the script don't have a label.
+                    if (command.label && !labels.has(command.label))
+                        throw new Error(`Label "${command.label}" does not exist in the script.`);
+                }
+
+                if (command instanceof GroupStartCommand) {
+                    group_depth++;
+                }
+
+                if (command instanceof GroupEndCommand) {
+                    if (group_depth < 1)
+                        throw new Error('Unexpected "}": no open groups');
+                    group_depth--;
+                }
+            }
+
+            if (group_depth !== 0)
+                throw new Error(`${group_depth} groups left open`);
+
+            return new Script(commands);
+        },
+        command: command => {
+            // Comments show up as empty commands. Just skip them.
+            if (command.length === 0)
+                return;
+
+            let addresses_provided = 0;
+            let address_range, func;
+            switch (command.length) {
+                case 1:
+                    address_range = new AddressRange();
+                    func = command[0];
+                    break;
+                default:
+                    address_range = command[0].value;
+                    func = command[1];
+                    addresses_provided = address_range.addressCount;
+                    break;
+            }
+
+            const require_max_address_count = (count) => {
+                if (addresses_provided > count)
+                    throw new Error(`Too many addresses provided to '${func.$}' command, most is ${count}`);
+            }
+
+            // Decode func into its command type
+            switch (func.$) {
+                case '{': {
+                    require_max_address_count(2);
+                    return new GroupStartCommand(address_range, ++group_start_id);
+                }
+                case '}': {
+                    require_max_address_count(0);
+                    return new GroupEndCommand(++group_end_id);
+                }
+                case ':': {
+                    require_max_address_count(0);
+                    return new LabelCommand(func.value);
+                }
+                case '=': {
+                    require_max_address_count(1);
+                    return new LineNumberCommand(address_range);
+                }
+                case 'a': {
+                    require_max_address_count(1);
+                    return new AppendTextCommand(address_range, func.value);
+                }
+                case 'b': {
+                    require_max_address_count(2);
+                    return new BranchCommand(address_range, func.value);
+                }
+                case 'c': {
+                    require_max_address_count(2);
+                    return new ReplaceCommand(address_range, func.value);
+                }
+                case 'd':
+                case 'D': {
+                    require_max_address_count(2);
+                    return new DeleteCommand(address_range, func.$ === 'D');
+                }
+                case 'g':
+                case 'G': {
+                    require_max_address_count(2);
+                    return new GetCommand(address_range, func.$ === 'G');
+                }
+                case 'h':
+                case 'H': {
+                    require_max_address_count(2);
+                    return new HoldCommand(address_range, func.$ === 'H');
+                }
+                case 'i': {
+                    require_max_address_count(1);
+                    return new InsertTextCommand(address_range, func.value);
+                }
+                case 'l': {
+                    require_max_address_count(2);
+                    return new DebugPrintCommand(address_range);
+                }
+                case 'p':
+                case 'P': {
+                    require_max_address_count(2);
+                    return new PrintCommand(address_range, func.$ === 'P');
+                }
+                case 'q':
+                case 'Q': {
+                    require_max_address_count(1);
+                    return new QuitCommand(address_range, func.$ === 'Q');
+                }
+                case 's': {
+                    require_max_address_count(2);
+                    const { regex, replacement, flags } = func.value;
+                    return new SubstituteCommand(address_range, regex, replacement, flags);
+                }
+                case 't':
+                case 'T': {
+                    require_max_address_count(2);
+                    return new BranchCommand(address_range, func.value, func.$ === 't');
+                }
+                case 'x': {
+                    require_max_address_count(2);
+                    return new ExchangeCommand(address_range);
+                }
+                case 'y': {
+                    require_max_address_count(2);
+                    const { input, replacement } = func.value;
+                    return new TransliterateCommand(address_range, input, replacement);
+                }
+                case 'z': {
+                    require_max_address_count(2);
+                    return new ZapCommand(address_range);
+                }
+                default:
+                    throw new Error(`Unimplemented command '${func.$}'`);
+            }
+        },
+        address_range: address_range => {
+            if (address_range.length === 0)
+                return new AddressRange();
+
+            if (address_range.length === 1) {
+                if (address_range[0].value[0].$ === 'address') {
+                    // Either 1 or two addresses
+                    const parts = address_range[0].value;
+                    const start = parts[0].value;
+                    const end = parts[1] ? parts[1].value[1].value : null;
+                    return new AddressRange({ start, end });
+                }
+
+                // No addresses, just inverted
+                return new AddressRange({ inverted: true });
+            }
+
+            // Addresses and inverted
+            const parts = address_range[0].value;
+            const start = parts[0].value;
+            const end = parts[1] ? parts[1].value[1].value : null;
+            return new AddressRange({ start, end, inverted: true });
+        },
+        address: address => {
+            if (address instanceof RegExp)
+                return new Address(address);
+            return new Address(Number(address));
+        },
+        regex: regex => new RegExp(regex[1].value),
+
+        // Functions with arguments
+        ':': it => it[1].value,
+        a: it => it[1].value,
+        b: it => {
+            if (it.length < 2) return null;
+            return it[1].value[0].value;
+        },
+        c: it => it[1].value,
+        i: it => it[1].value,
+        s: it => {
+            const [ s, _, regex, __, replacement, ___, flag_values ] = it;
+            const flags = {
+                global: false,
+                nthOccurrence: null,
+                print: false,
+                writeToFile: null,
+            };
+            if (flag_values && flag_values.value.length) {
+                for (const flag of flag_values.value) {
+                    if (flag.value instanceof Array) {
+                        // It's a 'w'
+                        if (flags.writeToFile)
+                            throw new Error(`Multiple 'w' flags given to s command`);
+                        flags.writeToFile = flag.value[1].value;
+
+                    } else if (flag.value === 'g') {
+                        if (flags.global)
+                            throw new Error(`Multiple 'g' flags given to s command`);
+                        flags.global = true;
+
+                    } else if (flag.value === 'p') {
+                        if (flags.print)
+                            throw new Error(`Multiple 'p' flags given to s command`);
+                        flags.print = true;
+
+                    } else {
+                        // Should be a number
+                        if (flags.nthOccurrence !== null)
+                            throw new Error(`Multiple number flags given to s command`);
+                        flags.nthOccurrence = Number.parseInt(flag.value);
+                    }
+                }
+            }
+            return {
+                regex: new RegExp(regex.value),
+                replacement: replacement.value,
+                flags: new SubstituteFlags(flags),
+            };
+        },
+        t: it => {
+            if (it.length < 2) return null;
+            return it[1].value[0].value;
+        },
+        T: it => {
+            if (it.length < 2) return null;
+            return it[1].value[0].value;
+        },
+        y: it => {
+            const input = it[2].value;
+            const replacement = it[4].value;
+            if (input.length !== replacement.length)
+                throw new Error('Input and replacement parts of y command must have the same length');
+
+            return { input, replacement };
+        }
+    });
+
+    const stream = new StringStream(script_string);
+    const result = parser(stream, 'script', { must_consume_all_input: true });
+    return result.value;
+}

+ 123 - 0
packages/phoenix/src/puter-shell/coreutils/sed/script.js

@@ -0,0 +1,123 @@
+/*
+ * Copyright (C) 2024  Puter Technologies Inc.
+ *
+ * This file is part of Phoenix Shell.
+ *
+ * Phoenix Shell is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+import { JumpLocation, LabelCommand, GroupEndCommand } from './command.js';
+import { fileLines } from '../../../util/file.js';
+
+const CycleResult = {
+    Continue: Symbol('Continue'),
+    Quit: Symbol('Quit'),
+    QuitSilent: Symbol('QuitSilent'),
+};
+
+export class Script {
+    constructor(commands) {
+        this.commands = commands;
+    }
+
+    async runCycle(context) {
+        let i = 0;
+        while (i < this.commands.length) {
+            const command = this.commands[i];
+            command.updateMatchState(context);
+            const result = await command.runCommand(context);
+            switch (result) {
+                case JumpLocation.Label: {
+                    const label = context.jumpParameter;
+                    context.jumpParameter = null;
+                    const foundIndex = this.commands.findIndex(c => c instanceof LabelCommand && c.label === label);
+                    if (foundIndex === -1) {
+                        // TODO: Check for existence of labels during parsing too.
+                        throw new Error(`Label ':${label}' not found.`);
+                    }
+                    i = foundIndex;
+                    break;
+                }
+                case JumpLocation.GroupEnd: {
+                    const groupId = context.jumpParameter;
+                    context.jumpParameter = null;
+                    const foundIndex = this.commands.findIndex(c => c instanceof GroupEndCommand && c.id === groupId);
+                    if (foundIndex === -1) {
+                        // TODO: Check for matching groups during parsing too.
+                        throw new Error(`Matching } for group #${groupId} not found.`);
+                    }
+                    i = foundIndex;
+                    break;
+                }
+                case JumpLocation.Quit:
+                    return CycleResult.Quit;
+                case JumpLocation.QuitSilent:
+                    return CycleResult.QuitSilent;
+                case JumpLocation.StartOfCycle:
+                    i = 0;
+                    continue;
+                case JumpLocation.EndOfCycle:
+                    return CycleResult.Continue;
+                case JumpLocation.None:
+                    i++;
+                    break;
+            }
+        }
+    }
+
+    async run(ctx) {
+        const { out, err } = ctx.externs;
+        const { positionals, values } = ctx.locals;
+
+        const context = {
+            out: ctx.externs.out,
+            patternSpace: '',
+            holdSpace: '\n',
+            lineNumber: 1,
+            queuedOutput: '',
+        };
+
+        // All remaining positionals are file paths to process.
+        for (const relPath of positionals) {
+            context.lineNumber = 1;
+            for await (const line of fileLines(ctx, relPath)) {
+                context.patternSpace = line.replace(/\n$/, '');
+                const result = await this.runCycle(context);
+                switch (result) {
+                    case CycleResult.Quit: {
+                        if (!values.quiet) {
+                            await out.write(context.patternSpace + '\n');
+                        }
+                        return;
+                    }
+                    case CycleResult.QuitSilent: {
+                        return;
+                    }
+                }
+                if (!values.quiet) {
+                    await out.write(context.patternSpace + '\n');
+                }
+                if (context.queuedOutput) {
+                    await out.write(context.queuedOutput + '\n');
+                    context.queuedOutput = '';
+                }
+                context.lineNumber++;
+            }
+        }
+    }
+
+    dump() {
+        return `SCRIPT:\n`
+            + this.commands.map(command => command.dump(1)).join('');
+    }
+}

+ 21 - 0
packages/phoenix/src/puter-shell/coreutils/sed/utils.js

@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2024  Puter Technologies Inc.
+ *
+ * This file is part of Phoenix Shell.
+ *
+ * Phoenix Shell is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+export function makeIndent(size) {
+    return '  '.repeat(size);
+}