// Mirror of https://github.com/HeyPuter/puter.git (root / HeyPuter)
							// METADATA // {"ai-commented":{"service":"claude"}}
/*
 * Copyright (C) 2024-present Puter Technologies Inc.
 * 
 * This file is part of Puter.
 * 
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

const lib = {};

/**
* Normalizes a list of text lines IN PLACE:
*   1. whitespace-only lines become empty strings,
*   2. empty lines at the start and end of the list are dropped,
*   3. the smallest run of leading spaces shared by the non-empty lines
*      is stripped from each of them.
* Only the space character counts as indentation; a leading tab counts
* as zero indent.
* @param {string[]} lines - Lines to normalize; the array itself is mutated
* @returns {undefined}
*/
lib.dedent_lines = lines => {
    const whitespace_only = /^\s+$/;
    const leading_spaces = /^ */;

    // Blank out lines that contain nothing but whitespace
    lines.forEach((text, idx) => {
        if ( whitespace_only.test(text) ) lines[idx] = '';
    });

    // Trim empty lines off both ends of the list
    while ( lines.length > 0 && lines[0] === '' ) {
        lines.shift();
    }
    while ( lines.length > 0 && lines[lines.length - 1] === '' ) {
        lines.pop();
    }

    // Smallest indent among the lines that still have content
    let common_indent = Number.MAX_SAFE_INTEGER;
    for ( const text of lines ) {
        if ( text === '' ) continue;
        const indent = leading_spaces.exec(text)[0].length;
        common_indent = Math.min(common_indent, indent);
    }

    // Strip that indent; empty lines are left alone
    lines.forEach((text, idx) => {
        if ( text !== '' ) lines[idx] = text.slice(common_indent);
    });
};


/**
* Creates a StringStream: a cursor over `str` that tracks a current position.
*
* The position lives in a small state object, so `fork()` can hand out an
* independent cursor over the same string (used for speculative parsing).
* The `skip_*`, `fwd` and `fork` methods are synchronous; every other
* method is `async` and must be awaited.
*
* @param {string} str - The string to parse
* @param {Object} [options] - Optional configuration object
* @param {Object} [options.state_] - Initial state, shaped `{ pos: number }`
* @returns {Object} StringStream instance with parsing methods
*/
const StringStream = (str, { state_ } = {}) => {
    const state = state_ ?? { pos: 0 };

    // Global and sticky regexes carry `lastIndex` between calls. Every
    // search here runs against a fresh slice, so a stale `lastIndex` would
    // make results depend on whatever the regex was used for previously.
    const rewind = re => {
        if ( re.global || re.sticky ) re.lastIndex = 0;
        return re;
    };

    const at_end = () => state.pos >= str.length;

    return {
        // Advances past any whitespace at the current position.
        skip_whitespace () {
            while ( ! at_end() && /\s/.test(str[state.pos]) ) state.pos++;
        },
        // Advances while the current character equals one of `items`.
        // INCOMPLETE: only handles single chars
        skip_matching (items) {
            while ( ! at_end() && items.includes(str[state.pos]) ) state.pos++;
        },
        // Moves the position forward by `amount` characters (default 1).
        fwd (amount) {
            state.pos += amount ?? 1;
        },
        // Returns a new stream over the same string, starting at the current
        // position, whose position advances independently of this one.
        fork () {
            return StringStream(str, { state_: { pos: state.pos } });
        },
        async get_pos () {
            return state.pos;
        },
        // Resolves to the character at the current position, or `undefined`
        // when the position is past the end of the string.
        async get_char () {
            return str[state.pos];
        },
        // Literal: true when the text at the current position starts with it.
        // RegExp: true when it matches anywhere in the remaining text (the
        // pattern must anchor itself with `^` to test the current position).
        async matches (re_or_lit) {
            if ( re_or_lit instanceof RegExp ) {
                return rewind(re_or_lit).test(str.slice(state.pos));
            }

            const lit = re_or_lit;
            return lit === str.slice(state.pos, state.pos + lit.length);
        },
        // Consumes and returns the text before the next occurrence of
        // `re_or_lit`, leaving the position ON the match. Resolves to
        // `undefined` without moving when there is no match. A match at the
        // current position yields '' - callers must compare against
        // `undefined` if they need to tell the two cases apart.
        async get_until (re_or_lit) {
            const remainder = str.slice(state.pos);
            let offset;
            if ( re_or_lit instanceof RegExp ) {
                const result = rewind(re_or_lit).exec(remainder);
                if ( ! result ) return;
                offset = result.index;
            } else {
                offset = remainder.indexOf(re_or_lit);
                // TODO: parser warnings?
                if ( offset === -1 ) return;
            }
            state.pos += offset;
            return remainder.slice(0, offset);
        },
        // Short human-readable snapshot: first 6 chars of the string, the
        // position, and up to 6 chars at the position. Newlines show as {LF}.
        async debug () {
            const l1 = str.length;
            const l2 = str.length - state.pos;
            // `g` flag: escape every newline in the excerpt, not just the first
            const clean = s => s.replace(/\n/g, '{LF}');
            return `[stream : "${
                clean(str.slice(0, Math.min(6, l1)))
            }"... |${state.pos}| ..."${
                clean(str.slice(state.pos, state.pos + Math.min(6, l2)))
            }"]`;
        },
    };
};

const LinesCommentParser = ({
    prefix
}) => {
    return {
        parse: async (stream) => {
            stream.skip_whitespace();
            const lines = [];
            while ( await stream.matches(prefix) ) {
                const line = await stream.get_until('\n');
                if ( ! line ) return;
                lines.push(line);
                stream.fwd();
                stream.skip_matching([' ', '\t']);
                if ( await stream.get_char() === '\n' ){
                    stream.fwd();
                    break;
                }
                stream.skip_whitespace();
            }
            if ( lines.length === 0 ) return;
            for ( let i=0 ; i < lines.length ; i++ ) {
                lines[i] = lines[i].slice(prefix.length);
            }
            lib.dedent_lines(lines);
            return {
                lines,
            };
        }
    };
};

const BlockCommentParser = ({
    start,
    end,
    ignore_line_prefix,
}) => {
    return {
        parse: async (stream) => {
            stream.skip_whitespace();
            if ( ! await stream.matches(start) ) return;
            stream.fwd(start.length);
            const contents = await stream.get_until(end);
            if ( ! contents ) return;
            stream.fwd(end.length);
            // console.log('ending at', await stream.debug())
            const lines = contents.split('\n');
            
            // === Formatting Time! === //
            
            // Special case: remove the last '*' after '/**'
            if ( lines[0].trim() === ignore_line_prefix ) {
                lines.shift();
            }
            
            // First dedent pass
            lib.dedent_lines(lines);
            
            // If all the lines start with asterisks, remove
            let allofem = true;
            for ( let i=0 ; i < lines.length ; i++ ) {
                if ( lines[i] === '' ) continue;
                if ( ! lines[i].startsWith(ignore_line_prefix) ) {
                    allofem = false;
                    break
                }
            }
            
            if ( allofem ) {
                for ( let i=0 ; i < lines.length ; i++ ) {
                    if ( lines[i] === '' ) continue;
                    lines[i] = lines[i].slice(ignore_line_prefix.length);
                }
                
                // Second dedent pass
                lib.dedent_lines(lines);
            }
            
            return { lines };
        }
    };
};


/**
* Builds a writer that renders text as line comments.
*
* `write(lines)` dedents `lines` with `lib.dedent_lines`, puts `prefix` in
* front of every remaining line, and returns them joined by newlines with a
* trailing newline. The array passed in is modified in place.
*
* @param {Object} options - Configuration options
* @param {string} options.prefix - Text placed in front of each comment line
* @returns {Object} Writer object exposing `write(lines)`
*/
const LinesCommentWriter = ({ prefix }) => {
    const write = (lines) => {
        lib.dedent_lines(lines);
        lines.forEach((text, idx) => {
            lines[idx] = prefix + text;
        });
        const body = lines.join('\n');
        return `${body}\n`;
    };

    return { write };
};


/**
* Builds a writer that renders text as a single block comment.
*
* `write(lines)` dedents `lines` with `lib.dedent_lines`, puts `prefix` in
* front of every remaining line, and returns `start`, the prefixed lines and
* `end`, each followed by a newline. The array passed in is modified in
* place.
*
* @param {Object} options - Configuration options
* @param {string} options.start - Comment start marker (e.g. "/*")
* @param {string} options.end - Comment end marker (e.g. "* /")
* @param {string} options.prefix - Line prefix within comment (e.g. " * ")
* @returns {Object} Writer object exposing `write(lines)`
*/
const BlockCommentWriter = ({ start, end, prefix }) => {
    const write = (lines) => {
        lib.dedent_lines(lines);
        for ( const [idx, text] of lines.entries() ) {
            lines[idx] = prefix + text;
        }
        const body = lines.join('\n');
        return `${start}\n${body}\n${end}\n`;
    };

    return { write };
};


/**
* Creates a new CommentParser instance for parsing and handling source code comments
* 
* @returns {Object} An object with methods:
*   - supports: Checks if a file type is supported
*   - extract_top_comments: Extracts comments from source code
*   - output_comment: Formats and outputs comments in specified style
*/
const CommentParser = () => {
    // `object` holds the parser/writer factories by name; `data` maps file
    // extensions to language ids and language ids to [factory name, options]
    // specs that say how to instantiate those factories.
    const registry_ = {
        object: {
            parsers: {
                lines: LinesCommentParser,
                block: BlockCommentParser,
            },
            writers: {
                lines: LinesCommentWriter,
                block: BlockCommentWriter,
            },
        },
        data: {
            extensions: {
                js: 'javascript',
                cjs: 'javascript',
                mjs: 'javascript',
            },
            languages: {
                javascript: {
                    parsers: [
                        ['lines', {
                            prefix: '//',
                        }],
                        ['block', {
                            start: '/*',
                            end: '*/',
                            ignore_line_prefix: '*',
                        }],
                    ],
                    writers: {
                        lines: ['lines', {
                            prefix: '// '
                        }],
                        block: ['block', {
                            start: '/*',
                            end: ' */',
                            prefix: ' * ',
                        }]
                    },
                }
            },
        }
    };

    // Own-property lookup, so names inherited from Object.prototype
    // ("constructor", "__proto__", ...) are never mistaken for entries.
    const lookup_ = (table, key) => {
        return Object.prototype.hasOwnProperty.call(table, key)
            ? table[key]
            : undefined;
    };

    /**
    * Gets the language configuration for a given filename, based on the text
    * after the last '.' in the name
    * @param {Object} params - The parameters object
    * @param {string} params.filename - The filename to get the language for
    * @returns {Object} Object containing the language configuration
    * @throws {Error} If the extension or the language it maps to is not registered
    */
    const get_language_by_filename = ({ filename }) => {
        const components = String(filename).split('.');
        const extension = components[components.length - 1];

        const language_id = lookup_(registry_.data.extensions, extension);
        if ( ! language_id ) {
            throw new Error(`unrecognized file extension: ${extension}`);
        }

        const language = lookup_(registry_.data.languages, language_id);
        if ( ! language ) {
            // TODO: use strutil quot here
            throw new Error(`unrecognized language: ${language_id}`);
        }

        return { language };
    };

    /**
    * Checks if a given filename is supported by the comment parser
    * @param {Object} params - The parameters object
    * @param {string} params.filename - The filename to check support for
    * @returns {boolean} Whether the file type is supported
    */
    const supports = ({ filename }) => {
        try {
            get_language_by_filename({ filename });
        } catch (e) {
            // An unrecognized file type is the expected "no" answer here
            return false;
        }
        return true;
    };

    /**
    * Extracts the consecutive comments found at the start of `source`.
    * Each of the language's parsers is tried in order on a fork of the
    * stream; the first one that yields a comment wins and the stream moves
    * past it. Extraction stops at the first position where no parser
    * yields a comment.
    * @param {Object} params - The parameters object
    * @param {string} params.filename - The filename used to pick the language
    * @param {string} params.source - The source text to scan
    * @returns {Promise<Object[]>} The comments in order; each is the parser's
    *   result plus `type` (parser name) and `range` ([start, end] stream
    *   positions from before and after that parser ran)
    * @throws {Error} If the file type is not recognized
    */
    const extract_top_comments = async ({ filename, source }) => {
        const { language } = get_language_by_filename({ filename });

        // TODO: registry has `data` and `object`...
        //       ... maybe add `virt` (virtual), which will
        //       behave in the way the above code is written.

        // Instantiate each parser once rather than on every loop iteration
        const parsers = language.parsers.map(([name, options]) => ({
            name,
            parser: registry_.object.parsers[name](options),
        }));

        let ss = StringStream(source);
        const results = [];
        for (;;) {
            let comment;
            for ( const { name, parser } of parsers ) {
                // Parse on a fork so a failed attempt leaves `ss` untouched
                const ss_ = ss.fork();
                const start_pos = await ss_.get_pos();
                comment = await parser.parse(ss_);
                const end_pos = await ss_.get_pos();
                if ( comment ) {
                    ss = ss_;
                    comment.type = name;
                    comment.range = [start_pos, end_pos];
                    break;
                }
            }
            if ( ! comment ) break;
            results.push(comment);
        }

        return results;
    };

    /**
    * Outputs a comment in the specified style for a given filename and text
    * @param {Object} params - The parameters object
    * @param {string} params.filename - The filename to determine comment style
    * @param {string} params.style - The comment style to use ('lines' or 'block')
    * @param {string} params.text - The text content of the comment
    * @returns {string} The formatted comment string
    * @throws {Error} If the file type is not recognized
    * @throws {TypeError} If the language has no writer for `style`
    */
    const output_comment = ({ filename, style, text }) => {
        const { language } = get_language_by_filename({ filename });

        const spec = lookup_(language.writers, style);
        if ( ! spec ) {
            throw new TypeError(`unrecognized comment style: ${style}`);
        }

        const [writer_name, writer_options] = spec;
        const writer = registry_.object.writers[writer_name](writer_options);
        return writer.write(text.split('\n'));
    };

    return {
        supports,
        extract_top_comments,
        output_comment,
    };
};

// Public API of this module (CommonJS). Only the stream and parser
// factories are exported; `lib`, LinesCommentWriter and BlockCommentWriter
// are not in this list, so within this file the writers are reached through
// CommentParser's registry (see `output_comment`).
module.exports = {
    StringStream,
    LinesCommentParser,
    BlockCommentParser,
    CommentParser,
};