main.js 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. // METADATA // {"ai-commented":{"service":"claude"}}
  2. /*
  3. * Copyright (C) 2024-present Puter Technologies Inc.
  4. *
  5. * This file is part of Puter.
  6. *
  7. * Puter is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Affero General Public License as published
  9. * by the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Affero General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Affero General Public License
  18. * along with this program. If not, see <https://www.gnu.org/licenses/>.
  19. */
  20. const lib = {};
  21. lib.dedent_lines = lines => {
  22. // If any lines are just spaces, remove the spaces
  23. for ( let i=0 ; i < lines.length ; i++ ) {
  24. if ( /^\s+$/.test(lines[i]) ) lines[i] = '';
  25. }
  26. // Remove leading and trailing blanks
  27. while ( lines[0] === '' ) lines.shift();
  28. while ( lines[lines.length-1] === '' ) lines.pop();
  29. let min_indent = Number.MAX_SAFE_INTEGER;
  30. for ( let i=0 ; i < lines.length ; i++ ) {
  31. if ( lines[i] === '' ) continue;
  32. let n_spaces = 0;
  33. for ( let j=0 ; j < lines[i].length ; j++ ) {
  34. if ( lines[i][j] === ' ' ) n_spaces++;
  35. else break;
  36. }
  37. if ( n_spaces < min_indent ) min_indent = n_spaces;
  38. }
  39. for ( let i=0 ; i < lines.length ; i++ ) {
  40. if ( lines[i] === '' ) continue;
  41. lines[i] = lines[i].slice(min_indent);
  42. }
  43. };
  44. /**
  45. * Creates a StringStream object for parsing a string with position tracking
  46. * @param {string} str - The string to parse
  47. * @param {Object} [options] - Optional configuration object
  48. * @param {Object} [options.state_] - Initial state with position
  49. * @returns {Object} StringStream instance with parsing methods
  50. */
  51. const StringStream = (str, { state_ } = {}) => {
  52. const state = state_ ?? { pos: 0 };
  53. return {
  54. skip_whitespace () {
  55. while ( /^\s/.test(str[state.pos]) ) state.pos++;
  56. },
  57. // INCOMPLETE: only handles single chars
  58. skip_matching (items) {
  59. while ( items.some(item => {
  60. return str[state.pos] === item;
  61. }) ) state.pos++;
  62. },
  63. fwd (amount) {
  64. state.pos += amount ?? 1;
  65. },
  66. fork () {
  67. return StringStream(str, { state_: { pos: state.pos } });
  68. },
  69. async get_pos () {
  70. return state.pos;
  71. },
  72. async get_char () {
  73. return str[state.pos];
  74. },
  75. async matches (re_or_lit) {
  76. if ( re_or_lit instanceof RegExp ) {
  77. const re = re_or_lit;
  78. return re.test(str.slice(state.pos));
  79. }
  80. const lit = re_or_lit;
  81. return lit === str.slice(state.pos, state.pos + lit.length);
  82. },
  83. async get_until (re_or_lit) {
  84. let index;
  85. if ( re_or_lit instanceof RegExp ) {
  86. const re = re_or_lit;
  87. const result = re.exec(str.slice(state.pos));
  88. if ( ! result ) return;
  89. index = state.pos + result.index;
  90. } else {
  91. const lit = re_or_lit;
  92. const ind = str.slice(state.pos).indexOf(lit);
  93. // TODO: parser warnings?
  94. if ( ind === -1 ) return;
  95. index = state.pos + ind;
  96. }
  97. const start_pos = state.pos;
  98. state.pos = index;
  99. return str.slice(start_pos, index);
  100. },
  101. async debug () {
  102. const l1 = str.length;
  103. const l2 = str.length - state.pos;
  104. const clean = s => s.replace(/\n/, '{LF}');
  105. return `[stream : "${
  106. clean(str.slice(0, Math.min(6, l1)))
  107. }"... |${state.pos}| ..."${
  108. clean(str.slice(state.pos, state.pos + Math.min(6, l2)))
  109. }"]`
  110. }
  111. };
  112. };
  113. const LinesCommentParser = ({
  114. prefix
  115. }) => {
  116. return {
  117. parse: async (stream) => {
  118. stream.skip_whitespace();
  119. const lines = [];
  120. while ( await stream.matches(prefix) ) {
  121. const line = await stream.get_until('\n');
  122. if ( ! line ) return;
  123. lines.push(line);
  124. stream.fwd();
  125. stream.skip_matching([' ', '\t']);
  126. if ( await stream.get_char() === '\n' ){
  127. stream.fwd();
  128. break;
  129. }
  130. stream.skip_whitespace();
  131. }
  132. if ( lines.length === 0 ) return;
  133. for ( let i=0 ; i < lines.length ; i++ ) {
  134. lines[i] = lines[i].slice(prefix.length);
  135. }
  136. lib.dedent_lines(lines);
  137. return {
  138. lines,
  139. };
  140. }
  141. };
  142. };
  143. const BlockCommentParser = ({
  144. start,
  145. end,
  146. ignore_line_prefix,
  147. }) => {
  148. return {
  149. parse: async (stream) => {
  150. stream.skip_whitespace();
  151. if ( ! await stream.matches(start) ) return;
  152. stream.fwd(start.length);
  153. const contents = await stream.get_until(end);
  154. if ( ! contents ) return;
  155. stream.fwd(end.length);
  156. // console.log('ending at', await stream.debug())
  157. const lines = contents.split('\n');
  158. // === Formatting Time! === //
  159. // Special case: remove the last '*' after '/**'
  160. if ( lines[0].trim() === ignore_line_prefix ) {
  161. lines.shift();
  162. }
  163. // First dedent pass
  164. lib.dedent_lines(lines);
  165. // If all the lines start with asterisks, remove
  166. let allofem = true;
  167. for ( let i=0 ; i < lines.length ; i++ ) {
  168. if ( lines[i] === '' ) continue;
  169. if ( ! lines[i].startsWith(ignore_line_prefix) ) {
  170. allofem = false;
  171. break
  172. }
  173. }
  174. if ( allofem ) {
  175. for ( let i=0 ; i < lines.length ; i++ ) {
  176. if ( lines[i] === '' ) continue;
  177. lines[i] = lines[i].slice(ignore_line_prefix.length);
  178. }
  179. // Second dedent pass
  180. lib.dedent_lines(lines);
  181. }
  182. return { lines };
  183. }
  184. };
  185. };
  186. /**
  187. * Creates a writer for line-style comments with a specified prefix
  188. * @param {Object} options - Configuration options
  189. * @param {string} options.prefix - The prefix to use for each comment line
  190. * @returns {Object} A comment writer object
  191. */
  192. const LinesCommentWriter = ({ prefix }) => {
  193. return {
  194. write: (lines) => {
  195. lib.dedent_lines(lines);
  196. for ( let i=0 ; i < lines.length ; i++ ) {
  197. lines[i] = prefix + lines[i];
  198. }
  199. return lines.join('\n') + '\n';
  200. }
  201. };
  202. };
  203. /**
  204. * Creates a block comment writer with specified start/end markers and prefix
  205. * @param {Object} options - Configuration options
  206. * @param {string} options.start - Comment start marker (e.g. "/*")
  207. * @param {string} options.end - Comment end marker (e.g. "* /")
  208. * @param {string} options.prefix - Line prefix within comment (e.g. " * ")
  209. * @returns {Object} Block comment writer object
  210. */
  211. const BlockCommentWriter = ({ start, end, prefix }) => {
  212. return {
  213. write: (lines) => {
  214. lib.dedent_lines(lines);
  215. for ( let i=0 ; i < lines.length ; i++ ) {
  216. lines[i] = prefix + lines[i];
  217. }
  218. let s = start + '\n';
  219. s += lines.join('\n') + '\n';
  220. s += end + '\n';
  221. return s;
  222. }
  223. };
  224. };
  225. /**
  226. * Creates a new CommentParser instance for parsing and handling source code comments
  227. *
  228. * @returns {Object} An object with methods:
  229. * - supports: Checks if a file type is supported
  230. * - extract_top_comments: Extracts comments from source code
  231. * - output_comment: Formats and outputs comments in specified style
  232. */
  233. const CommentParser = () => {
  234. const registry_ = {
  235. object: {
  236. parsers: {
  237. lines: LinesCommentParser,
  238. block: BlockCommentParser,
  239. },
  240. writers: {
  241. lines: LinesCommentWriter,
  242. block: BlockCommentWriter,
  243. },
  244. },
  245. data: {
  246. extensions: {
  247. js: 'javascript',
  248. cjs: 'javascript',
  249. mjs: 'javascript',
  250. },
  251. languages: {
  252. javascript: {
  253. parsers: [
  254. ['lines', {
  255. prefix: '//',
  256. }],
  257. ['block', {
  258. start: '/*',
  259. end: '*/',
  260. ignore_line_prefix: '*',
  261. }],
  262. ],
  263. writers: {
  264. lines: ['lines', {
  265. prefix: '// '
  266. }],
  267. block: ['block', {
  268. start: '/*',
  269. end: ' */',
  270. prefix: ' * ',
  271. }]
  272. },
  273. }
  274. },
  275. }
  276. };
  277. /**
  278. * Gets the language configuration for a given filename by extracting and validating its extension
  279. * @param {Object} params - The parameters object
  280. * @param {string} params.filename - The filename to get the language for
  281. * @returns {Object} Object containing the language configuration
  282. */
  283. const get_language_by_filename = ({ filename }) => {
  284. const { language } = (({ filename }) => {
  285. const { language_id } = (({ filename }) => {
  286. const { extension } = (({ filename }) => {
  287. const components = ('' + filename).split('.');
  288. const extension = components[components.length - 1];
  289. return { extension };
  290. })({ filename });
  291. const language_id = registry_.data.extensions[extension];
  292. if ( ! language_id ) {
  293. throw new Error(`unrecognized language id: ` +
  294. language_id);
  295. }
  296. return { language_id };
  297. })({ filename });
  298. const language = registry_.data.languages[language_id];
  299. return { language };
  300. })({ filename });
  301. if ( ! language ) {
  302. // TODO: use strutil quot here
  303. throw new Error(`unrecognized language: ${language}`)
  304. }
  305. return { language };
  306. }
  307. /**
  308. * Checks if a given filename is supported by the comment parser
  309. * @param {Object} params - The parameters object
  310. * @param {string} params.filename - The filename to check support for
  311. * @returns {boolean} Whether the file type is supported
  312. */
  313. const supports = ({ filename }) => {
  314. try {
  315. get_language_by_filename({ filename });
  316. } catch (e) {
  317. return false;
  318. }
  319. return true;
  320. };
  321. const extract_top_comments = async ({ filename, source }) => {
  322. const { language } = get_language_by_filename({ filename });
  323. // TODO: registry has `data` and `object`...
  324. // ... maybe add `virt` (virtual), which will
  325. // behave in the way the above code is written.
  326. const inst_ = spec => registry_.object.parsers[spec[0]](spec[1]);
  327. let ss = StringStream(source);
  328. const results = [];
  329. for (;;) {
  330. let comment;
  331. for ( let parser of language.parsers ) {
  332. const parser_name = parser[0];
  333. parser = inst_(parser);
  334. const ss_ = ss.fork();
  335. const start_pos = await ss_.get_pos();
  336. comment = await parser.parse(ss_);
  337. const end_pos = await ss_.get_pos();
  338. if ( comment ) {
  339. ss = ss_;
  340. comment.type = parser_name;
  341. comment.range = [start_pos, end_pos];
  342. break;
  343. }
  344. }
  345. // console.log('comment?', comment);
  346. if ( ! comment ) break;
  347. results.push(comment);
  348. }
  349. return results;
  350. }
  351. /**
  352. * Outputs a comment in the specified style for a given filename and text
  353. * @param {Object} params - The parameters object
  354. * @param {string} params.filename - The filename to determine comment style
  355. * @param {string} params.style - The comment style to use ('lines' or 'block')
  356. * @param {string} params.text - The text content of the comment
  357. * @returns {string} The formatted comment string
  358. */
  359. const output_comment = ({ filename, style, text }) => {
  360. const { language } = get_language_by_filename({ filename });
  361. const inst_ = spec => registry_.object.writers[spec[0]](spec[1]);
  362. let writer = language.writers[style];
  363. writer = inst_(writer);
  364. const lines = text.split('\n');
  365. const s = writer.write(lines);
  366. return s;
  367. }
  368. return {
  369. supports,
  370. extract_top_comments,
  371. output_comment,
  372. };
  373. };
  374. module.exports = {
  375. StringStream,
  376. LinesCommentParser,
  377. BlockCommentParser,
  378. CommentParser,
  379. };