/*
 * Copyright (C) 2024 Puter Technologies Inc.
 *
 * This file is part of Puter.
 *
 * Puter is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
  /**
   * Namespace for small helpers shared by the comment parsers and writers.
   */
  const lib = {};

  /**
   * Normalizes a list of text lines in place:
   *   1. whitespace-only lines become empty strings,
   *   2. leading and trailing empty lines are removed,
   *   3. the smallest number of leading spaces found on any non-empty line
   *      is stripped from every non-empty line.
   *
   * Only the space character counts as indentation here; a line that starts
   * with a tab is treated as having an indent of zero.
   *
   * @param {string[]} lines - Lines to normalize. The array is mutated.
   * @returns {string[]} The same array instance that was passed in.
   */
  lib.dedent_lines = lines => {
      // Whitespace-only lines carry no indentation information; blank them
      // so they are skipped by the indent calculation below.
      for ( let i=0 ; i < lines.length ; i++ ) {
          if ( /^\s+$/.test(lines[i]) ) lines[i] = '';
      }

      // Drop leading blanks with one splice rather than repeated shift()
      let n_leading = 0;
      while ( n_leading < lines.length && lines[n_leading] === '' ) {
          n_leading++;
      }
      lines.splice(0, n_leading);

      // Drop trailing blanks
      while ( lines.length > 0 && lines[lines.length-1] === '' ) {
          lines.pop();
      }

      // Find the common indent (in spaces) across non-empty lines
      let min_indent = Number.MAX_SAFE_INTEGER;
      for ( const line of lines ) {
          if ( line === '' ) continue;
          const n_spaces = /^ */.exec(line)[0].length;
          if ( n_spaces < min_indent ) min_indent = n_spaces;
      }

      // Remove the common indent; empty lines stay empty
      for ( let i=0 ; i < lines.length ; i++ ) {
          if ( lines[i] === '' ) continue;
          lines[i] = lines[i].slice(min_indent);
      }

      return lines;
  };
  /**
   * Creates a cursor over a string. The cursor position lives in a small
   * state object so that `fork()` can create an independent copy that the
   * caller may later adopt or discard.
   *
   * The skip and `fwd` methods are synchronous; the query methods are async
   * and return promises.
   *
   * @param {string} str - The text to read.
   * @param {object} [options]
   * @param {{pos: number}} [options.state_] - Existing cursor state to use
   *   instead of starting at position 0.
   * @returns {object} Stream object with the methods documented below.
   */
  const StringStream = (str, { state_ } = {}) => {
      const state = state_ ?? { pos: 0 };

      // Regexes with the `g` or `y` flag remember `lastIndex` between calls.
      // Every search here runs against a fresh slice, so a stale `lastIndex`
      // would make results depend on earlier calls; always start from 0.
      const rewind_ = re => {
          re.lastIndex = 0;
          return re;
      };

      return {
          /** Advances the cursor past any whitespace characters. */
          skip_whitespace () {
              while ( state.pos < str.length && /\s/.test(str[state.pos]) ) {
                  state.pos++;
              }
          },

          /**
           * Advances the cursor while the current character equals one of
           * `items`. INCOMPLETE: only handles single chars.
           *
           * @param {string[]} items - Single-character strings to skip.
           */
          skip_matching (items) {
              // The bounds check stops the loop at end of input even if
              // `items` happens to contain `undefined`.
              while ( state.pos < str.length && items.includes(str[state.pos]) ) {
                  state.pos++;
              }
          },

          /**
           * Moves the cursor forward.
           *
           * @param {number} [amount] - Characters to advance; defaults to 1.
           */
          fwd (amount) {
              state.pos += amount ?? 1;
          },

          /**
           * @returns {object} A new stream over the same string starting at
           *   the current position, with its own independent cursor.
           */
          fork () {
              return StringStream(str, { state_: { pos: state.pos } });
          },

          /** @returns {Promise<number>} The current cursor position. */
          async get_pos () {
              return state.pos;
          },

          /**
           * @returns {Promise<string|undefined>} The character under the
           *   cursor, or undefined when the cursor is past the end.
           */
          async get_char () {
              return str[state.pos];
          },

          /**
           * Tests the remaining input against a pattern. A string literal
           * must appear exactly at the cursor. A RegExp is tested against
           * the remaining text and is not implicitly anchored.
           *
           * @param {RegExp|string} re_or_lit
           * @returns {Promise<boolean>}
           */
          async matches (re_or_lit) {
              if ( re_or_lit instanceof RegExp ) {
                  return rewind_(re_or_lit).test(str.slice(state.pos));
              }
              const lit = re_or_lit;
              return lit === str.slice(state.pos, state.pos + lit.length);
          },

          /**
           * Reads up to (not including) the first occurrence of a pattern
           * and leaves the cursor at the start of that occurrence.
           *
           * @param {RegExp|string} re_or_lit
           * @returns {Promise<string|undefined>} The text that was skipped
           *   (possibly ''), or undefined when the pattern does not occur;
           *   in that case the cursor does not move.
           */
          async get_until (re_or_lit) {
              const rest = str.slice(state.pos);
              let offset;
              if ( re_or_lit instanceof RegExp ) {
                  const result = rewind_(re_or_lit).exec(rest);
                  if ( ! result ) return;
                  offset = result.index;
              } else {
                  offset = rest.indexOf(re_or_lit);
                  // TODO: parser warnings?
                  if ( offset === -1 ) return;
              }
              const start_pos = state.pos;
              state.pos = start_pos + offset;
              return str.slice(start_pos, state.pos);
          },

          /**
           * @returns {Promise<string>} A one-line summary showing the first
           *   six characters of the input, the cursor position, and the six
           *   characters at the cursor, with newlines shown as `{LF}`.
           */
          async debug () {
              // Global flag: every newline is escaped, not just the first
              const clean = s => s.replace(/\n/g, '{LF}');
              const head = clean(str.slice(0, 6));
              const here = clean(str.slice(state.pos, state.pos + 6));
              return `[stream : "${head}"... |${state.pos}| ..."${here}"]`;
          }
      };
  };
  /**
   * Builds a parser for comments made of consecutive single-line comments
   * that all start with `prefix` (for example `//`).
   *
   * @param {object} config
   * @param {string} config.prefix - Marker that starts each comment line.
   * @returns {{parse: function(object): Promise<{lines: string[]}|undefined>}}
   */
  const LinesCommentParser = ({
      prefix
  }) => {
      return {
          /**
           * Reads one run of prefixed lines from `stream`. The run ends at
           * the first line that does not start with the prefix, at a blank
           * line (which is consumed), or at end of input.
           *
           * @param {object} stream - A stream as produced by StringStream.
           * @returns {Promise<{lines: string[]}|undefined>} The comment text
           *   with the prefix and common indentation removed, or undefined
           *   when the stream is not positioned at such a comment.
           */
          parse: async (stream) => {
              stream.skip_whitespace();
              const lines = [];
              while ( await stream.matches(prefix) ) {
                  let line = await stream.get_until('\n');

                  // No newline left: this is the final line of the input.
                  // Read through to the end rather than discarding the
                  // whole comment.
                  let at_eof = false;
                  if ( line === undefined ) {
                      line = await stream.get_until(/$/);
                      at_eof = true;
                  }

                  if ( ! line ) return;
                  lines.push(line);
                  if ( at_eof ) break;

                  // Step over the newline, then look at the next line
                  stream.fwd();
                  stream.skip_matching([' ', '\t']);
                  if ( await stream.get_char() === '\n' ) {
                      // A blank line terminates this comment
                      stream.fwd();
                      break;
                  }
                  stream.skip_whitespace();
              }
              if ( lines.length === 0 ) return;

              const stripped = lines.map(line => line.slice(prefix.length));
              lib.dedent_lines(stripped);
              return {
                  lines: stripped,
              };
          }
      };
  };
  135. const BlockCommentParser = ({
  136. start,
  137. end,
  138. ignore_line_prefix,
  139. }) => {
  140. return {
  141. parse: async (stream) => {
  142. stream.skip_whitespace();
  143. if ( ! await stream.matches(start) ) return;
  144. stream.fwd(start.length);
  145. const contents = await stream.get_until(end);
  146. if ( ! contents ) return;
  147. stream.fwd(end.length);
  148. // console.log('ending at', await stream.debug())
  149. const lines = contents.split('\n');
  150. // === Formatting Time! === //
  151. // Special case: remove the last '*' after '/**'
  152. if ( lines[0].trim() === ignore_line_prefix ) {
  153. lines.shift();
  154. }
  155. // First dedent pass
  156. lib.dedent_lines(lines);
  157. // If all the lines start with asterisks, remove
  158. let allofem = true;
  159. for ( let i=0 ; i < lines.length ; i++ ) {
  160. if ( lines[i] === '' ) continue;
  161. if ( ! lines[i].startsWith(ignore_line_prefix) ) {
  162. allofem = false;
  163. break
  164. }
  165. }
  166. if ( allofem ) {
  167. for ( let i=0 ; i < lines.length ; i++ ) {
  168. if ( lines[i] === '' ) continue;
  169. lines[i] = lines[i].slice(ignore_line_prefix.length);
  170. }
  171. // Second dedent pass
  172. lib.dedent_lines(lines);
  173. }
  174. return { lines };
  175. }
  176. };
  177. };
  /**
   * Builds a writer that renders text as consecutive single-line comments.
   *
   * @param {object} config
   * @param {string} config.prefix - Text placed before every line
   *   (for example `// `).
   * @returns {{write: function(string[]): string}}
   */
  const LinesCommentWriter = ({ prefix }) => {
      return {
          /**
           * @param {string[]} lines - Comment text, one entry per line.
           *   The array is not modified.
           * @returns {string} The dedented lines, each with the prefix
           *   prepended, joined by newlines and ending with a newline.
           */
          write: (lines) => {
              // Work on a copy: dedent_lines mutates its argument, and the
              // caller's array must stay reusable.
              const body = [...lines];
              lib.dedent_lines(body);
              return body.map(line => prefix + line).join('\n') + '\n';
          }
      };
  };
  /**
   * Builds a writer that renders text as one delimited block comment.
   *
   * @param {object} config
   * @param {string} config.start - Opening delimiter, written on its own line.
   * @param {string} config.end - Closing delimiter, written on its own line.
   * @param {string} config.prefix - Text placed before every body line
   *   (for example ` * `).
   * @returns {{write: function(string[]): string}}
   */
  const BlockCommentWriter = ({ start, end, prefix }) => {
      return {
          /**
           * @param {string[]} lines - Comment text, one entry per line.
           *   The array is not modified.
           * @returns {string} `start`, the dedented and prefixed body, and
           *   `end`, each followed by a newline.
           */
          write: (lines) => {
              // Work on a copy: dedent_lines mutates its argument, and the
              // caller's array must stay reusable.
              const body = [...lines];
              lib.dedent_lines(body);
              const text = body.map(line => prefix + line).join('\n');
              return `${start}\n${text}\n${end}\n`;
          }
      };
  };
  /**
   * Creates the comment tool: it maps a filename to a language by its
   * extension, extracts the comments at the top of a source text, and
   * renders text as a comment in a requested style.
   *
   * @returns {{
   *   supports: function({filename: string}): boolean,
   *   extract_top_comments: function({filename: string, source: string}): Promise<object[]>,
   *   output_comment: function({filename: string, style: string, text: string}): string,
   * }}
   */
  const CommentParser = () => {
      // `object` holds factories; `data` holds plain configuration that
      // refers to those factories by name.
      const registry_ = {
          object: {
              parsers: {
                  lines: LinesCommentParser,
                  block: BlockCommentParser,
              },
              writers: {
                  lines: LinesCommentWriter,
                  block: BlockCommentWriter,
              },
          },
          data: {
              extensions: {
                  js: 'javascript',
                  cjs: 'javascript',
                  mjs: 'javascript',
              },
              languages: {
                  javascript: {
                      parsers: [
                          ['lines', {
                              prefix: '//',
                          }],
                          ['block', {
                              start: '/*',
                              end: '*/',
                              ignore_line_prefix: '*',
                          }],
                      ],
                      writers: {
                          lines: ['lines', {
                              prefix: '// '
                          }],
                          block: ['block', {
                              start: '/*',
                              end: ' */',
                              prefix: ' * ',
                          }]
                      },
                  }
              },
          }
      };

      // Own-property lookup, so names such as "constructor" or "__proto__"
      // never resolve to something inherited from Object.prototype.
      const own_ = (table, key) => {
          return Object.prototype.hasOwnProperty.call(table, key)
              ? table[key]
              : undefined;
      };

      /**
       * Resolves the language configuration for a filename. The extension
       * is whatever follows the last '.', or the whole name when there is
       * no '.'.
       *
       * @param {object} args
       * @param {string} args.filename
       * @returns {{language: object}}
       * @throws {Error} When the extension or its language is not registered.
       */
      const get_language_by_filename = ({ filename }) => {
          const components = ('' + filename).split('.');
          const extension = components[components.length - 1];

          const language_id = own_(registry_.data.extensions, extension);
          if ( ! language_id ) {
              throw new Error(
                  'no language registered for file extension ' +
                  JSON.stringify(extension)
              );
          }

          const language = own_(registry_.data.languages, language_id);
          if ( ! language ) {
              throw new Error(
                  `unrecognized language: ${JSON.stringify(language_id)}`
              );
          }

          return { language };
      };

      /**
       * @param {object} args
       * @param {string} args.filename
       * @returns {boolean} Whether a language is registered for the file.
       */
      const supports = ({ filename }) => {
          try {
              get_language_by_filename({ filename });
          } catch (e) {
              // Any lookup failure means "not supported"
              return false;
          }
          return true;
      };

      /**
       * Collects the consecutive comments found at the start of `source`.
       * At each position the language's parsers are tried in their
       * configured order; collection stops at the first position where
       * none of them produces a comment.
       *
       * @param {object} args
       * @param {string} args.filename - Used only to select the language.
       * @param {string} args.source - Source text to scan.
       * @returns {Promise<object[]>} One object per comment: whatever the
       *   parser returned, plus `type` (the parser name) and `range`
       *   (`[start, end]` stream positions; `start` is taken before the
       *   parser skips leading whitespace).
       * @throws {Error} When the file's language is not registered.
       */
      const extract_top_comments = async ({ filename, source }) => {
          const { language } = get_language_by_filename({ filename });

          // TODO: registry has `data` and `object`...
          //       ... maybe add `virt` (virtual), which will
          //       behave in the way the above code is written.

          // Instantiate each parser once, not on every loop iteration
          const parsers = language.parsers.map(([name, config]) => {
              return { name, parser: registry_.object.parsers[name](config) };
          });

          let ss = StringStream(source);
          const results = [];
          for (;;) {
              let comment;
              for ( const { name, parser } of parsers ) {
                  // Parse on a fork so a failed attempt leaves `ss` as is
                  const ss_ = ss.fork();
                  const start_pos = await ss_.get_pos();
                  comment = await parser.parse(ss_);
                  const end_pos = await ss_.get_pos();
                  if ( ! comment ) continue;

                  ss = ss_;
                  comment.type = name;
                  comment.range = [start_pos, end_pos];
                  break;
              }
              if ( ! comment ) break;
              results.push(comment);
          }
          return results;
      };

      /**
       * Renders `text` as a comment for the language of `filename`.
       *
       * @param {object} args
       * @param {string} args.filename - Used only to select the language.
       * @param {string} args.style - Key into the language's writers
       *   (the javascript entry defines `lines` and `block`).
       * @param {string} args.text - Comment text; split on '\n'.
       * @returns {string} The rendered comment.
       * @throws {Error} When the language or the style is not registered.
       */
      const output_comment = ({ filename, style, text }) => {
          const { language } = get_language_by_filename({ filename });

          const spec = own_(language.writers, style);
          if ( ! spec ) {
              throw new Error(
                  `unrecognized comment style: ${JSON.stringify(style)}`
              );
          }

          const writer = registry_.object.writers[spec[0]](spec[1]);
          return writer.write(text.split('\n'));
      };

      return {
          supports,
          extract_top_comments,
          output_comment,
      };
  };
  324. module.exports = {
  325. StringStream,
  326. LinesCommentParser,
  327. BlockCommentParser,
  328. CommentParser,
  329. };