1
const std = @import("std");
2
const assert = std.debug.assert;
3
const ascii = std.ascii;
4
5
const main = @import("main.zig");
6
const strings = @import("strings.zig");
7
const nodes = @import("nodes.zig");
8
const scanners = @import("scanners.zig");
9
const inlines = @import("inlines.zig");
10
const Options = @import("options.zig").Options;
11
const table = @import("table.zig");
12
const AutolinkProcessor = @import("autolink.zig").AutolinkProcessor;
13
14
/// Distance between tab stops, in columns; used to turn tab characters into
/// column positions while scanning a line.
const TAB_STOP = 4;

/// Indentation, in columns, at or beyond which a line counts as "indented"
/// for the purposes of indented code blocks.
const CODE_INDENT = 4;
16
17
/// Destination and title stored for one link reference definition.
/// Both slices are allocated by the parser and released in `Parser.deinit`.
pub const Reference = struct {
    /// Link destination, as returned by `strings.cleanUrl`.
    url: []u8,
    /// Link title, as returned by `strings.cleanTitle`.
    title: []u8,
};
21
22
pub const Parser = struct {
23
/// Allocator used for nodes, temporary line buffers and reference-map entries.
allocator: std.mem.Allocator,
/// Link reference definitions keyed by normalized label; keys and values are
/// owned by the map and freed in `deinit`.
refmap: std.StringHashMap(Reference),
/// NOTE(review): nothing in this file stores elements here — confirm its
/// users elsewhere before relying on it.
hack_refmapKeys: std.ArrayList([]u8),
/// Document node created by `init` and returned by `finish`.
root: *nodes.AstNode,
/// Node that new line content is currently being attached to.
current: *nodes.AstNode,
/// Options the parser was created with.
options: Options,

/// Number of lines handed to `processLine` so far.
line_number: u32 = 0,
/// Byte position in the current line; reset to 0 by `processLine`.
offset: usize = 0,
/// Column matching `offset`, with tabs expanded to `TAB_STOP` boundaries.
column: usize = 0,
/// Byte position of the first non-space, non-tab byte (see `findFirstNonspace`).
first_nonspace: usize = 0,
/// Column matching `first_nonspace`.
first_nonspace_column: usize = 0,
/// `first_nonspace_column - column`, as computed by `findFirstNonspace`.
indent: usize = 0,
/// True when the rest of the line holds only spaces/tabs before the line end.
blank: bool = false,
/// True when `advanceOffset` stopped part-way through a tab character.
partially_consumed_tab: bool = false,
/// Length of the previous line without its trailing `\n` / `\r`.
last_line_length: usize = 0,

/// Lookup tables filled in by `inlines.Subject.setCharsForOptions` during
/// `init` and passed to every `inlines.Subject` this parser creates.
special_chars: [256]bool = [_]bool{false} ** 256,
skip_chars: [256]bool = [_]bool{false} ** 256,
42
43
/// Creates a parser whose tree consists of a single `Document` node, which is
/// both the root and the current node.
///
/// Returns an error if the document node cannot be allocated.
pub fn init(allocator: std.mem.Allocator, options: Options) !Parser {
    const document = try nodes.AstNode.create(allocator, .{
        .value = .Document,
        .content = std.ArrayList(u8).init(allocator),
    });

    var self: Parser = .{
        .allocator = allocator,
        .refmap = std.StringHashMap(Reference).init(allocator),
        .hack_refmapKeys = std.ArrayList([]u8).init(allocator),
        .root = document,
        .current = document,
        .options = options,
    };

    // Fill the per-byte lookup tables used later by the inline parser.
    inlines.Subject.setCharsForOptions(&options, &self.special_chars, &self.skip_chars);

    return self;
}
62
63
/// Releases everything the parser itself owns: each reference-map key, URL
/// and title, the map, and the `hack_refmapKeys` list.
/// The node tree reachable from `root` is not touched here.
pub fn deinit(self: *Parser) void {
    var it = self.refmap.iterator();
    while (it.next()) |entry| {
        self.allocator.free(entry.key_ptr.*);
        self.allocator.free(entry.value_ptr.url);
        self.allocator.free(entry.value_ptr.title);
    }
    self.refmap.deinit();

    // The list is created in `init`, so it must be released here as well.
    // Only its backing storage is freed: nothing in this file stores elements
    // in it, so element ownership is deliberately left alone.
    self.hack_refmapKeys.deinit();
}
72
73
/// Splits `s` into lines and passes each one to `processLine`.
///
/// A line ends at a byte accepted by `strings.isLineEndChar`, or at the end
/// of `s`. NUL bytes are replaced with U+FFFD: the text before the NUL is
/// collected in a temporary buffer together with the replacement character,
/// and the remainder of the line is appended once its end is found.
///
/// Returns any error from buffer growth or from `processLine`.
pub fn feed(self: *Parser, s: []const u8) !void {
    const sz = s.len;
    var linebuf = std.ArrayList(u8).init(self.allocator);
    defer linebuf.deinit();

    var i: usize = 0;
    while (i < sz) {
        // Scan to the end of the line, stopping early at a NUL byte.
        var process = true;
        var eol = i;
        while (eol < sz) : (eol += 1) {
            if (strings.isLineEndChar(s[eol])) break;
            if (s[eol] == 0) {
                process = false;
                break;
            }
        }

        if (!process) {
            // Buffer the text so far plus the replacement character, then
            // keep scanning the same logical line after the NUL.
            assert(eol < sz and s[eol] == 0);
            try linebuf.appendSlice(s[i..eol]);
            try linebuf.appendSlice("\u{fffd}");
            i = eol + 1;
            continue;
        }

        if (linebuf.items.len != 0) {
            try linebuf.appendSlice(s[i..eol]);
            try self.processLine(linebuf.items);
            linebuf.clearRetainingCapacity();
        } else if (eol < sz and s[eol] == '\n') {
            // Pass the newline along so `processLine` need not copy the line.
            try self.processLine(s[i .. eol + 1]);
        } else {
            try self.processLine(s[i..eol]);
        }

        // Step over the line ending ("\r", "\n" or "\r\n").
        i = eol;
        if (i < sz and s[i] == '\r') i += 1;
        if (i < sz and s[i] == '\n') i += 1;
    }

    // If the input ended right after a NUL byte, the buffered text has not
    // been processed yet; without this flush that final line would be lost.
    if (linebuf.items.len != 0) {
        try self.processLine(linebuf.items);
    }
}
114
115
/// Finalizes the document (which also runs inline parsing), post-processes
/// text nodes, and returns the root node.
pub fn finish(self: *Parser) !*nodes.AstNode {
    const document = self.root;
    try self.finalizeDocument();
    try self.postprocessTextNodes();
    return document;
}
120
121
/// Scans forward from `offset` over spaces and tabs and records the result in
/// `first_nonspace`, `first_nonspace_column`, `indent` and `blank`.
/// `offset` and `column` themselves are left unchanged.
fn findFirstNonspace(self: *Parser, line: []const u8) void {
    var cursor = self.offset;
    var col = self.column;
    // Columns left until the next tab stop.
    var until_stop: usize = TAB_STOP - (self.column % TAB_STOP);

    while (cursor < line.len) : (cursor += 1) {
        switch (line[cursor]) {
            ' ' => {
                col += 1;
                until_stop -= 1;
                if (until_stop == 0) until_stop = TAB_STOP;
            },
            '\t' => {
                col += until_stop;
                until_stop = TAB_STOP;
            },
            else => break,
        }
    }

    self.first_nonspace = cursor;
    self.first_nonspace_column = col;
    self.indent = col - self.column;
    self.blank = cursor < line.len and strings.isLineEndChar(line[cursor]);
}
152
153
/// Processes one line of input.
///
/// If `input` does not already end with a line-end byte, a temporary copy
/// terminated with `\n` is made so later stages can rely on the terminator.
/// The line is then matched against the open blocks, new blocks are opened,
/// and any remaining text is added to the resulting container.
///
/// Returns any allocation or block-parsing error; the temporary copy is
/// released on every exit path.
fn processLine(self: *Parser, input: []const u8) !void {
    var new_line: ?[]u8 = null;
    // Release the temporary copy on error returns too, not only on success.
    defer if (new_line) |nl| self.allocator.free(nl);

    const needs_terminator = input.len == 0 or !strings.isLineEndChar(input[input.len - 1]);
    const line: []const u8 = if (needs_terminator) terminated: {
        const buf = try self.allocator.alloc(u8, input.len + 1);
        new_line = buf;
        std.mem.copyForwards(u8, buf, input);
        buf[input.len] = '\n';
        break :terminated buf;
    } else input;

    self.offset = 0;
    self.column = 0;
    self.blank = false;
    self.partially_consumed_tab = false;

    // Skip a UTF-8 byte order mark at the very start of the document.
    if (self.line_number == 0 and line.len >= 3 and std.mem.eql(u8, line[0..3], "\u{feff}")) {
        self.offset += 3;
    }

    self.line_number += 1;

    const result = try self.checkOpenBlocks(line);
    if (result.container) |last_matched_container| {
        const current = self.current;
        const container = try self.openNewBlocks(last_matched_container, line, result.all_matched);
        if (current == self.current) {
            try self.addTextToContainer(container, last_matched_container, line);
        }
    }

    // Record the line length without its trailing "\n" and "\r".
    self.last_line_length = line.len;
    if (self.last_line_length > 0 and line[self.last_line_length - 1] == '\n') {
        self.last_line_length -= 1;
    }
    if (self.last_line_length > 0 and line[self.last_line_length - 1] == '\r') {
        self.last_line_length -= 1;
    }
}
195
196
/// Outcome of matching a line against the chain of currently open blocks.
const CheckOpenBlocksResult = struct {
    /// True when every open block along the chain matched the line.
    all_matched: bool = false,
    /// Container to continue from; `null` means the line needs no further
    /// processing (set when a fenced code block was just closed).
    container: ?*nodes.AstNode,
};
200
201
/// Matches `line` against the open blocks, starting from the root.
///
/// When every block matched, the deepest open block is returned. When one
/// failed to match, the parent of that block is returned instead. A `null`
/// container from the inner walk is passed through unchanged.
fn checkOpenBlocks(self: *Parser, line: []const u8) !CheckOpenBlocksResult {
    const inner = try self.checkOpenBlocksInner(self.root, line);
    const reached = inner.container orelse return inner;
    if (inner.all_matched) return inner;

    return CheckOpenBlocksResult{
        .all_matched = false,
        .container = reached.parent.?,
    };
}
211
212
/// Walks down the chain of open last-children from `start_container` and
/// checks, for each one, whether `line` continues it.
///
/// Returns the first block that does not continue (with `all_matched` left
/// false), or the deepest open block with `all_matched = true`. A fenced code
/// block that is closed by this line yields a `null` container.
fn checkOpenBlocksInner(self: *Parser, start_container: *nodes.AstNode, line: []const u8) !CheckOpenBlocksResult {
    var container = start_container;

    while (container.lastChildIsOpen()) {
        container = container.last_child.?;
        self.findFirstNonspace(line);

        const continues = switch (container.data.value) {
            .BlockQuote => self.parseBlockQuotePrefix(line),
            .Item => |*item| self.parseNodeItemPrefix(line, container, item),
            .CodeBlock => switch (try self.parseCodeBlockPrefix(line, container)) {
                .DoNotContinue => return CheckOpenBlocksResult{ .container = null },
                .NoMatch => false,
                .Match => true,
            },
            .HtmlBlock => |html| self.parseHtmlBlockPrefix(html.block_type),
            .Paragraph => !self.blank,
            .Table => try table.matches(self.allocator, line[self.first_nonspace..]),
            // These never continue onto a following line.
            .Heading, .TableRow, .TableCell => false,
            .Document, .List, .ThematicBreak, .Text, .SoftBreak, .LineBreak, .Code, .HtmlInline, .Emph, .Strong, .Strikethrough, .Link, .Image => true,
        };

        if (!continues) {
            return CheckOpenBlocksResult{ .container = container };
        }
    }

    return CheckOpenBlocksResult{
        .all_matched = true,
        .container = container,
    };
}
268
269
/// Starting at `input_container`, repeatedly tries to open a new block at the
/// current position of `line` and descends into whatever it opened.
///
/// The block starts are tried in a fixed order: block quote, ATX heading,
/// fenced code, HTML block, setext heading underline, thematic break, list
/// item, indented code, and finally (when the table extension is enabled) a
/// table. The loop stops when nothing opens, when the container is a code or
/// HTML block, or when the container accepts lines directly.
///
/// Returns the innermost container reached.
fn openNewBlocks(self: *Parser, input_container: *nodes.AstNode, line: []const u8, all_matched: bool) !*nodes.AstNode {
    var container = input_container;
    // True while an open paragraph could still take this line as a continuation.
    var maybe_lazy = self.current.data.value == .Paragraph;

    var matched: usize = undefined;
    var nl: nodes.NodeList = undefined;
    var sc: scanners.SetextChar = undefined;

    while (true) {
        switch (container.data.value) {
            .CodeBlock, .HtmlBlock => break,
            else => {},
        }

        self.findFirstNonspace(line);
        const indented = self.indent >= CODE_INDENT;
        const in_paragraph = container.data.value == .Paragraph;
        const rest = line[self.first_nonspace..];

        if (!indented and line[self.first_nonspace] == '>') {
            // Block quote: consume the marker and one optional space/tab.
            const marker_width = self.first_nonspace + 1 - self.offset;
            self.advanceOffset(line, marker_width, false);
            if (strings.isSpaceOrTab(line[self.offset])) {
                self.advanceOffset(line, 1, true);
            }
            container = try self.addChild(container, .BlockQuote);
        } else if (!indented and try scanners.unwrap(scanners.atxHeadingStart(rest), &matched)) {
            self.advanceOffset(line, self.first_nonspace + matched - self.offset, false);
            container = try self.addChild(container, .{ .Heading = .{} });

            // Count the leading '#' characters, capping the level at 6.
            var cursor = std.mem.indexOfScalar(u8, rest, '#').? + self.first_nonspace;
            var level: u8 = 0;
            while (line[cursor] == '#') : (cursor += 1) {
                if (level < 6) level += 1;
            }

            container.data.value = .{ .Heading = .{ .level = level, .setext = false } };
        } else if (!indented and try scanners.unwrap(scanners.openCodeFence(rest), &matched)) {
            const fence_start = self.first_nonspace;
            const line_offset = self.offset;
            const fenced_block = nodes.NodeCodeBlock{
                .fenced = true,
                .fence_char = line[fence_start],
                .fence_length = matched,
                .fence_offset = fence_start - line_offset,
                .info = null,
                .literal = std.ArrayList(u8).init(self.allocator),
            };
            container = try self.addChild(container, .{ .CodeBlock = fenced_block });
            self.advanceOffset(line, fence_start + matched - line_offset, false);
        } else if (!indented and ((try scanners.htmlBlockStart(rest, &matched)) or
            (!in_paragraph and try scanners.htmlBlockStart7(rest, &matched))))
        {
            // The second scanner is only consulted outside a paragraph.
            const html_block = nodes.NodeHtmlBlock{
                .block_type = @truncate(matched),
                .literal = std.ArrayList(u8).init(self.allocator),
            };
            container = try self.addChild(container, .{ .HtmlBlock = html_block });
        } else if (!indented and in_paragraph and try scanners.setextHeadingLine(rest, &sc)) {
            // A setext underline turns the paragraph into a heading, but only
            // if something is left after reference definitions are removed.
            const has_content = try self.resolveReferenceLinkDefinitions(&container.data.content);
            if (has_content) {
                container.data.value = .{
                    .Heading = .{
                        .level = switch (sc) {
                            .Equals => 1,
                            .Hyphen => 2,
                        },
                        .setext = true,
                    },
                };
                self.advanceOffset(line, line.len - 1 - self.offset, false);
            }
        } else if (!indented and !(in_paragraph and !all_matched) and
            try scanners.unwrap(scanners.thematicBreak(rest), &matched))
        {
            container = try self.addChild(container, .ThematicBreak);
            self.advanceOffset(line, line.len - 1 - self.offset, false);
        } else if ((!indented or container.data.value == .List) and self.indent < 4 and
            parseListMarker(line, self.first_nonspace, in_paragraph, &matched, &nl))
        {
            self.advanceOffset(line, self.first_nonspace + matched - self.offset, false);

            // Remember where the marker ended so the whitespace scan below
            // can be undone.
            const saved_tab = self.partially_consumed_tab;
            const saved_offset = self.offset;
            const saved_column = self.column;

            while (self.column - saved_column <= 5 and strings.isSpaceOrTab(line[self.offset])) {
                self.advanceOffset(line, 1, true);
            }

            const spaces = self.column - saved_column;
            if (spaces >= 5 or spaces < 1 or strings.isLineEndChar(line[self.offset])) {
                // Too much, too little, or only whitespace after the marker:
                // the content starts one column after the marker.
                nl.padding = matched + 1;
                self.partially_consumed_tab = saved_tab;
                self.offset = saved_offset;
                self.column = saved_column;
                if (spaces > 0) {
                    self.advanceOffset(line, 1, true);
                }
            } else {
                nl.padding = matched + spaces;
            }

            nl.marker_offset = self.indent;

            // Start a new list unless the container is a list of the same kind.
            const needs_new_list = switch (container.data.value) {
                .List => |*existing| !listsMatch(&nl, existing),
                else => true,
            };
            if (needs_new_list) {
                container = try self.addChild(container, .{ .List = nl });
            }

            container = try self.addChild(container, .{ .Item = nl });
        } else if (indented and !maybe_lazy and !self.blank) {
            self.advanceOffset(line, CODE_INDENT, true);
            container = try self.addChild(container, .{
                .CodeBlock = .{
                    .fenced = false,
                    .fence_char = 0,
                    .fence_length = 0,
                    .fence_offset = 0,
                    .info = null,
                    .literal = std.ArrayList(u8).init(self.allocator),
                },
            });
        } else {
            var replace: bool = undefined;
            const new_container = if (!indented and self.options.extensions.table)
                try table.tryOpeningBlock(self, container, line, &replace)
            else
                null;

            const opened = new_container orelse break;
            if (replace) {
                // The table takes the place of the container it grew out of.
                container.insertAfter(opened);
                container.detachDeinit();
            }
            container = opened;
        }

        if (container.data.value.acceptsLines()) {
            break;
        }

        maybe_lazy = false;
    }

    return container;
}
437
438
/// Creates a node holding `value` and appends it under `input_parent`, or
/// under the nearest ancestor that can contain it. Every block that cannot
/// contain `value` is finalized on the way up.
///
/// Returns the new node, whose `start_line` is the current line number.
pub fn addChild(self: *Parser, input_parent: *nodes.AstNode, value: nodes.NodeValue) !*nodes.AstNode {
    var host = input_parent;
    while (true) {
        if (host.data.value.canContainType(value)) break;
        host = (try self.finalize(host)).?;
    }

    const child = try nodes.AstNode.create(self.allocator, .{
        .value = value,
        .start_line = self.line_number,
        .content = std.ArrayList(u8).init(self.allocator),
    });
    host.append(child);
    return child;
}
452
453
/// Adds what is left of `line` to `input_container` after block starts have
/// been handled, updates the `last_line_blank` bookkeeping, and sets
/// `self.current`.
///
/// A non-blank line is appended to a still-open paragraph (`self.current`)
/// when no new block was opened and the paragraph is not the last matched
/// container. Otherwise the unmatched open blocks are finalized and the text
/// goes to the container, or to a new paragraph inside it.
fn addTextToContainer(self: *Parser, input_container: *nodes.AstNode, last_matched_container: *nodes.AstNode, line: []const u8) !void {
    var container = input_container;
    self.findFirstNonspace(line);

    if (self.blank) {
        if (container.last_child) |tail| {
            tail.data.last_line_blank = true;
        }
    }

    // Some block kinds never count a blank line as "their" trailing blank.
    container.data.last_line_blank = self.blank and switch (container.data.value) {
        .BlockQuote, .Heading, .ThematicBreak => false,
        .CodeBlock => |code| !code.fenced,
        .Item => container.first_child != null or container.data.start_line != self.line_number,
        else => true,
    };

    // Clear the flag on every ancestor.
    var ancestor = container.parent;
    while (ancestor) |node| : (ancestor = node.parent) {
        node.data.last_line_blank = false;
    }

    const continues_paragraph = self.current != last_matched_container and
        container == last_matched_container and
        !self.blank and
        self.current.data.value == .Paragraph;
    if (continues_paragraph) {
        try self.addLine(self.current, line);
        return;
    }

    // Close every open block that this line did not match.
    while (self.current != last_matched_container) {
        self.current = (try self.finalize(self.current)).?;
    }

    switch (container.data.value) {
        .CodeBlock => try self.addLine(container, line),
        .HtmlBlock => |html| {
            try self.addLine(container, line);

            // Block types 1-5 have an explicit end condition on the line.
            const tail = line[self.first_nonspace..];
            const ends_here = switch (html.block_type) {
                1 => scanners.htmlBlockEnd1(tail),
                2 => scanners.htmlBlockEnd2(tail),
                3 => scanners.htmlBlockEnd3(tail),
                4 => scanners.htmlBlockEnd4(tail),
                5 => scanners.htmlBlockEnd5(tail),
                else => false,
            };
            if (ends_here) {
                container = (try self.finalize(container)).?;
            }
        },
        else => if (!self.blank) {
            if (container.data.value.acceptsLines()) {
                // ATX headings drop their closing run of '#'.
                const text: []const u8 = switch (container.data.value) {
                    .Heading => |heading| if (heading.setext) line else strings.chopTrailingHashtags(line),
                    else => line,
                };

                const count = self.first_nonspace - self.offset;
                if (self.first_nonspace <= text.len) {
                    self.advanceOffset(text, count, false);
                    try self.addLine(container, text);
                }
            } else {
                container = try self.addChild(container, .Paragraph);
                const count = self.first_nonspace - self.offset;
                self.advanceOffset(line, count, false);
                try self.addLine(container, line);
            }
        },
    }

    self.current = container;
}
534
535
/// Appends the part of `line` from `offset` onward to `node`'s content.
///
/// If a tab was only partly consumed, the tab byte is skipped and the columns
/// remaining up to the next tab stop are written as spaces first.
/// `node` must still be open.
fn addLine(self: *Parser, node: *nodes.AstNode, line: []const u8) !void {
    assert(node.data.open);

    if (self.partially_consumed_tab) {
        self.offset += 1;
        const padding = TAB_STOP - (self.column % TAB_STOP);
        for (0..padding) |_| {
            try node.data.content.append(' ');
        }
    }

    if (self.offset >= line.len) return;
    try node.data.content.appendSlice(line[self.offset..]);
}
548
549
/// Finalizes every block from `current` up to the root, then the root itself,
/// and finally runs inline parsing over the whole tree.
fn finalizeDocument(self: *Parser) !void {
    while (self.current != self.root) self.current = (try self.finalize(self.current)).?;

    // The root's parent is of no interest, so the result is discarded.
    _ = try self.finalize(self.root);

    try self.processInlines();
}
557
558
/// Closes `node` and does the work specific to its block kind:
///
/// - Paragraph: strips leading link reference definitions and removes the
///   node if nothing else is left.
/// - CodeBlock: for fenced blocks, takes the first line as the info string;
///   for indented blocks, trims trailing blank lines. The content is then
///   moved into `literal`.
/// - HtmlBlock: moves the content into `literal`.
/// - List: computes whether the list is tight.
///
/// Returns the node's parent as it was before any detaching. `node` must be open.
fn finalize(self: *Parser, node: *nodes.AstNode) !?*nodes.AstNode {
    assert(node.data.open);
    node.data.open = false;
    // Captured first: a paragraph may be detached and destroyed below.
    const parent = node.parent;

    switch (node.data.value) {
        .Paragraph => {
            if (!(try self.resolveReferenceLinkDefinitions(&node.data.content))) {
                node.detachDeinit();
            }
        },
        .CodeBlock => |*code| {
            const content = &node.data.content;
            if (code.fenced) {
                // The first line of a fenced block's content is the info string.
                var pos: usize = 0;
                while (pos < content.items.len and !strings.isLineEndChar(content.items[pos])) {
                    pos += 1;
                }
                assert(pos < content.items.len);

                const info = try strings.cleanUrl(self.allocator, content.items[0..pos]);
                if (info.len != 0) {
                    code.info = info;
                }

                if (content.items[pos] == '\r') pos += 1;
                if (content.items[pos] == '\n') pos += 1;

                try content.replaceRange(0, pos, "");
            } else {
                strings.removeTrailingBlankLines(content);
                try content.append('\n');
            }
            std.mem.swap(std.ArrayList(u8), &code.literal, content);
        },
        .HtmlBlock => |*html| {
            std.mem.swap(std.ArrayList(u8), &html.literal, &node.data.content);
        },
        .List => |*list| {
            // The list is loose if an item other than the last is followed by
            // a blank line, or if a child of an item ends with a blank line
            // and is not the very last child of the very last item.
            list.tight = true;
            var item_it = node.first_child;

            items: while (item_it) |item| : (item_it = item.next) {
                if (item.data.last_line_blank and item.next != null) {
                    list.tight = false;
                    break;
                }

                var child_it = item.first_child;
                while (child_it) |child| : (child_it = child.next) {
                    if (child.endsWithBlankLine() and (item.next != null or child.next != null)) {
                        list.tight = false;
                        break :items;
                    }
                }
            }
        },
        else => {},
    }

    return parent;
}
628
629
/// Walks the tree from the root, merging runs of adjacent `Text` siblings
/// into one node and passing each merged text node to `postprocessTextNode`.
///
/// The walk uses an explicit stack. Children of `Link` and `Image` nodes are
/// not visited.
fn postprocessTextNodes(self: *Parser) !void {
    var stack = try std.ArrayList(*nodes.AstNode).initCapacity(self.allocator, 1);
    defer stack.deinit();
    var children = std.ArrayList(*nodes.AstNode).init(self.allocator);
    defer children.deinit();

    try stack.append(self.root);

    while (stack.pop()) |node| {
        var nch = node.first_child;

        while (nch) |n| {
            var this_bracket = false;

            // Keep absorbing the next sibling while it is also text.
            while (true) {
                switch (n.data.value) {
                    .Text => |*root| {
                        // `ns` is never reassigned, so it must be `const`;
                        // an unmutated `var` is rejected by the compiler.
                        const ns = n.next orelse {
                            try self.postprocessTextNode(n, root);
                            break;
                        };

                        switch (ns.data.value) {
                            .Text => |adj| {
                                // Copy the sibling's text onto the end of
                                // ours before the sibling is destroyed.
                                const old_len = root.len;
                                root.* = try self.allocator.realloc(root.*, old_len + adj.len);
                                @memcpy(root.*[old_len..], adj);
                                ns.detachDeinit();
                            },
                            else => {
                                try self.postprocessTextNode(n, root);
                                break;
                            },
                        }
                    },
                    .Link, .Image => {
                        this_bracket = true;
                        break;
                    },
                    else => break,
                }
            }

            if (!this_bracket) {
                try children.append(n);
            }

            nch = n.next;
        }

        // Move the collected children onto the work stack.
        while (children.pop()) |child| try stack.append(child);
    }
}
682
683
/// Runs the autolink processor over `text` (the payload of `node`) when the
/// autolink extension is enabled; does nothing otherwise.
fn postprocessTextNode(self: *Parser, node: *nodes.AstNode, text: *[]u8) !void {
    if (!self.options.extensions.autolink) return;
    try AutolinkProcessor.init(self.allocator, text).process(node);
}
688
689
/// Parses link reference definitions off the front of `content`, registers
/// them through `parseReferenceInline`, and removes the parsed bytes.
///
/// Returns true if the remaining content is not blank.
fn resolveReferenceLinkDefinitions(self: *Parser, content: *std.ArrayList(u8)) !bool {
    var consumed: usize = 0;
    var step: usize = undefined;

    while (true) {
        const rest = content.items[consumed..];
        if (rest.len == 0 or rest[0] != '[') break;
        if (!(try self.parseReferenceInline(rest, &step))) break;
        consumed += step;
    }

    try content.replaceRange(0, consumed, "");

    return !strings.isBlank(content.items);
}
703
704
/// Tries to parse one link reference definition (`[label]: url "title"`) at
/// the start of `content`.
///
/// On success the definition is stored in the reference map (unless its
/// normalized label is empty or already present), `pos.*` is set to the
/// number of bytes consumed, and true is returned. Returns false if `content`
/// does not start with a valid definition; `pos.*` is then left untouched.
///
/// If an allocation fails, the partially built entry is removed again so the
/// map never holds a key with an uninitialized value.
fn parseReferenceInline(self: *Parser, content: []const u8, pos: *usize) !bool {
    var subj = inlines.Subject.init(self.allocator, &self.refmap, &self.options, &self.special_chars, &self.skip_chars, content);
    defer subj.deinit();

    const lab = if (subj.linkLabel()) |l| lab: {
        if (l.len == 0)
            return false;
        break :lab l;
    } else return false;

    if (subj.peekChar() orelse 0 != ':')
        return false;

    subj.pos += 1;
    subj.spnl();

    var url: []const u8 = undefined;
    var match_len: usize = undefined;
    if (!inlines.Subject.manualScanLinkUrl(subj.input[subj.pos..], &url, &match_len))
        return false;
    subj.pos += match_len;

    // A title is only looked for if whitespace separates it from the URL.
    const beforetitle = subj.pos;
    subj.spnl();
    const title_search: ?usize = if (subj.pos == beforetitle)
        null
    else
        try scanners.linkTitle(subj.input[subj.pos..]);
    const title = if (title_search) |title_match| title: {
        const t = subj.input[subj.pos .. subj.pos + title_match];
        subj.pos += title_match;
        break :title try self.allocator.dupe(u8, t);
    } else title: {
        subj.pos = beforetitle;
        break :title &[_]u8{};
    };
    defer self.allocator.free(title);

    subj.skipSpaces();
    if (!subj.skipLineEnd()) {
        if (title.len > 0) {
            // The candidate title was not followed by a line end: retry,
            // treating the definition as ending right after the URL.
            subj.pos = beforetitle;
            subj.skipSpaces();
            if (!subj.skipLineEnd()) {
                return false;
            }
        } else {
            return false;
        }
    }

    const normalized = try strings.normalizeLabel(self.allocator, lab);
    if (normalized.len > 0) {
        // On success refmap takes ownership of `normalized`.
        const result = subj.refmap.getOrPut(normalized) catch |err| {
            self.allocator.free(normalized);
            return err;
        };
        if (result.found_existing) {
            // The first definition wins; drop the duplicate key.
            self.allocator.free(normalized);
        } else {
            // Undo the insertion if building the value fails. Otherwise
            // `deinit` would later free an uninitialized url/title.
            errdefer {
                _ = subj.refmap.remove(normalized);
                self.allocator.free(normalized);
            }
            const clean_url = try strings.cleanUrl(self.allocator, url);
            errdefer self.allocator.free(clean_url);
            const clean_title = try strings.cleanTitle(self.allocator, title);

            result.value_ptr.* = Reference{
                .url = clean_url,
                .title = clean_title,
            };
        }
    }

    pos.* = subj.pos;
    return true;
}
772
773
/// Runs inline parsing over the whole document, starting at the root.
fn processInlines(self: *Parser) !void {
    return self.processInlinesNode(self.root);
}
776
777
/// Visits every node produced by `node.descendantsIterator()` and parses
/// inlines for those whose value reports `containsInlines()`.
fn processInlinesNode(self: *Parser, node: *nodes.AstNode) inlines.ParseError!void {
    var walker = node.descendantsIterator();
    while (walker.next()) |descendant| {
        if (!descendant.data.value.containsInlines()) continue;
        try self.parseInlines(descendant);
    }
}
785
786
/// Parses the right-trimmed content of `node` into inline children: inlines
/// are parsed until the subject reports no more, emphasis is resolved, and
/// any brackets still on the subject's stack are popped.
fn parseInlines(self: *Parser, node: *nodes.AstNode) inlines.ParseError!void {
    const trimmed = strings.rtrim(node.data.content.items);

    var subject = inlines.Subject.init(self.allocator, &self.refmap, &self.options, &self.special_chars, &self.skip_chars, trimmed);
    defer subject.deinit();

    while (try subject.parseInline(node)) {}
    try subject.processEmphasis(null);
    while (subject.popBracket()) {}
}
794
795
/// Moves `offset` and `column` forward through `line`.
///
/// With `columns == false`, `in_count` counts bytes; a tab moves `column` to
/// the next tab stop. With `columns == true`, `in_count` counts columns, so a
/// tab may be only partly consumed. In that case `offset` stays on the tab
/// and `partially_consumed_tab` is set.
pub fn advanceOffset(self: *Parser, line: []const u8, in_count: usize, columns: bool) void {
    var remaining = in_count;

    while (remaining > 0) {
        if (line[self.offset] != '\t') {
            self.partially_consumed_tab = false;
            self.offset += 1;
            self.column += 1;
            remaining -= 1;
            continue;
        }

        const to_next_stop = TAB_STOP - (self.column % TAB_STOP);

        if (!columns) {
            // Byte mode: the whole tab is one unit.
            self.partially_consumed_tab = false;
            self.column += to_next_stop;
            self.offset += 1;
            remaining -= 1;
            continue;
        }

        // Column mode: take as much of the tab as is still wanted.
        const partial = to_next_stop > remaining;
        const step = if (partial) remaining else to_next_stop;
        self.partially_consumed_tab = partial;
        self.column += step;
        if (!partial) self.offset += 1;
        remaining -= step;
    }
}
823
824
/// Checks whether `line` continues an open block quote: a `>` at the first
/// non-space position with at most three columns of indentation.
/// On a match the marker and one optional following space/tab are consumed.
fn parseBlockQuotePrefix(self: *Parser, line: []const u8) bool {
    const indent = self.indent;
    if (indent > 3 or line[self.first_nonspace] != '>') return false;

    self.advanceOffset(line, indent + 1, true);
    if (strings.isSpaceOrTab(line[self.offset])) {
        self.advanceOffset(line, 1, true);
    }
    return true;
}
838
839
/// Checks whether `line` continues an open list item. It does when it is
/// indented at least as far as the item's content (`marker_offset + padding`),
/// or when it is blank and the item already has a child.
/// The matching indentation is consumed on success.
fn parseNodeItemPrefix(self: *Parser, line: []const u8, container: *nodes.AstNode, nl: *const nodes.NodeList) bool {
    const content_indent = nl.marker_offset + nl.padding;

    if (self.indent >= content_indent) {
        self.advanceOffset(line, content_indent, true);
        return true;
    }

    if (self.blank and container.first_child != null) {
        self.advanceOffset(line, self.first_nonspace - self.offset, false);
        return true;
    }

    return false;
}
850
851
/// Result of checking a line against an open code block.
const CodeBlockPrefixParseResult = enum {
    /// The line was a closing fence; the block has been finalized and the
    /// line needs no further processing.
    DoNotContinue,
    /// The line does not belong to the code block.
    NoMatch,
    /// The line continues the code block.
    Match,
};
856
857
/// Checks whether `line` continues the open code block `container`.
///
/// Indented blocks continue on lines indented by at least `CODE_INDENT`, and
/// on blank lines. Fenced blocks continue until a closing fence at least as
/// long as the opening one is found; that fence finalizes the block and sets
/// `self.current` to its parent. On other fenced lines, up to `fence_offset`
/// leading spaces/tabs are skipped.
///
/// `container` must hold a `CodeBlock` value.
fn parseCodeBlockPrefix(self: *Parser, line: []const u8, container: *nodes.AstNode) !CodeBlockPrefixParseResult {
    const code = container.data.value.CodeBlock;

    if (!code.fenced) {
        if (self.indent >= CODE_INDENT) {
            self.advanceOffset(line, CODE_INDENT, true);
            return .Match;
        }
        if (!self.blank) return .NoMatch;

        self.advanceOffset(line, self.first_nonspace - self.offset, false);
        return .Match;
    }

    // Length of a closing fence on this line, or 0 if there is none.
    var fence_len: usize = 0;
    if (self.indent <= 3 and line[self.first_nonspace] == code.fence_char) {
        fence_len = (try scanners.closeCodeFence(line[self.first_nonspace..])) orelse 0;
    }

    if (fence_len >= code.fence_length) {
        self.advanceOffset(line, fence_len, false);
        self.current = (try self.finalize(container)).?;
        return .DoNotContinue;
    }

    // Strip the indentation the opening fence had, if present.
    var remaining = code.fence_offset;
    while (remaining > 0 and strings.isSpaceOrTab(line[self.offset])) : (remaining -= 1) {
        self.advanceOffset(line, 1, true);
    }

    return .Match;
}
893
894
/// Reports whether an open HTML block of type `t` continues on the current
/// line. Types 1-5 always continue here; types 6 and 7 stop at a blank line.
/// `t` must be in 1..7.
fn parseHtmlBlockPrefix(self: *Parser, t: u8) bool {
    return switch (t) {
        1...5 => true,
        6, 7 => !self.blank,
        else => unreachable,
    };
}
901
902
/// Tries to parse a list marker in `line` at `input_pos`.
///
/// Accepts a bullet (`*`, `-`, `+`) or one to nine digits followed by `.` or
/// `)`; the marker must be followed by whitespace. When `interrupts_paragraph`
/// is set, the marker is rejected if only spaces/tabs follow it before the
/// line end, and an ordered marker must have the number 1.
///
/// On success, writes the marker's byte length to `matched.*`, fills `nl.*`
/// (with `marker_offset` and `padding` left at 0), and returns true.
fn parseListMarker(line: []const u8, input_pos: usize, interrupts_paragraph: bool, matched: *usize, nl: *nodes.NodeList) bool {
    const startpos = input_pos;
    const first = line[startpos];

    const is_bullet = first == '*' or first == '-' or first == '+';
    if (!is_bullet and !ascii.isDigit(first)) return false;

    if (is_bullet) {
        const after_marker = startpos + 1;
        if (!ascii.isWhitespace(line[after_marker])) return false;

        if (interrupts_paragraph) {
            var i = after_marker;
            while (strings.isSpaceOrTab(line[i])) : (i += 1) {}
            if (line[i] == '\n') return false;
        }

        matched.* = after_marker - startpos;
        nl.* = .{
            .list_type = .Bullet,
            .marker_offset = 0,
            .padding = 0,
            .start = 1,
            .delimiter = .Period,
            .bullet_char = first,
            .tight = false,
        };
        return true;
    }

    // Ordered list: read up to nine digits.
    var pos = startpos;
    var start: usize = 0;
    var digits: u8 = 0;
    while (digits < 9 and ascii.isDigit(line[pos])) {
        start = (10 * start) + (line[pos] - '0');
        pos += 1;
        digits += 1;
    }

    if (interrupts_paragraph and start != 1) return false;

    const delimiter_char = line[pos];
    if (delimiter_char != '.' and delimiter_char != ')') return false;
    pos += 1;

    if (!ascii.isWhitespace(line[pos])) return false;

    if (interrupts_paragraph) {
        var i = pos;
        while (strings.isSpaceOrTab(line[i])) : (i += 1) {}
        if (strings.isLineEndChar(line[i])) return false;
    }

    matched.* = pos - startpos;
    nl.* = .{
        .list_type = .Ordered,
        .marker_offset = 0,
        .padding = 0,
        .start = start,
        .delimiter = if (delimiter_char == '.')
            .Period
        else
            .Paren,
        .bullet_char = 0,
        .tight = false,
    };
    return true;
}
985
986
/// Two list descriptions match when they agree on list type, delimiter and
/// bullet character.
fn listsMatch(list_data: *const nodes.NodeList, item_data: *const nodes.NodeList) bool {
    if (list_data.list_type != item_data.list_type) return false;
    if (list_data.delimiter != item_data.delimiter) return false;
    return list_data.bullet_char == item_data.bullet_char;
}
989
};
990
991
/// Test helper: renders `markdown` with `options` through
/// `main.testMarkdownToHtml` and expects the output to equal `html` exactly.
/// The rendered buffer is freed with the testing allocator.
fn expectMarkdownHTML(options: Options, markdown: []const u8, html: []const u8) !void {
    const rendered = try main.testMarkdownToHtml(options, markdown);
    defer std.testing.allocator.free(rendered);

    try std.testing.expectEqualStrings(html, rendered);
}
996
997
test "convert simple emphases" {
    // Underscore and asterisk emphasis, nested forms, and a code span.
    const markdown =
        \\hello, _world_ __world__ ___world___ *_world_* **_world_** *__world__*
        \\
        \\this is `yummy`
        \\
    ;
    const html =
        \\<p>hello, <em>world</em> <strong>world</strong> <em><strong>world</strong></em> <em><em>world</em></em> <strong><em>world</em></strong> <em><strong>world</strong></em></p>
        \\<p>this is <code>yummy</code></p>
        \\
    ;
    try expectMarkdownHTML(.{}, markdown, html);
}
1009
test "smart quotes" {
    // Straight quotes become curly quotes when `parse.smart` is on.
    const options: Options = .{ .parse = .{ .smart = true } };
    const markdown = "\"Hey,\" she said. \"What's 'up'?\"\n";
    const html = "<p>“Hey,” she said. “What’s ‘up’?”</p>\n";
    try expectMarkdownHTML(options, markdown, html);
}
1012
test "handles EOF without EOL" {
    // Input with no trailing newline still yields a complete paragraph.
    const markdown = "hello";
    const html = "<p>hello</p>\n";
    try expectMarkdownHTML(.{}, markdown, html);
}
1015
test "accepts multiple lines" {
    const markdown = "hello\nthere\n";

    // Default rendering keeps the soft break as a newline.
    try expectMarkdownHTML(.{}, markdown, "<p>hello\nthere</p>\n");

    // With hard breaks enabled the soft break is rendered as <br />.
    const hard_breaks: Options = .{ .render = .{ .hard_breaks = true } };
    try expectMarkdownHTML(hard_breaks, markdown, "<p>hello<br />\nthere</p>\n");
}
1019
test "smart hyphens" {
    // Runs of hyphens become en/em dashes when `parse.smart` is on.
    const options: Options = .{ .parse = .{ .smart = true } };
    const markdown = "hyphen - en -- em --- four ---- five ----- six ------ seven -------\n";
    const html = "<p>hyphen - en – em — four –– five —– six —— seven —––</p>\n";
    try expectMarkdownHTML(options, markdown, html);
}
1022
test "handles tabs" {
    // Each case is { markdown, expected html }, checked in order.
    const cases = [_][2][]const u8{
        .{ "\tfoo\tbaz\t\tbim\n", "<pre><code>foo\tbaz\t\tbim\n</code></pre>\n" },
        .{ " \tfoo\tbaz\t\tbim\n", "<pre><code>foo\tbaz\t\tbim\n</code></pre>\n" },
        .{ " - foo\n\n\tbar\n", "<ul>\n<li>\n<p>foo</p>\n<p>bar</p>\n</li>\n</ul>\n" },
        .{ "#\tFoo\n", "<h1>Foo</h1>\n" },
        .{ "*\t*\t*\t\n", "<hr />\n" },
    };

    for (cases) |case| {
        try expectMarkdownHTML(.{}, case[0], case[1]);
    }
}
1029
test "escapes" {
    // A backslash-escaped '#' does not start a heading.
    const markdown = "\\## foo\n";
    const html = "<p>## foo</p>\n";
    try expectMarkdownHTML(.{}, markdown, html);
}
1032
test "setext heading override pointy" {
    // The setext underline wins over the unfinished tag, so the tag text is
    // rendered as escaped text. The expected HTML must carry the entities
    // (&lt; &quot; &gt;); with raw quotes the string literal does not even
    // compile.
    const markdown = "<a title=\"a lot\n---\nof dashes\"/>\n";
    const html = "<h2>&lt;a title=&quot;a lot</h2>\n<p>of dashes&quot;/&gt;</p>\n";
    try expectMarkdownHTML(.{}, markdown, html);
}
1035
test "fenced code blocks" {
    // Code block contents are HTML-escaped in the output, so '<' and '>'
    // must appear as &lt; and &gt; in the expected string.
    try expectMarkdownHTML(.{}, "```\n<\n >\n```\n", "<pre><code>&lt;\n &gt;\n</code></pre>\n");

    // A closing fence must be at least as long as the opening fence.
    try expectMarkdownHTML(.{}, "````\naaa\n```\n``````\n", "<pre><code>aaa\n```\n</code></pre>\n");
}
1039
test "html blocks" {
    // Raw HTML is only passed through when rendering is marked unsafe.
    const raw_html: Options = .{ .render = .{ .unsafe = true } };

    try expectMarkdownHTML(raw_html,
        \\_world_.
        \\</pre>
    ,
        \\<p><em>world</em>.
        \\</pre></p>
        \\
    );

    try expectMarkdownHTML(raw_html,
        \\<table><tr><td>
        \\<pre>
        \\**Hello**,
        \\
        \\_world_.
        \\</pre>
        \\</td></tr></table>
    ,
        \\<table><tr><td>
        \\<pre>
        \\**Hello**,
        \\<p><em>world</em>.
        \\</pre></p>
        \\</td></tr></table>
        \\
    );

    try expectMarkdownHTML(raw_html,
        \\<DIV CLASS="foo">
        \\
        \\*Markdown*
        \\
        \\</DIV>
    ,
        \\<DIV CLASS="foo">
        \\<p><em>Markdown</em></p>
        \\</DIV>
        \\
    );

    try expectMarkdownHTML(raw_html,
        \\<pre language="haskell"><code>
        \\import Text.HTML.TagSoup
        \\
        \\main :: IO ()
        \\main = print $ parseTags tags
        \\</code></pre>
        \\okay
        \\
    ,
        \\<pre language="haskell"><code>
        \\import Text.HTML.TagSoup
        \\
        \\main :: IO ()
        \\main = print $ parseTags tags
        \\</code></pre>
        \\<p>okay</p>
        \\
    );
}
1100
test "links" {
    // Inline links, without and with a title. Each case is { markdown, html }.
    const cases = [_][2][]const u8{
        .{ "[foo](/url)\n", "<p><a href=\"/url\">foo</a></p>\n" },
        .{ "[foo](/url \"title\")\n", "<p><a href=\"/url\" title=\"title\">foo</a></p>\n" },
    };

    for (cases) |case| {
        try expectMarkdownHTML(.{}, case[0], case[1]);
    }
}
1104
test "link reference definitions" {
    try expectMarkdownHTML(.{}, "[foo]: /url \"title\"\n\n[foo]\n", "<p><a href=\"/url\" title=\"title\">foo</a></p>\n");

    // The escaped quote inside the title is rendered as &quot; in the
    // attribute; a bare '"' there would end the Zig string literal early.
    try expectMarkdownHTML(.{}, "[foo]: /url\\bar\\*baz \"foo\\\"bar\\baz\"\n\n[foo]\n", "<p><a href=\"/url%5Cbar*baz\" title=\"foo&quot;bar\\baz\">foo</a></p>\n");
}
1108
test "tables" {
    // A header row, a delimiter row and one body row, with the table
    // extension enabled.
    const options: Options = .{ .extensions = .{ .table = true } };
    const markdown =
        \\| foo | bar |
        \\| --- | --- |
        \\| baz | bim |
        \\
    ;
    const html =
        \\<table>
        \\<thead>
        \\<tr>
        \\<th>foo</th>
        \\<th>bar</th>
        \\</tr>
        \\</thead>
        \\<tbody>
        \\<tr>
        \\<td>baz</td>
        \\<td>bim</td>
        \\</tr>
        \\</tbody>
        \\</table>
        \\
    ;
    try expectMarkdownHTML(options, markdown, html);
}
1132
test "strikethroughs" {
    // ~text~ renders as <del> when the strikethrough extension is enabled.
    const options: Options = .{ .extensions = .{ .strikethrough = true } };
    const markdown = "Hello ~world~ there.\n";
    const html = "<p>Hello <del>world</del> there.</p>\n";
    try expectMarkdownHTML(options, markdown, html);
}
1135
test "images" {
    // An image used as link text. The input must actually contain the image
    // (`![moon](moon.jpg)`); an empty link text cannot produce the <img>.
    const markdown = "[![moon](moon.jpg)](/uri)\n";
    const html = "<p><a href=\"/uri\"><img src=\"moon.jpg\" alt=\"moon\" /></a></p>\n";
    try expectMarkdownHTML(.{}, markdown, html);
}
1138
test "autolink" {
    // Bare www hosts, URLs and e-mail addresses become links when the
    // autolink extension is enabled. Each case is { markdown, html }.
    const options: Options = .{ .extensions = .{ .autolink = true } };
    const cases = [_][2][]const u8{
        .{ "www.commonmark.org\n", "<p><a href=\"http://www.commonmark.org\">www.commonmark.org</a></p>\n" },
        .{ "http://commonmark.org\n", "<p><a href=\"http://commonmark.org\">http://commonmark.org</a></p>\n" },
        .{ "foo@bar.baz\n", "<p><a href=\"mailto:foo@bar.baz\">foo@bar.baz</a></p>\n" },
    };

    for (cases) |case| {
        try expectMarkdownHTML(options, case[0], case[1]);
    }
}
1143
test "header anchors" {
    // Headings get an anchor derived from their text; repeated anchors get a
    // numeric suffix.
    const options: Options = .{ .render = .{ .header_anchors = true } };
    const markdown =
        \\# Hi.
        \\## Hi 1.
        \\### Hi.
        \\#### Hello.
        \\##### Hi.
        \\###### Hello.
        \\# Isn't it grand?
        \\
    ;
    const html =
        \\<h1><a href="#hi" id="hi"></a>Hi.</h1>
        \\<h2><a href="#hi-1" id="hi-1"></a>Hi 1.</h2>
        \\<h3><a href="#hi-2" id="hi-2"></a>Hi.</h3>
        \\<h4><a href="#hello" id="hello"></a>Hello.</h4>
        \\<h5><a href="#hi-3" id="hi-3"></a>Hi.</h5>
        \\<h6><a href="#hello-1" id="hello-1"></a>Hello.</h6>
        \\<h1><a href="#isnt-it-grand" id="isnt-it-grand"></a>Isn't it grand?</h1>
        \\
    ;
    try expectMarkdownHTML(options, markdown, html);
}
1164