1
const std = @import("std");
2
const assert = std.debug.assert;
3
const ascii = std.ascii;
4
const ArrayList = std.array_list.Managed;
5
6
const main = @import("main.zig");
7
const strings = @import("strings.zig");
8
const nodes = @import("nodes.zig");
9
const scanners = @import("scanners.zig");
10
const inlines = @import("inlines.zig");
11
const Options = @import("options.zig").Options;
12
const table = @import("table.zig");
13
const AutolinkProcessor = @import("autolink.zig").AutolinkProcessor;
14
15
/// Width of a tab stop in columns; a tab advances the column to the next multiple of this.
const TAB_STOP = 4;
/// Indentation, in columns, from which a line is treated as indented code.
const CODE_INDENT = 4;
17
18
/// Target of a link reference definition.
/// Both slices are released with the parser's allocator in `Parser.deinit`.
pub const Reference = struct {
    /// Destination, stored as returned by `strings.cleanUrl`.
    url: []u8,
    /// Title, stored as returned by `strings.cleanTitle`.
    title: []u8,
};
22
23
/// Link reference definitions keyed by label, with a budget on the total
/// size of the references handed out by `lookup`.
pub const RefMap = struct {
    /// Label -> reference. The map does not copy keys or values.
    map: std.StringHashMap(Reference),
    /// Upper bound on the summed `url.len + title.len` of all successful lookups.
    max_ref_size: usize = std.math.maxInt(usize),
    /// Summed size of the references returned by `lookup` so far.
    ref_size: usize = 0,

    /// Creates an empty map backed by `allocator`.
    pub fn init(allocator: std.mem.Allocator) RefMap {
        return .{ .map = std.StringHashMap(Reference).init(allocator) };
    }

    /// Returns the reference stored under `key`.
    /// Returns null when the key is unknown or when handing the reference out
    /// would push `ref_size` past `max_ref_size`; on success the reference's
    /// size is added to `ref_size`.
    pub fn lookup(self: *RefMap, key: []const u8) ?Reference {
        const ref = self.map.get(key) orelse return null;
        const size = ref.url.len +| ref.title.len;
        // `max_ref_size` is a plain public field and may be lowered after
        // lookups have already been charged; saturate instead of underflowing.
        const remaining = self.max_ref_size -| self.ref_size;
        if (size > remaining) return null;
        self.ref_size += size;
        return ref;
    }
};
40
41
pub const Parser = struct {
42
allocator: std.mem.Allocator,
43
refmap: RefMap,
44
hack_refmapKeys: ArrayList([]u8),
45
root: *nodes.AstNode,
46
current: *nodes.AstNode,
47
options: Options,
48
49
line_number: u32 = 0,
50
offset: usize = 0,
51
column: usize = 0,
52
first_nonspace: usize = 0,
53
first_nonspace_column: usize = 0,
54
indent: usize = 0,
55
blank: bool = false,
56
partially_consumed_tab: bool = false,
57
last_line_length: usize = 0,
58
total_size: usize = 0,
59
thematic_break_kill_pos: usize = 0,
60
61
special_chars: [256]bool = [_]bool{false} ** 256,
62
skip_chars: [256]bool = [_]bool{false} ** 256,
63
64
/// Creates a parser whose tree consists of a single open `Document` node.
/// Fails only if allocating that node fails.
pub fn init(allocator: std.mem.Allocator, options: Options) !Parser {
    const document = try nodes.AstNode.create(allocator, .{
        .value = .Document,
        .content = ArrayList(u8).init(allocator),
    });

    var self: Parser = .{
        .allocator = allocator,
        .refmap = RefMap.init(allocator),
        .hack_refmapKeys = ArrayList([]u8).init(allocator),
        .root = document,
        .current = document,
        .options = options,
    };

    // The character tables depend on which options/extensions are enabled.
    inlines.Subject.setCharsForOptions(&options, &self.special_chars, &self.skip_chars);

    return self;
}
83
84
/// Releases the reference map (keys, urls and titles) and the parser's own
/// bookkeeping lists. The node tree rooted at `root` is not touched here.
pub fn deinit(self: *Parser) void {
    var entries = self.refmap.map.iterator();
    while (entries.next()) |entry| {
        self.allocator.free(entry.key_ptr.*);
        self.allocator.free(entry.value_ptr.url);
        self.allocator.free(entry.value_ptr.title);
    }
    self.refmap.map.deinit();

    // Only the list storage is released; nothing in this file stores items
    // in it, so there are no elements to free.
    self.hack_refmapKeys.deinit();
}
93
94
/// Splits `s` into lines and passes each one to `processLine`.
///
/// A line ends at the first line-ending character; "\r", "\n" and "\r\n" are
/// all consumed as one terminator. NUL bytes are replaced with U+FFFD.
/// Nothing is carried over between calls: text at the end of `s` without a
/// terminator is processed as a complete line.
pub fn feed(self: *Parser, s: []const u8) !void {
    self.total_size +|= s.len;

    // Only used for lines that contain NUL bytes and therefore need rewriting.
    var linebuf = ArrayList(u8).init(self.allocator);
    defer linebuf.deinit();

    var i: usize = 0;
    while (i < s.len) {
        // Find the end of the current segment: a line ending, a NUL, or end of input.
        var eol = i;
        while (eol < s.len and !strings.isLineEndChar(s[eol]) and s[eol] != 0) : (eol += 1) {}

        const stopped_at_nul = eol < s.len and !strings.isLineEndChar(s[eol]);
        if (stopped_at_nul) {
            assert(s[eol] == 0);
            try linebuf.appendSlice(s[i..eol]);
            try linebuf.appendSlice("\u{fffd}");
            i = eol + 1;
            continue;
        }

        if (linebuf.items.len != 0) {
            try linebuf.appendSlice(s[i..eol]);
            try self.processLine(linebuf.items);
            linebuf.clearRetainingCapacity();
        } else if (eol < s.len and s[eol] == '\n') {
            try self.processLine(s[i .. eol + 1]);
        } else {
            try self.processLine(s[i..eol]);
        }

        i = eol;
        if (i < s.len and s[i] == '\r') i += 1;
        if (i < s.len and s[i] == '\n') i += 1;
    }

    // Input that ends right after a NUL leaves rewritten text in `linebuf`
    // with no later segment to flush it; process it instead of dropping it.
    if (linebuf.items.len != 0) {
        try self.processLine(linebuf.items);
    }
}
136
137
/// Closes every open block, parses inline content, post-processes text
/// nodes and returns the document root.
pub fn finish(self: *Parser) !*nodes.AstNode {
    try self.finalizeDocument();
    try self.postprocessTextNodes();

    return self.root;
}
142
143
/// Updates `first_nonspace`, `first_nonspace_column`, `indent` and `blank`
/// for the current `offset`/`column`. The scan is skipped when a previous
/// call already located a position beyond `offset`.
fn findFirstNonspace(self: *Parser, line: []const u8) void {
    if (self.first_nonspace <= self.offset) {
        var pos = self.offset;
        var col = self.column;
        var until_tab_stop = TAB_STOP - (col % TAB_STOP);

        while (pos < line.len) : (pos += 1) {
            switch (line[pos]) {
                ' ' => {
                    col += 1;
                    until_tab_stop = if (until_tab_stop == 1) TAB_STOP else until_tab_stop - 1;
                },
                '\t' => {
                    col += until_tab_stop;
                    until_tab_stop = TAB_STOP;
                },
                else => break,
            }
        }

        self.first_nonspace = pos;
        self.first_nonspace_column = col;
    }

    self.indent = self.first_nonspace_column - self.column;
    self.blank = self.first_nonspace < line.len and strings.isLineEndChar(line[self.first_nonspace]);
}
176
177
/// Runs the thematic-break scanner at `first_nonspace`, storing the match
/// length in `matched`. A failed scan is remembered in
/// `thematic_break_kill_pos` so the same position is not scanned again.
fn scanThematicBreak(self: *Parser, line: []const u8, matched: *usize) !bool {
    const start = self.first_nonspace;
    if (self.thematic_break_kill_pos > start) return false;

    const found = try scanners.unwrap(scanners.thematicBreak(line[start..]), matched);
    if (!found) self.thematic_break_kill_pos = start + 1;
    return found;
}
185
186
/// Feeds one line through the block structure: matches it against the open
/// blocks, opens any new blocks it starts and adds the remaining text.
///
/// `input` may come with or without a trailing line-ending character; when
/// it has none, a temporary copy ending in "\n" is used, because the block
/// scanners index up to the line terminator.
fn processLine(self: *Parser, input: []const u8) !void {
    // Temporary newline-terminated copy, released on every exit path.
    var owned_line: ?[]u8 = null;
    defer if (owned_line) |buf| self.allocator.free(buf);

    const needs_newline = input.len == 0 or !strings.isLineEndChar(input[input.len - 1]);
    const line: []const u8 = if (needs_newline) terminated: {
        const buf = try self.allocator.alloc(u8, input.len + 1);
        owned_line = buf;
        @memcpy(buf[0..input.len], input);
        buf[input.len] = '\n';
        break :terminated buf;
    } else input;

    // Reset the per-line scanning state.
    self.offset = 0;
    self.column = 0;
    self.first_nonspace = 0;
    self.first_nonspace_column = 0;
    self.blank = false;
    self.thematic_break_kill_pos = 0;
    self.partially_consumed_tab = false;

    // Skip a UTF-8 byte order mark at the very start of the input.
    if (self.line_number == 0 and std.mem.startsWith(u8, line, "\u{feff}")) {
        self.offset += 3;
    }

    self.line_number += 1;

    const result = try self.checkOpenBlocks(line);
    if (result.container) |last_matched_container| {
        const previous_current = self.current;
        const container = try self.openNewBlocks(last_matched_container, line, result.all_matched);
        // Text is only added when opening blocks left `current` unchanged.
        if (previous_current == self.current) {
            try self.addTextToContainer(container, last_matched_container, line);
        }
    }

    // Remember the line length without its terminator.
    self.last_line_length = line.len;
    if (self.last_line_length > 0 and line[self.last_line_length - 1] == '\n') {
        self.last_line_length -= 1;
    }
    if (self.last_line_length > 0 and line[self.last_line_length - 1] == '\r') {
        self.last_line_length -= 1;
    }
}
231
232
/// Result of matching a line against the currently open blocks.
const CheckOpenBlocksResult = struct {
    /// True when every open block accepted the line.
    all_matched: bool = false,
    /// Block at which matching stopped; null when the line was consumed as a
    /// closing code fence and needs no further processing.
    container: ?*nodes.AstNode,
};
236
237
/// Matches `line` against the open blocks starting at the root and returns
/// the last block that matched: the deepest open block when everything
/// matched, otherwise the parent of the first block that did not.
fn checkOpenBlocks(self: *Parser, line: []const u8) !CheckOpenBlocksResult {
    const inner = try self.checkOpenBlocksInner(self.root, line);
    const stopped_at = inner.container orelse return inner;

    return .{
        .all_matched = inner.all_matched,
        .container = if (inner.all_matched) stopped_at else stopped_at.parent.?,
    };
}
247
248
/// Descends through the chain of open last children below `start_container`,
/// asking each block whether `line` continues it.
///
/// Returns the first block that does not match (with `all_matched` false),
/// the deepest block when all match, or a null container when a closing
/// code fence ended the line's processing.
fn checkOpenBlocksInner(self: *Parser, start_container: *nodes.AstNode, line: []const u8) !CheckOpenBlocksResult {
    var container = start_container;

    while (container.lastChildIsOpen()) {
        container = container.last_child.?;
        self.findFirstNonspace(line);

        const continues: bool = switch (container.data.value) {
            .BlockQuote => self.parseBlockQuotePrefix(line),
            .Item => |*nl| self.parseNodeItemPrefix(line, container, nl),
            .CodeBlock => switch (try self.parseCodeBlockPrefix(line, container)) {
                .DoNotContinue => return .{ .container = null },
                .NoMatch => false,
                .Match => true,
            },
            .HtmlBlock => |nhb| self.parseHtmlBlockPrefix(nhb.block_type),
            // A blank line ends a paragraph.
            .Paragraph => !self.blank,
            .Table => try table.matches(self.allocator, line[self.first_nonspace..]),
            // These never continue onto another line.
            .Heading, .TableRow, .TableCell => false,
            .Document, .List, .ThematicBreak, .Text, .SoftBreak, .LineBreak, .Code, .HtmlInline, .Emph, .Strong, .Strikethrough, .Link, .Image => true,
        };

        if (!continues) return .{ .container = container };
    }

    return .{ .all_matched = true, .container = container };
}
304
305
/// Opens every block that `line` starts inside `input_container` and returns
/// the innermost container afterwards.
///
/// Block starts are tried in a fixed order on each iteration: block quote,
/// ATX heading, fenced code, HTML block, setext underline, thematic break,
/// list item, indented code and finally (when the extension is enabled) a
/// table. The loop stops when nothing matches, when the container is a code
/// or HTML block, or when the new container accepts lines directly.
fn openNewBlocks(self: *Parser, input_container: *nodes.AstNode, line: []const u8, all_matched: bool) !*nodes.AstNode {
    var container = input_container;
    // A paragraph left open by the previous line may continue lazily.
    var maybe_lazy = self.current.data.value == .Paragraph;

    // Out-parameters filled by the scanners below.
    var matched: usize = undefined;
    var nl: nodes.NodeList = undefined;
    var sc: scanners.SetextChar = undefined;

    while (container.data.value != .CodeBlock and container.data.value != .HtmlBlock) {
        self.findFirstNonspace(line);
        const indented = self.indent >= CODE_INDENT;
        const rest = line[self.first_nonspace..];
        const in_paragraph = container.data.value == .Paragraph;

        if (!indented and line[self.first_nonspace] == '>') {
            // Block quote: consume the marker and one optional following space.
            const offset = self.first_nonspace + 1 - self.offset;
            self.advanceOffset(line, offset, false);
            if (strings.isSpaceOrTab(line[self.offset])) {
                self.advanceOffset(line, 1, true);
            }
            container = try self.addChild(container, .BlockQuote);
        } else if (!indented and try scanners.unwrap(scanners.atxHeadingStart(rest), &matched)) {
            const heading_startpos = self.first_nonspace;
            self.advanceOffset(line, heading_startpos + matched - self.offset, false);
            container = try self.addChild(container, .{ .Heading = .{} });

            // The level is the number of leading '#' characters, capped at 6.
            var hashpos = std.mem.indexOfScalar(u8, rest, '#').? + heading_startpos;
            var level: u8 = 0;
            while (line[hashpos] == '#') : (hashpos += 1) {
                if (level < 6) level += 1;
            }

            container.data.value = .{ .Heading = .{ .level = level, .setext = false } };
        } else if (!indented and try scanners.unwrap(scanners.openCodeFence(rest), &matched)) {
            const fence_start = self.first_nonspace;
            const offset = self.offset;
            const ncb = nodes.NodeCodeBlock{
                .fenced = true,
                .fence_char = line[fence_start],
                .fence_length = matched,
                .fence_offset = fence_start - offset,
                .info = null,
                .literal = ArrayList(u8).init(self.allocator),
            };
            container = try self.addChild(container, .{ .CodeBlock = ncb });
            self.advanceOffset(line, fence_start + matched - offset, false);
        } else if (!indented and ((try scanners.htmlBlockStart(rest, &matched)) or
            (!in_paragraph and try scanners.htmlBlockStart7(rest, &matched))))
        {
            // The second scanner (`htmlBlockStart7`) is not tried inside a paragraph.
            const nhb = nodes.NodeHtmlBlock{
                .block_type = @truncate(matched),
                .literal = ArrayList(u8).init(self.allocator),
            };
            container = try self.addChild(container, .{ .HtmlBlock = nhb });
        } else if (!indented and in_paragraph and try scanners.setextHeadingLine(rest, &sc)) {
            // A setext underline turns the paragraph into a heading, unless
            // the paragraph held nothing but reference definitions.
            const has_content = try self.resolveReferenceLinkDefinitions(&container.data.content);
            if (has_content) {
                container.data.value = .{
                    .Heading = .{
                        .level = switch (sc) {
                            .Equals => 1,
                            .Hyphen => 2,
                        },
                        .setext = true,
                    },
                };
                self.advanceOffset(line, line.len - 1 - self.offset, false);
            }
        } else if (!indented and !(in_paragraph and !all_matched) and try self.scanThematicBreak(line, &matched)) {
            container = try self.addChild(container, .ThematicBreak);
            self.advanceOffset(line, line.len - 1 - self.offset, false);
        } else if ((!indented or container.data.value == .List) and self.indent < 4 and
            parseListMarker(line, self.first_nonspace, in_paragraph, &matched, &nl))
        {
            self.advanceOffset(line, self.first_nonspace + matched - self.offset, false);

            const saved_tab = self.partially_consumed_tab;
            const saved_offset = self.offset;
            const saved_column = self.column;

            // Measure the whitespace between the marker and the content.
            while (self.column - saved_column <= 5 and strings.isSpaceOrTab(line[self.offset])) {
                self.advanceOffset(line, 1, true);
            }

            const spaces = self.column - saved_column;
            if (spaces >= 5 or spaces < 1 or strings.isLineEndChar(line[self.offset])) {
                // Too much, none, or only whitespace: content starts one
                // column after the marker.
                nl.padding = matched + 1;
                self.partially_consumed_tab = saved_tab;
                self.offset = saved_offset;
                self.column = saved_column;
                if (spaces > 0) {
                    self.advanceOffset(line, 1, true);
                }
            } else {
                nl.padding = matched + spaces;
            }

            nl.marker_offset = self.indent;

            // Start a new list unless the container is a list of the same kind.
            const needs_new_list = switch (container.data.value) {
                .List => |*existing| !listsMatch(&nl, existing),
                else => true,
            };
            if (needs_new_list) {
                container = try self.addChild(container, .{ .List = nl });
            }

            container = try self.addChild(container, .{ .Item = nl });
        } else if (indented and !maybe_lazy and !self.blank) {
            self.advanceOffset(line, CODE_INDENT, true);
            container = try self.addChild(container, .{
                .CodeBlock = .{
                    .fenced = false,
                    .fence_char = 0,
                    .fence_length = 0,
                    .fence_offset = 0,
                    .info = null,
                    .literal = ArrayList(u8).init(self.allocator),
                },
            });
        } else {
            var replace: bool = undefined;
            const opened = (if (!indented and self.options.extensions.table)
                try table.tryOpeningBlock(self, container, line, &replace)
            else
                null) orelse break;

            if (replace) {
                // The new block takes the place of the current container.
                container.insertAfter(opened);
                container.detachDeinit();
            }
            container = opened;
        }

        if (container.data.value.acceptsLines()) {
            break;
        }

        maybe_lazy = false;
    }

    return container;
}
473
474
/// Appends a new node holding `value` below `input_parent`.
/// Ancestors that cannot contain this kind of node are finalized until one
/// that can is reached; the new node is appended there and returned.
pub fn addChild(self: *Parser, input_parent: *nodes.AstNode, value: nodes.NodeValue) !*nodes.AstNode {
    var parent = input_parent;
    while (true) {
        if (parent.data.value.canContainType(value)) break;
        parent = (try self.finalize(parent)).?;
    }

    const child = try nodes.AstNode.create(self.allocator, .{
        .value = value,
        .start_line = self.line_number,
        .content = ArrayList(u8).init(self.allocator),
    });
    parent.append(child);
    return child;
}
488
489
/// Adds what is left of `line` to the tree after the block structure for the
/// line has been settled, and makes the receiving block `current`.
///
/// Handles blank-line bookkeeping, lazy paragraph continuation, closing of
/// blocks that no longer match, and creation of a paragraph when the
/// container cannot hold text itself.
fn addTextToContainer(self: *Parser, input_container: *nodes.AstNode, last_matched_container: *nodes.AstNode, line: []const u8) !void {
    var container = input_container;
    self.findFirstNonspace(line);

    if (self.blank) {
        if (container.last_child) |last_child| last_child.data.last_line_blank = true;
    }

    // Some blocks never record a trailing blank line.
    container.data.last_line_blank = self.blank and
        switch (container.data.value) {
            .BlockQuote, .Heading, .ThematicBreak => false,
            .CodeBlock => |ncb| !ncb.fenced,
            .Item => container.first_child != null or container.data.start_line != self.line_number,
            else => true,
        };

    // No ancestor ends in a blank line any more.
    var ancestor = container.parent;
    while (ancestor) |node| : (ancestor = node.parent) {
        node.data.last_line_blank = false;
    }

    // Lazy continuation: no new block was opened and the open paragraph is
    // still `current`, so the text simply extends it.
    const lazy_continuation = self.current != last_matched_container and
        container == last_matched_container and
        !self.blank and
        self.current.data.value == .Paragraph;
    if (lazy_continuation) {
        return self.addLine(self.current, line);
    }

    // Close every block that did not match this line.
    while (self.current != last_matched_container) {
        self.current = (try self.finalize(self.current)).?;
    }

    switch (container.data.value) {
        .CodeBlock => try self.addLine(container, line),
        .HtmlBlock => |nhb| {
            try self.addLine(container, line);

            const rest = line[self.first_nonspace..];
            const matches_end_condition = switch (nhb.block_type) {
                1 => scanners.htmlBlockEnd1(rest),
                2 => scanners.htmlBlockEnd2(rest),
                3 => scanners.htmlBlockEnd3(rest),
                4 => scanners.htmlBlockEnd4(rest),
                5 => scanners.htmlBlockEnd5(rest),
                else => false,
            };

            if (matches_end_condition) {
                container = (try self.finalize(container)).?;
            }
        },
        else => if (!self.blank) {
            if (container.data.value.acceptsLines()) {
                // ATX headings lose their closing '#' sequence.
                const consider_line: []const u8 = switch (container.data.value) {
                    .Heading => |nh| if (nh.setext) line else strings.chopTrailingHashtags(line),
                    else => line,
                };

                const count = self.first_nonspace - self.offset;
                if (self.first_nonspace <= consider_line.len) {
                    self.advanceOffset(consider_line, count, false);
                    try self.addLine(container, consider_line);
                }
            } else {
                // The container cannot hold text itself: start a paragraph.
                container = try self.addChild(container, .Paragraph);
                const count = self.first_nonspace - self.offset;
                self.advanceOffset(line, count, false);
                try self.addLine(container, line);
            }
        },
    }

    self.current = container;
}
570
571
/// Appends `line[offset..]` to the content of the open block `node`.
/// When a tab was only partly consumed, the tab is skipped and replaced by
/// the spaces needed to reach the next tab stop.
fn addLine(self: *Parser, node: *nodes.AstNode, line: []const u8) !void {
    assert(node.data.open);

    if (self.partially_consumed_tab) {
        self.offset += 1;
        const padding = TAB_STOP - (self.column % TAB_STOP);
        try node.data.content.appendNTimes(' ', padding);
    }

    if (self.offset >= line.len) return;
    try node.data.content.appendSlice(line[self.offset..]);
}
584
585
/// Finalizes every block that is still open, from `current` up to and
/// including the root, sets the reference-size limit to
/// `max(100_000, total_size)` and then parses inline content.
fn finalizeDocument(self: *Parser) !void {
    while (self.current != self.root) {
        const parent = try self.finalize(self.current);
        self.current = parent.?;
    }
    _ = try self.finalize(self.root);

    self.refmap.max_ref_size = @max(100_000, self.total_size);
    try self.processInlines();
}
594
595
/// Closes `node` and performs the work that depends on its kind:
/// - paragraphs have leading reference definitions extracted and are
///   removed when nothing else remains;
/// - code blocks have their text moved into `literal` (fenced blocks first
///   split off the info string on the first line);
/// - HTML blocks have their text moved into `literal`;
/// - lists get their `tight` flag computed.
/// Returns the node's parent, which is null for a node without one.
fn finalize(self: *Parser, node: *nodes.AstNode) !?*nodes.AstNode {
    assert(node.data.open);
    node.data.open = false;
    // Captured first: the node may be destroyed below.
    const parent = node.parent;

    switch (node.data.value) {
        .Paragraph => {
            if (!(try self.resolveReferenceLinkDefinitions(&node.data.content))) {
                node.detachDeinit();
            }
        },
        .CodeBlock => |*ncb| {
            const content = &node.data.content;
            if (ncb.fenced) {
                // The first line of the content is the info string.
                const first_line_end = for (content.items, 0..) |byte, idx| {
                    if (strings.isLineEndChar(byte)) break idx;
                } else content.items.len;
                assert(first_line_end < content.items.len);

                const info = try strings.cleanUrl(self.allocator, content.items[0..first_line_end]);
                if (info.len != 0) {
                    ncb.info = info;
                }

                var body_start = first_line_end;
                if (content.items[body_start] == '\r') body_start += 1;
                if (content.items[body_start] == '\n') body_start += 1;

                try content.replaceRange(0, body_start, "");
            } else {
                strings.removeTrailingBlankLines(content);
                try content.append('\n');
            }
            std.mem.swap(ArrayList(u8), &ncb.literal, &node.data.content);
        },
        .HtmlBlock => |*nhb| {
            std.mem.swap(ArrayList(u8), &nhb.literal, &node.data.content);
        },
        .List => |*nl| {
            // Loose as soon as a blank line separates two items, or separates
            // two children of an item (or the last child from the next item).
            nl.tight = tight: {
                var item_it = node.first_child;
                while (item_it) |item| : (item_it = item.next) {
                    const has_next_item = item.next != null;
                    if (item.data.last_line_blank and has_next_item) break :tight false;

                    var child_it = item.first_child;
                    while (child_it) |child| : (child_it = child.next) {
                        if (child.endsWithBlankLine() and (has_next_item or child.next != null)) {
                            break :tight false;
                        }
                    }
                }
                break :tight true;
            };
        },
        else => {},
    }

    return parent;
}
665
666
/// Walks the tree with an explicit stack, merges each run of adjacent
/// `Text` siblings into the first node of the run and passes the merged
/// node to `postprocessTextNode`. Children of `Link` and `Image` nodes are
/// not visited.
fn postprocessTextNodes(self: *Parser) !void {
    var stack = try ArrayList(*nodes.AstNode).initCapacity(self.allocator, 1);
    defer stack.deinit();
    var children = ArrayList(*nodes.AstNode).init(self.allocator);
    defer children.deinit();

    try stack.append(self.root);

    while (stack.pop()) |node| {
        var nch = node.first_child;

        while (nch) |n| {
            var this_bracket = false;

            // Repeats while `n` is a text node followed by another text node.
            while (true) {
                switch (n.data.value) {
                    .Text => |*root| {
                        // `ns` is only read, so it must be `const`; current
                        // Zig rejects a never-mutated `var`.
                        const ns = n.next orelse {
                            try self.postprocessTextNode(n, root);
                            break;
                        };

                        switch (ns.data.value) {
                            .Text => |adj| {
                                // Append the sibling's text, then drop the sibling.
                                const old_len = root.len;
                                root.* = try self.allocator.realloc(root.*, old_len + adj.len);
                                @memcpy(root.*[old_len..], adj);
                                ns.detachDeinit();
                            },
                            else => {
                                try self.postprocessTextNode(n, root);
                                break;
                            },
                        }
                    },
                    .Link, .Image => {
                        this_bracket = true;
                        break;
                    },
                    else => break,
                }
            }

            if (!this_bracket) {
                try children.append(n);
            }

            nch = n.next;
        }

        // Transfer via a second list so children are popped in document order.
        while (children.pop()) |child| try stack.append(child);
    }
}
719
720
/// Runs the autolink processor over `text` (the literal of `node`) when the
/// autolink extension is enabled; otherwise does nothing.
fn postprocessTextNode(self: *Parser, node: *nodes.AstNode, text: *[]u8) !void {
    if (!self.options.extensions.autolink) return;
    try AutolinkProcessor.init(self.allocator, text).process(node);
}
725
726
/// Strips leading link reference definitions from `content`, registering
/// each one through `parseReferenceInline`.
/// Returns true when non-blank text remains afterwards.
fn resolveReferenceLinkDefinitions(self: *Parser, content: *ArrayList(u8)) !bool {
    var consumed: usize = 0;

    while (true) {
        const remaining = content.items[consumed..];
        if (remaining.len == 0 or remaining[0] != '[') break;

        var used: usize = undefined;
        if (!(try self.parseReferenceInline(remaining, &used))) break;
        consumed += used;
    }

    try content.replaceRange(0, consumed, "");

    return !strings.isBlank(content.items);
}
740
741
/// Tries to parse one link reference definition (`[label]: url "title"`) at
/// the start of `content`.
///
/// On success, stores the number of bytes consumed in `pos` and returns
/// true. The definition is recorded in the reference map unless its
/// normalized label is empty or already present; the first definition wins.
/// Returns false, leaving `pos` untouched, when `content` does not start
/// with a definition.
fn parseReferenceInline(self: *Parser, content: []const u8, pos: *usize) !bool {
    var subj = inlines.Subject.init(self.allocator, &self.refmap, &self.options, &self.special_chars, &self.skip_chars, content);
    defer subj.deinit();

    const lab = if (subj.linkLabel()) |l| lab: {
        if (l.len == 0)
            return false;
        break :lab l;
    } else return false;

    if ((subj.peekChar() orelse 0) != ':')
        return false;

    subj.pos += 1;
    subj.spnl();

    var url: []const u8 = undefined;
    var match_len: usize = undefined;
    if (!inlines.Subject.manualScanLinkUrl(subj.input[subj.pos..], &url, &match_len))
        return false;
    subj.pos += match_len;

    // A title is only looked for when whitespace separates it from the URL.
    const beforetitle = subj.pos;
    subj.spnl();
    const title_search: ?usize = if (subj.pos == beforetitle)
        null
    else
        try scanners.linkTitle(subj.input[subj.pos..]);
    const title = if (title_search) |title_match| title: {
        const t = subj.input[subj.pos .. subj.pos + title_match];
        subj.pos += title_match;
        break :title try self.allocator.dupe(u8, t);
    } else title: {
        subj.pos = beforetitle;
        break :title &[_]u8{};
    };
    defer self.allocator.free(title);

    subj.skipSpaces();
    if (!subj.skipLineEnd()) {
        if (title.len > 0) {
            // Text follows the would-be title, so it is not a title. The
            // definition is still valid if the URL ends its own line.
            subj.pos = beforetitle;
            subj.skipSpaces();
            if (!subj.skipLineEnd()) {
                return false;
            }
        } else {
            return false;
        }
    }

    const normalized = try strings.normalizeLabel(self.allocator, lab);
    if (normalized.len > 0) {
        if (subj.refmap.map.contains(normalized)) {
            // The first definition of a label wins.
            self.allocator.free(normalized);
        } else {
            // Build the whole entry before inserting it, so a failed
            // allocation cannot leave a half-initialised entry in the map
            // for `deinit` to free.
            errdefer self.allocator.free(normalized);
            const clean_url = try strings.cleanUrl(self.allocator, url);
            errdefer self.allocator.free(clean_url);
            const clean_title = try strings.cleanTitle(self.allocator, title);
            errdefer self.allocator.free(clean_title);

            // The map takes ownership of `normalized`, `clean_url` and `clean_title`.
            try subj.refmap.map.putNoClobber(normalized, .{
                .url = clean_url,
                .title = clean_title,
            });
        }
    }

    pos.* = subj.pos;
    return true;
}
809
810
/// Parses inline content for the whole document, starting at the root.
fn processInlines(self: *Parser) !void {
    return self.processInlinesNode(self.root);
}
813
814
/// Visits every node produced by `node.descendantsIterator()` and parses
/// inline content for those whose kind contains inlines.
fn processInlinesNode(self: *Parser, node: *nodes.AstNode) inlines.ParseError!void {
    var walker = node.descendantsIterator();
    while (walker.next()) |descendant| {
        if (!descendant.data.value.containsInlines()) continue;
        try self.parseInlines(descendant);
    }
}
822
823
/// Parses the right-trimmed content of `node` as inline elements, adding
/// them to `node`, then resolves emphasis and discards leftover brackets.
fn parseInlines(self: *Parser, node: *nodes.AstNode) inlines.ParseError!void {
    const trimmed = strings.rtrim(node.data.content.items);

    var subject = inlines.Subject.init(self.allocator, &self.refmap, &self.options, &self.special_chars, &self.skip_chars, trimmed);
    defer subject.deinit();

    while (try subject.parseInline(node)) {}
    try subject.processEmphasis(0);
    while (subject.popBracket()) {}
}
831
832
/// Moves `offset`/`column` forward over `line`.
///
/// With `columns` false, `in_count` counts bytes and a tab is consumed whole.
/// With `columns` true, `in_count` counts columns: a tab may be consumed
/// only in part, in which case `offset` stays on the tab and
/// `partially_consumed_tab` is set.
pub fn advanceOffset(self: *Parser, line: []const u8, in_count: usize, columns: bool) void {
    var remaining = in_count;

    while (remaining > 0) {
        if (line[self.offset] != '\t') {
            self.partially_consumed_tab = false;
            self.offset += 1;
            self.column += 1;
            remaining -= 1;
            continue;
        }

        const chars_to_tab = TAB_STOP - (self.column % TAB_STOP);

        if (!columns) {
            self.partially_consumed_tab = false;
            self.column += chars_to_tab;
            self.offset += 1;
            remaining -= 1;
            continue;
        }

        const step = @min(remaining, chars_to_tab);
        self.partially_consumed_tab = chars_to_tab > remaining;
        self.column += step;
        if (!self.partially_consumed_tab) self.offset += 1;
        remaining -= step;
    }
}
860
861
/// Consumes a block quote marker: up to three columns of indentation, '>'
/// and one optional following space or tab.
/// Returns false, consuming nothing, when the line has no such marker.
fn parseBlockQuotePrefix(self: *Parser, line: []const u8) bool {
    const indent = self.indent;
    if (indent > 3 or line[self.first_nonspace] != '>') return false;

    self.advanceOffset(line, indent + 1, true);
    if (strings.isSpaceOrTab(line[self.offset])) {
        self.advanceOffset(line, 1, true);
    }
    return true;
}
875
876
/// Decides whether the line continues the list item `container`.
/// It does when indented at least to the item's content column
/// (`marker_offset + padding`), or when it is blank and the item already
/// has a child. The matching indentation is consumed.
fn parseNodeItemPrefix(self: *Parser, line: []const u8, container: *nodes.AstNode, nl: *const nodes.NodeList) bool {
    const content_indent = nl.marker_offset + nl.padding;

    if (self.indent >= content_indent) {
        self.advanceOffset(line, content_indent, true);
        return true;
    }

    if (self.blank and container.first_child != null) {
        self.advanceOffset(line, self.first_nonspace - self.offset, false);
        return true;
    }

    return false;
}
887
888
/// Outcome of matching a line against an open code block.
const CodeBlockPrefixParseResult = enum {
    /// The line was a closing fence; the block is closed and the line is done.
    DoNotContinue,
    /// The line does not belong to the code block.
    NoMatch,
    /// The line continues the code block.
    Match,
};
893
894
/// Matches `line` against the open code block `container`.
///
/// Indented blocks continue on lines indented by `CODE_INDENT` or on blank
/// lines. Fenced blocks continue until a closing fence at least as long as
/// the opening one; up to `fence_offset` columns of leading whitespace are
/// stripped from each content line.
fn parseCodeBlockPrefix(self: *Parser, line: []const u8, container: *nodes.AstNode) !CodeBlockPrefixParseResult {
    const ncb = container.data.value.CodeBlock;

    if (!ncb.fenced) {
        if (self.indent >= CODE_INDENT) {
            self.advanceOffset(line, CODE_INDENT, true);
            return .Match;
        }
        if (self.blank) {
            self.advanceOffset(line, self.first_nonspace - self.offset, false);
            return .Match;
        }
        return .NoMatch;
    }

    const could_close = self.indent <= 3 and line[self.first_nonspace] == ncb.fence_char;
    const closing_len: usize = if (could_close)
        (try scanners.closeCodeFence(line[self.first_nonspace..])) orelse 0
    else
        0;

    if (closing_len >= ncb.fence_length) {
        self.advanceOffset(line, closing_len, false);
        self.current = (try self.finalize(container)).?;
        return .DoNotContinue;
    }

    // Strip at most as much indentation as the opening fence had.
    var left = ncb.fence_offset;
    while (left > 0 and strings.isSpaceOrTab(line[self.offset])) : (left -= 1) {
        self.advanceOffset(line, 1, true);
    }

    return .Match;
}
930
931
/// Returns whether an HTML block of type `t` continues onto the current
/// line: types 1-5 always do, types 6 and 7 stop at a blank line.
fn parseHtmlBlockPrefix(self: *Parser, t: u8) bool {
    return switch (t) {
        1...5 => true,
        6, 7 => !self.blank,
        else => unreachable,
    };
}
938
939
/// Returns true when only spaces and tabs separate `pos` from the line ending.
fn restOfLineIsBlank(line: []const u8, pos: usize) bool {
    var i = pos;
    while (strings.isSpaceOrTab(line[i])) : (i += 1) {}
    return strings.isLineEndChar(line[i]);
}

/// Recognises a bullet (`*`, `-`, `+`) or ordered (`1.` / `1)`, at most nine
/// digits) list marker at `line[input_pos]`. The marker must be followed by
/// whitespace.
///
/// When `interrupts_paragraph` is set, a marker followed only by blanks is
/// rejected, and an ordered list must start at 1.
///
/// On success, `matched` receives the marker length and `nl` the list data
/// with `marker_offset` and `padding` still zero. Returns false, leaving
/// both untouched, when there is no marker.
fn parseListMarker(line: []const u8, input_pos: usize, interrupts_paragraph: bool, matched: *usize, nl: *nodes.NodeList) bool {
    var pos = input_pos;
    const first = line[pos];

    if (first == '*' or first == '-' or first == '+') {
        pos += 1;
        if (!ascii.isWhitespace(line[pos])) {
            return false;
        }

        // Same line-end test as the ordered branch below.
        if (interrupts_paragraph and restOfLineIsBlank(line, pos)) {
            return false;
        }

        matched.* = pos - input_pos;
        nl.* = .{
            .list_type = .Bullet,
            .marker_offset = 0,
            .padding = 0,
            .start = 1,
            .delimiter = .Period,
            .bullet_char = first,
            .tight = false,
        };
        return true;
    }

    if (!ascii.isDigit(first)) {
        return false;
    }

    var start: usize = 0;
    var digits: u8 = 0;
    while (digits < 9 and ascii.isDigit(line[pos])) : (digits += 1) {
        start = (10 * start) + (line[pos] - '0');
        pos += 1;
    }

    if (interrupts_paragraph and start != 1) {
        return false;
    }

    const delimiter = line[pos];
    if (delimiter != '.' and delimiter != ')') {
        return false;
    }
    pos += 1;

    if (!ascii.isWhitespace(line[pos])) {
        return false;
    }

    if (interrupts_paragraph and restOfLineIsBlank(line, pos)) {
        return false;
    }

    matched.* = pos - input_pos;
    nl.* = .{
        .list_type = .Ordered,
        .marker_offset = 0,
        .padding = 0,
        .start = start,
        .delimiter = if (delimiter == '.')
            .Period
        else
            .Paren,
        .bullet_char = 0,
        .tight = false,
    };
    return true;
}
1022
1023
/// Two list descriptions belong to the same list when their type, delimiter
/// and bullet character all agree.
fn listsMatch(list_data: *const nodes.NodeList, item_data: *const nodes.NodeList) bool {
    if (list_data.list_type != item_data.list_type) return false;
    if (list_data.delimiter != item_data.delimiter) return false;
    return list_data.bullet_char == item_data.bullet_char;
}
1026
};
1027
1028
/// Renders `markdown` with `options` through `main.testMarkdownToHtml` and
/// expects the result to equal `html` exactly.
fn expectMarkdownHTML(options: Options, markdown: []const u8, html: []const u8) !void {
    const rendered = try main.testMarkdownToHtml(options, markdown);
    defer std.testing.allocator.free(rendered);

    try std.testing.expectEqualStrings(html, rendered);
}
1033
1034
test "convert simple emphases" {
    const markdown =
        \\hello, _world_ __world__ ___world___ *_world_* **_world_** *__world__*
        \\
        \\this is `yummy`
        \\
    ;
    const expected =
        \\<p>hello, <em>world</em> <strong>world</strong> <em><strong>world</strong></em> <em><em>world</em></em> <strong><em>world</em></strong> <em><strong>world</strong></em></p>
        \\<p>this is <code>yummy</code></p>
        \\
    ;
    try expectMarkdownHTML(.{}, markdown, expected);
}
1046
1047
test "smart quotes" {
    const options: Options = .{ .parse = .{ .smart = true } };
    const markdown = "\"Hey,\" she said. \"What's 'up'?\"\n";
    const expected = "<p>“Hey,” she said. “What’s ‘up’?”</p>\n";
    try expectMarkdownHTML(options, markdown, expected);
}
1050
1051
test "handles EOF without EOL" {
    // Input without a trailing newline still produces a complete paragraph.
    const markdown = "hello";
    const expected = "<p>hello</p>\n";
    try expectMarkdownHTML(.{}, markdown, expected);
}
1054
1055
test "accepts multiple lines" {
    const markdown = "hello\nthere\n";

    // Default rendering keeps the soft break as a newline.
    try expectMarkdownHTML(.{}, markdown, "<p>hello\nthere</p>\n");

    // With hard_breaks the soft break is rendered as <br />.
    const hard_breaks: Options = .{ .render = .{ .hard_breaks = true } };
    try expectMarkdownHTML(hard_breaks, markdown, "<p>hello<br />\nthere</p>\n");
}
1059
1060
test "smart hyphens" {
    const options: Options = .{ .parse = .{ .smart = true } };
    const markdown = "hyphen - en -- em --- four ---- five ----- six ------ seven -------\n";
    const expected = "<p>hyphen - en – em — four –– five —– six —— seven —––</p>\n";
    try expectMarkdownHTML(options, markdown, expected);
}
1063
1064
test "handles tabs" {
    // Each case is { markdown, expected html }.
    const cases = [_][2][]const u8{
        .{ "\tfoo\tbaz\t\tbim\n", "<pre><code>foo\tbaz\t\tbim\n</code></pre>\n" },
        .{ " \tfoo\tbaz\t\tbim\n", "<pre><code>foo\tbaz\t\tbim\n</code></pre>\n" },
        .{ " - foo\n\n\tbar\n", "<ul>\n<li>\n<p>foo</p>\n<p>bar</p>\n</li>\n</ul>\n" },
        .{ "#\tFoo\n", "<h1>Foo</h1>\n" },
        .{ "*\t*\t*\t\n", "<hr />\n" },
    };
    for (cases) |case| {
        try expectMarkdownHTML(.{}, case[0], case[1]);
    }
}
1071
1072
test "escapes" {
    // A backslash-escaped '#' does not start a heading.
    const markdown = "\\## foo\n";
    const expected = "<p>## foo</p>\n";
    try expectMarkdownHTML(.{}, markdown, expected);
}
1075
1076
test "setext heading override pointy" {
    // The setext underline takes precedence over the unfinished HTML tag, so
    // the tag text is rendered as escaped paragraph/heading content.
    const markdown = "<a title=\"a lot\n---\nof dashes\"/>\n";
    const expected = "<h2>&lt;a title=&quot;a lot</h2>\n<p>of dashes&quot;/&gt;</p>\n";
    try expectMarkdownHTML(.{}, markdown, expected);
}
1079
1080
test "fenced code blocks" {
    // Code block content is HTML-escaped in the output.
    try expectMarkdownHTML(.{}, "```\n<\n >\n```\n", "<pre><code>&lt;\n &gt;\n</code></pre>\n");
    // A closing fence must be at least as long as the opening fence.
    try expectMarkdownHTML(.{}, "````\naaa\n```\n``````\n", "<pre><code>aaa\n```\n</code></pre>\n");
}
1084
1085
test "html blocks" {
    const unsafe: Options = .{ .render = .{ .unsafe = true } };

    // A closing tag on its own does not start an HTML block inside a paragraph.
    const stray_close_in =
        \\_world_.
        \\</pre>
    ;
    const stray_close_out =
        \\<p><em>world</em>.
        \\</pre></p>
        \\
    ;
    try expectMarkdownHTML(unsafe, stray_close_in, stray_close_out);

    // A blank line ends the HTML block; Markdown resumes after it.
    const table_in =
        \\<table><tr><td>
        \\<pre>
        \\**Hello**,
        \\
        \\_world_.
        \\</pre>
        \\</td></tr></table>
    ;
    const table_out =
        \\<table><tr><td>
        \\<pre>
        \\**Hello**,
        \\<p><em>world</em>.
        \\</pre></p>
        \\</td></tr></table>
        \\
    ;
    try expectMarkdownHTML(unsafe, table_in, table_out);

    // Tag names are matched case-insensitively.
    const div_in =
        \\<DIV CLASS="foo">
        \\
        \\*Markdown*
        \\
        \\</DIV>
    ;
    const div_out =
        \\<DIV CLASS="foo">
        \\<p><em>Markdown</em></p>
        \\</DIV>
        \\
    ;
    try expectMarkdownHTML(unsafe, div_in, div_out);

    // A <pre> block continues across blank lines until its closing tag.
    const pre_in =
        \\<pre language="haskell"><code>
        \\import Text.HTML.TagSoup
        \\
        \\main :: IO ()
        \\main = print $ parseTags tags
        \\</code></pre>
        \\okay
        \\
    ;
    const pre_out =
        \\<pre language="haskell"><code>
        \\import Text.HTML.TagSoup
        \\
        \\main :: IO ()
        \\main = print $ parseTags tags
        \\</code></pre>
        \\<p>okay</p>
        \\
    ;
    try expectMarkdownHTML(unsafe, pre_in, pre_out);
}
1146
1147
test "links" {
    // Each case is { markdown, expected html }.
    const cases = [_][2][]const u8{
        .{ "[foo](/url)\n", "<p><a href=\"/url\">foo</a></p>\n" },
        .{ "[foo](/url \"title\")\n", "<p><a href=\"/url\" title=\"title\">foo</a></p>\n" },
    };
    for (cases) |case| {
        try expectMarkdownHTML(.{}, case[0], case[1]);
    }
}
1151
1152
test "link reference definitions" {
    try expectMarkdownHTML(.{}, "[foo]: /url \"title\"\n\n[foo]\n", "<p><a href=\"/url\" title=\"title\">foo</a></p>\n");
    // Backslash escapes in the URL and title; the quote inside the title is
    // rendered as &quot;.
    try expectMarkdownHTML(.{}, "[foo]: /url\\bar\\*baz \"foo\\\"bar\\baz\"\n\n[foo]\n", "<p><a href=\"/url%5Cbar*baz\" title=\"foo&quot;bar\\baz\">foo</a></p>\n");
}
1156
1157
test "tables" {
    const options: Options = .{ .extensions = .{ .table = true } };
    const markdown =
        \\| foo | bar |
        \\| --- | --- |
        \\| baz | bim |
        \\
    ;
    const expected =
        \\<table>
        \\<thead>
        \\<tr>
        \\<th>foo</th>
        \\<th>bar</th>
        \\</tr>
        \\</thead>
        \\<tbody>
        \\<tr>
        \\<td>baz</td>
        \\<td>bim</td>
        \\</tr>
        \\</tbody>
        \\</table>
        \\
    ;
    try expectMarkdownHTML(options, markdown, expected);
}
1181
1182
test "strikethroughs" {
    const options: Options = .{ .extensions = .{ .strikethrough = true } };
    const markdown = "Hello ~world~ there.\n";
    const expected = "<p>Hello <del>world</del> there.</p>\n";
    try expectMarkdownHTML(options, markdown, expected);
}
1185
1186
test "images" {
    // An image used as the text of a link.
    const markdown = "[![moon](moon.jpg)](/uri)\n";
    const expected = "<p><a href=\"/uri\"><img src=\"moon.jpg\" alt=\"moon\" /></a></p>\n";
    try expectMarkdownHTML(.{}, markdown, expected);
}
1189
1190
test "autolink" {
    const options: Options = .{ .extensions = .{ .autolink = true } };

    // Each case is { markdown, expected html }.
    const cases = [_][2][]const u8{
        .{ "www.commonmark.org\n", "<p><a href=\"http://www.commonmark.org\">www.commonmark.org</a></p>\n" },
        .{ "http://commonmark.org\n", "<p><a href=\"http://commonmark.org\">http://commonmark.org</a></p>\n" },
        .{ "foo@bar.baz\n", "<p><a href=\"mailto:foo@bar.baz\">foo@bar.baz</a></p>\n" },
    };
    for (cases) |case| {
        try expectMarkdownHTML(options, case[0], case[1]);
    }
}
1195
1196
test "header anchors" {
    const options: Options = .{ .render = .{ .header_anchors = true } };

    // Repeated heading texts get numbered anchor ids.
    const markdown =
        \\# Hi.
        \\## Hi 1.
        \\### Hi.
        \\#### Hello.
        \\##### Hi.
        \\###### Hello.
        \\# Isn't it grand?
        \\
    ;
    const expected =
        \\<h1><a href="#hi" id="hi"></a>Hi.</h1>
        \\<h2><a href="#hi-1" id="hi-1"></a>Hi 1.</h2>
        \\<h3><a href="#hi-2" id="hi-2"></a>Hi.</h3>
        \\<h4><a href="#hello" id="hello"></a>Hello.</h4>
        \\<h5><a href="#hi-3" id="hi-3"></a>Hi.</h5>
        \\<h6><a href="#hello-1" id="hello-1"></a>Hello.</h6>
        \\<h1><a href="#isnt-it-grand" id="isnt-it-grand"></a>Isn't it grand?</h1>
        \\
    ;
    try expectMarkdownHTML(options, markdown, expected);
}
1217
1218
test "image lazy loading" {
    const options: Options = .{ .render = .{ .lazy_load_images = true } };
    // The input must contain the image that the expected output describes.
    const markdown = "![awawa](kass.gif)";
    const expected = "<p><img loading=\"lazy\" src=\"kass.gif\" alt=\"awawa\" /></p>\n";
    try expectMarkdownHTML(options, markdown, expected);
}
1221