node.go (view raw)
1package blackfriday
2
3import (
4 "bytes"
5 "fmt"
6)
7
8type NodeType int
9
10const (
11 Document NodeType = iota
12 BlockQuote
13 List
14 Item
15 Paragraph
16 Header
17 HorizontalRule
18 Emph
19 Strong
20 Del
21 Link
22 Image
23 Text
24 HTMLBlock
25 CodeBlock
26 Softbreak
27 Hardbreak
28 Code
29 HTMLSpan
30 Table
31 TableCell
32 TableHead
33 TableBody
34 TableRow
35)
36
37var nodeTypeNames = []string{
38 Document: "Document",
39 BlockQuote: "BlockQuote",
40 List: "List",
41 Item: "Item",
42 Paragraph: "Paragraph",
43 Header: "Header",
44 HorizontalRule: "HorizontalRule",
45 Emph: "Emph",
46 Strong: "Strong",
47 Del: "Del",
48 Link: "Link",
49 Image: "Image",
50 Text: "Text",
51 HTMLBlock: "HTMLBlock",
52 CodeBlock: "CodeBlock",
53 Softbreak: "Softbreak",
54 Hardbreak: "Hardbreak",
55 Code: "Code",
56 HTMLSpan: "HTMLSpan",
57 Table: "Table",
58 TableCell: "TableCell",
59 TableHead: "TableHead",
60 TableBody: "TableBody",
61 TableRow: "TableRow",
62}
63
64func (t NodeType) String() string {
65 return nodeTypeNames[t]
66}
67
68type ListData struct {
69 ListFlags ListType
70 Tight bool // Skip <p>s around list item data if true
71 BulletChar byte // '*', '+' or '-' in bullet lists
72 Delimiter byte // '.' or ')' after the number in ordered lists
73 RefLink []byte // If not nil, turns this list item into a footnote item and triggers different rendering
74}
75
76type LinkData struct {
77 Destination []byte
78 Title []byte
79 NoteID int
80}
81
82type CodeBlockData struct {
83 IsFenced bool // Specifies whether it's a fenced code block or an indented one
84 Info []byte // This holds the info string
85 FenceChar byte
86 FenceLength int
87 FenceOffset int
88}
89
90type TableCellData struct {
91 IsHeader bool // This tells if it's under the header row
92 Align CellAlignFlags // This holds the value for align attribute
93}
94
95type HeaderData struct {
96 Level int // This holds the heading level number
97 HeaderID string // This might hold header ID, if present
98 IsTitleblock bool // Specifies whether it's a title block
99}
100
101// Node is a single element in the abstract syntax tree of the parsed document.
102// It holds connections to the structurally neighboring nodes and, for certain
103// types of nodes, additional information that might be needed when rendering.
104type Node struct {
105 Type NodeType // Determines the type of the node
106 Parent *Node // Points to the parent
107 FirstChild *Node // Points to the first child, if any
108 LastChild *Node // Points to the last child, if any
109 Prev *Node // Previous sibling; nil if it's the first child
110 Next *Node // Next sibling; nil if it's the last child
111
112 Literal []byte // Text contents of the leaf nodes
113
114 HeaderData // Populated if Type == Header
115 ListData // Populated if Type == List
116 CodeBlockData // Populated if Type == CodeBlock
117 LinkData // Populated if Type == Link
118 TableCellData // Populated if Type == TableCell
119
120 content []byte // Markdown content of the block nodes
121 open bool // Specifies an open block node that has not been finished to process yet
122}
123
124func NewNode(typ NodeType) *Node {
125 return &Node{
126 Type: typ,
127 open: true,
128 }
129}
130
131func (n *Node) unlink() {
132 if n.Prev != nil {
133 n.Prev.Next = n.Next
134 } else if n.Parent != nil {
135 n.Parent.FirstChild = n.Next
136 }
137 if n.Next != nil {
138 n.Next.Prev = n.Prev
139 } else if n.Parent != nil {
140 n.Parent.LastChild = n.Prev
141 }
142 n.Parent = nil
143 n.Next = nil
144 n.Prev = nil
145}
146
147func (n *Node) appendChild(child *Node) {
148 child.unlink()
149 child.Parent = n
150 if n.LastChild != nil {
151 n.LastChild.Next = child
152 child.Prev = n.LastChild
153 n.LastChild = child
154 } else {
155 n.FirstChild = child
156 n.LastChild = child
157 }
158}
159
160func (n *Node) insertBefore(sibling *Node) {
161 sibling.unlink()
162 sibling.Prev = n.Prev
163 if sibling.Prev != nil {
164 sibling.Prev.Next = sibling
165 }
166 sibling.Next = n
167 n.Prev = sibling
168 sibling.Parent = n.Parent
169 if sibling.Prev == nil {
170 sibling.Parent.FirstChild = sibling
171 }
172}
173
174func (n *Node) isContainer() bool {
175 switch n.Type {
176 case Document:
177 fallthrough
178 case BlockQuote:
179 fallthrough
180 case List:
181 fallthrough
182 case Item:
183 fallthrough
184 case Paragraph:
185 fallthrough
186 case Header:
187 fallthrough
188 case Emph:
189 fallthrough
190 case Strong:
191 fallthrough
192 case Del:
193 fallthrough
194 case Link:
195 fallthrough
196 case Image:
197 fallthrough
198 case Table:
199 fallthrough
200 case TableHead:
201 fallthrough
202 case TableBody:
203 fallthrough
204 case TableRow:
205 fallthrough
206 case TableCell:
207 return true
208 default:
209 return false
210 }
211 return false
212}
213
214func (n *Node) canContain(t NodeType) bool {
215 if n.Type == List {
216 return t == Item
217 }
218 if n.Type == Document || n.Type == BlockQuote || n.Type == Item {
219 return t != Item
220 }
221 if n.Type == Table {
222 return t == TableHead || t == TableBody
223 }
224 if n.Type == TableHead || n.Type == TableBody {
225 return t == TableRow
226 }
227 if n.Type == TableRow {
228 return t == TableCell
229 }
230 return false
231}
232
233// WalkStatus allows NodeVisitor to have some control over the tree traversal.
234// It is returned from NodeVisitor and different values allow Node.Walk to
235// decide which node to go to next.
236type WalkStatus int
237
238const (
239 GoToNext WalkStatus = iota // The default traversal of every node.
240 SkipChildren // Skips all children of current node.
241 Terminate // Terminates the traversal.
242)
243
244// NodeVisitor is a callback to be called when traversing the syntax tree.
245// Called twice for every node: once with entering=true when the branch is
246// first visited, then with entering=false after all the children are done.
247type NodeVisitor func(node *Node, entering bool) WalkStatus
248
249func (root *Node) Walk(visitor NodeVisitor) {
250 walker := NewNodeWalker(root)
251 node, entering := walker.next()
252 for node != nil {
253 status := visitor(node, entering)
254 switch status {
255 case GoToNext:
256 node, entering = walker.next()
257 case SkipChildren:
258 node, entering = walker.resumeAt(node, false)
259 case Terminate:
260 return
261 }
262 }
263}
264
265type NodeWalker struct {
266 current *Node
267 root *Node
268 entering bool
269}
270
271func NewNodeWalker(root *Node) *NodeWalker {
272 return &NodeWalker{
273 current: root,
274 root: nil,
275 entering: true,
276 }
277}
278
279func (nw *NodeWalker) next() (*Node, bool) {
280 if nw.current == nil {
281 return nil, false
282 }
283 if nw.root == nil {
284 nw.root = nw.current
285 return nw.current, nw.entering
286 }
287 if nw.entering && nw.current.isContainer() {
288 if nw.current.FirstChild != nil {
289 nw.current = nw.current.FirstChild
290 nw.entering = true
291 } else {
292 nw.entering = false
293 }
294 } else if nw.current.Next == nil {
295 nw.current = nw.current.Parent
296 nw.entering = false
297 } else {
298 nw.current = nw.current.Next
299 nw.entering = true
300 }
301 if nw.current == nw.root {
302 return nil, false
303 }
304 return nw.current, nw.entering
305}
306
307func (nw *NodeWalker) resumeAt(node *Node, entering bool) (*Node, bool) {
308 nw.current = node
309 nw.entering = entering
310 return nw.next()
311}
312
313func (ast *Node) String() string {
314 return dumpString(ast)
315}
316
317func dump(ast *Node) {
318 fmt.Println(dumpString(ast))
319}
320
321func dump_r(ast *Node, depth int) string {
322 if ast == nil {
323 return ""
324 }
325 indent := bytes.Repeat([]byte("\t"), depth)
326 content := ast.Literal
327 if content == nil {
328 content = ast.content
329 }
330 result := fmt.Sprintf("%s%s(%q)\n", indent, ast.Type, content)
331 for n := ast.FirstChild; n != nil; n = n.Next {
332 result += dump_r(n, depth+1)
333 }
334 return result
335}
336
337func dumpString(ast *Node) string {
338 return dump_r(ast, 0)
339}