node.go (view raw)
1package blackfriday
2
3import (
4 "bytes"
5 "fmt"
6)
7
8// NodeType specifies a type of a single node of a syntax tree. Usually one
9// node (and its type) corresponds to a single markdown feature, e.g. emphasis
10// or code block.
11type NodeType int
12
13// Constants for identifying different types of nodes. See NodeType.
14const (
15 Document NodeType = iota
16 BlockQuote
17 List
18 Item
19 Paragraph
20 Header
21 HorizontalRule
22 Emph
23 Strong
24 Del
25 Link
26 Image
27 Text
28 HTMLBlock
29 CodeBlock
30 Softbreak
31 Hardbreak
32 Code
33 HTMLSpan
34 Table
35 TableCell
36 TableHead
37 TableBody
38 TableRow
39)
40
41var nodeTypeNames = []string{
42 Document: "Document",
43 BlockQuote: "BlockQuote",
44 List: "List",
45 Item: "Item",
46 Paragraph: "Paragraph",
47 Header: "Header",
48 HorizontalRule: "HorizontalRule",
49 Emph: "Emph",
50 Strong: "Strong",
51 Del: "Del",
52 Link: "Link",
53 Image: "Image",
54 Text: "Text",
55 HTMLBlock: "HTMLBlock",
56 CodeBlock: "CodeBlock",
57 Softbreak: "Softbreak",
58 Hardbreak: "Hardbreak",
59 Code: "Code",
60 HTMLSpan: "HTMLSpan",
61 Table: "Table",
62 TableCell: "TableCell",
63 TableHead: "TableHead",
64 TableBody: "TableBody",
65 TableRow: "TableRow",
66}
67
68func (t NodeType) String() string {
69 return nodeTypeNames[t]
70}
71
72// ListData contains fields relevant to a List node type.
73type ListData struct {
74 ListFlags ListType
75 Tight bool // Skip <p>s around list item data if true
76 BulletChar byte // '*', '+' or '-' in bullet lists
77 Delimiter byte // '.' or ')' after the number in ordered lists
78 RefLink []byte // If not nil, turns this list item into a footnote item and triggers different rendering
79}
80
81// LinkData contains fields relevant to a Link node type.
82type LinkData struct {
83 Destination []byte
84 Title []byte
85 NoteID int
86}
87
88// CodeBlockData contains fields relevant to a CodeBlock node type.
89type CodeBlockData struct {
90 IsFenced bool // Specifies whether it's a fenced code block or an indented one
91 Info []byte // This holds the info string
92 FenceChar byte
93 FenceLength int
94 FenceOffset int
95}
96
97// TableCellData contains fields relevant to a TableCell node type.
98type TableCellData struct {
99 IsHeader bool // This tells if it's under the header row
100 Align CellAlignFlags // This holds the value for align attribute
101}
102
103// HeaderData contains fields relevant to a Header node type.
104type HeaderData struct {
105 Level int // This holds the heading level number
106 HeaderID string // This might hold header ID, if present
107 IsTitleblock bool // Specifies whether it's a title block
108}
109
110// Node is a single element in the abstract syntax tree of the parsed document.
111// It holds connections to the structurally neighboring nodes and, for certain
112// types of nodes, additional information that might be needed when rendering.
113type Node struct {
114 Type NodeType // Determines the type of the node
115 Parent *Node // Points to the parent
116 FirstChild *Node // Points to the first child, if any
117 LastChild *Node // Points to the last child, if any
118 Prev *Node // Previous sibling; nil if it's the first child
119 Next *Node // Next sibling; nil if it's the last child
120
121 Literal []byte // Text contents of the leaf nodes
122
123 HeaderData // Populated if Type is Header
124 ListData // Populated if Type is List
125 CodeBlockData // Populated if Type is CodeBlock
126 LinkData // Populated if Type is Link
127 TableCellData // Populated if Type is TableCell
128
129 content []byte // Markdown content of the block nodes
130 open bool // Specifies an open block node that has not been finished to process yet
131}
132
133// NewNode allocates a node of a specified type.
134func NewNode(typ NodeType) *Node {
135 return &Node{
136 Type: typ,
137 open: true,
138 }
139}
140
141func (n *Node) String() string {
142 ellipsis := ""
143 snippet := n.Literal
144 if len(snippet) > 16 {
145 snippet = snippet[:16]
146 ellipsis = "..."
147 }
148 return fmt.Sprintf("%s: '%s%s'", n.Type, snippet, ellipsis)
149}
150
151func (n *Node) unlink() {
152 if n.Prev != nil {
153 n.Prev.Next = n.Next
154 } else if n.Parent != nil {
155 n.Parent.FirstChild = n.Next
156 }
157 if n.Next != nil {
158 n.Next.Prev = n.Prev
159 } else if n.Parent != nil {
160 n.Parent.LastChild = n.Prev
161 }
162 n.Parent = nil
163 n.Next = nil
164 n.Prev = nil
165}
166
167func (n *Node) appendChild(child *Node) {
168 child.unlink()
169 child.Parent = n
170 if n.LastChild != nil {
171 n.LastChild.Next = child
172 child.Prev = n.LastChild
173 n.LastChild = child
174 } else {
175 n.FirstChild = child
176 n.LastChild = child
177 }
178}
179
180func (n *Node) insertBefore(sibling *Node) {
181 sibling.unlink()
182 sibling.Prev = n.Prev
183 if sibling.Prev != nil {
184 sibling.Prev.Next = sibling
185 }
186 sibling.Next = n
187 n.Prev = sibling
188 sibling.Parent = n.Parent
189 if sibling.Prev == nil {
190 sibling.Parent.FirstChild = sibling
191 }
192}
193
194func (n *Node) isContainer() bool {
195 switch n.Type {
196 case Document:
197 fallthrough
198 case BlockQuote:
199 fallthrough
200 case List:
201 fallthrough
202 case Item:
203 fallthrough
204 case Paragraph:
205 fallthrough
206 case Header:
207 fallthrough
208 case Emph:
209 fallthrough
210 case Strong:
211 fallthrough
212 case Del:
213 fallthrough
214 case Link:
215 fallthrough
216 case Image:
217 fallthrough
218 case Table:
219 fallthrough
220 case TableHead:
221 fallthrough
222 case TableBody:
223 fallthrough
224 case TableRow:
225 fallthrough
226 case TableCell:
227 return true
228 default:
229 return false
230 }
231}
232
233func (n *Node) canContain(t NodeType) bool {
234 if n.Type == List {
235 return t == Item
236 }
237 if n.Type == Document || n.Type == BlockQuote || n.Type == Item {
238 return t != Item
239 }
240 if n.Type == Table {
241 return t == TableHead || t == TableBody
242 }
243 if n.Type == TableHead || n.Type == TableBody {
244 return t == TableRow
245 }
246 if n.Type == TableRow {
247 return t == TableCell
248 }
249 return false
250}
251
252// WalkStatus allows NodeVisitor to have some control over the tree traversal.
253// It is returned from NodeVisitor and different values allow Node.Walk to
254// decide which node to go to next.
255type WalkStatus int
256
257const (
258 // GoToNext is the default traversal of every node.
259 GoToNext WalkStatus = iota
260 // SkipChildren tells walker to skip all children of current node.
261 SkipChildren
262 // Terminate tells walker to terminate the traversal.
263 Terminate
264)
265
266// NodeVisitor is a callback to be called when traversing the syntax tree.
267// Called twice for every node: once with entering=true when the branch is
268// first visited, then with entering=false after all the children are done.
269type NodeVisitor func(node *Node, entering bool) WalkStatus
270
271// Walk is a convenience method that instantiates a walker and starts a
272// traversal of subtree rooted at n.
273func (root *Node) Walk(visitor NodeVisitor) {
274 w := newNodeWalker(root)
275 for w.current != nil {
276 status := visitor(w.current, w.entering)
277 switch status {
278 case GoToNext:
279 w.next()
280 case SkipChildren:
281 w.entering = false
282 w.next()
283 case Terminate:
284 return
285 }
286 }
287}
288
289type nodeWalker struct {
290 current *Node
291 root *Node
292 entering bool
293}
294
295func newNodeWalker(root *Node) *nodeWalker {
296 return &nodeWalker{
297 current: root,
298 root: root,
299 entering: true,
300 }
301}
302
303func (nw *nodeWalker) next() {
304 if !nw.entering && nw.current == nw.root {
305 nw.current = nil
306 return
307 }
308 if nw.entering && nw.current.isContainer() {
309 if nw.current.FirstChild != nil {
310 nw.current = nw.current.FirstChild
311 nw.entering = true
312 } else {
313 nw.entering = false
314 }
315 } else if nw.current.Next == nil {
316 nw.current = nw.current.Parent
317 nw.entering = false
318 } else {
319 nw.current = nw.current.Next
320 nw.entering = true
321 }
322}
323
324func dump(ast *Node) {
325 fmt.Println(dumpString(ast))
326}
327
328func dumpR(ast *Node, depth int) string {
329 if ast == nil {
330 return ""
331 }
332 indent := bytes.Repeat([]byte("\t"), depth)
333 content := ast.Literal
334 if content == nil {
335 content = ast.content
336 }
337 result := fmt.Sprintf("%s%s(%q)\n", indent, ast.Type, content)
338 for n := ast.FirstChild; n != nil; n = n.Next {
339 result += dumpR(n, depth+1)
340 }
341 return result
342}
343
344func dumpString(ast *Node) string {
345 return dumpR(ast, 0)
346}