all repos — grayfriday @ d9ffdb74649c9a714d43540d0f9dc596f742ebba

blackfriday fork with a few changes

node.go (view raw)

  1package blackfriday
  2
  3import (
  4	"bytes"
  5	"fmt"
  6)
  7
  8// NodeType specifies a type of a single node of a syntax tree. Usually one
  9// node (and its type) corresponds to a single markdown feature, e.g. emphasis
 10// or code block.
 11type NodeType int
 12
 13// Constants for identifying different types of nodes. See NodeType.
 14const (
 15	Document NodeType = iota
 16	BlockQuote
 17	List
 18	Item
 19	Paragraph
 20	Header
 21	HorizontalRule
 22	Emph
 23	Strong
 24	Del
 25	Link
 26	Image
 27	Text
 28	HTMLBlock
 29	CodeBlock
 30	Softbreak
 31	Hardbreak
 32	Code
 33	HTMLSpan
 34	Table
 35	TableCell
 36	TableHead
 37	TableBody
 38	TableRow
 39)
 40
 41var nodeTypeNames = []string{
 42	Document:       "Document",
 43	BlockQuote:     "BlockQuote",
 44	List:           "List",
 45	Item:           "Item",
 46	Paragraph:      "Paragraph",
 47	Header:         "Header",
 48	HorizontalRule: "HorizontalRule",
 49	Emph:           "Emph",
 50	Strong:         "Strong",
 51	Del:            "Del",
 52	Link:           "Link",
 53	Image:          "Image",
 54	Text:           "Text",
 55	HTMLBlock:      "HTMLBlock",
 56	CodeBlock:      "CodeBlock",
 57	Softbreak:      "Softbreak",
 58	Hardbreak:      "Hardbreak",
 59	Code:           "Code",
 60	HTMLSpan:       "HTMLSpan",
 61	Table:          "Table",
 62	TableCell:      "TableCell",
 63	TableHead:      "TableHead",
 64	TableBody:      "TableBody",
 65	TableRow:       "TableRow",
 66}
 67
 68func (t NodeType) String() string {
 69	return nodeTypeNames[t]
 70}
 71
 72// ListData contains fields relevant to a List and Item node type.
 73type ListData struct {
 74	ListFlags       ListType
 75	Tight           bool   // Skip <p>s around list item data if true
 76	BulletChar      byte   // '*', '+' or '-' in bullet lists
 77	Delimiter       byte   // '.' or ')' after the number in ordered lists
 78	RefLink         []byte // If not nil, turns this list item into a footnote item and triggers different rendering
 79	IsFootnotesList bool   // This is a list of footnotes
 80}
 81
 82// LinkData contains fields relevant to a Link node type.
 83type LinkData struct {
 84	Destination []byte // Destination is what goes into a href
 85	Title       []byte // Title is the tooltip thing that goes in a title attribute
 86	NoteID      int    // NoteID contains a serial number of a footnote, zero if it's not a footnote
 87}
 88
 89// CodeBlockData contains fields relevant to a CodeBlock node type.
 90type CodeBlockData struct {
 91	IsFenced    bool   // Specifies whether it's a fenced code block or an indented one
 92	Info        []byte // This holds the info string
 93	FenceChar   byte
 94	FenceLength int
 95	FenceOffset int
 96}
 97
 98// TableCellData contains fields relevant to a TableCell node type.
 99type TableCellData struct {
100	IsHeader bool           // This tells if it's under the header row
101	Align    CellAlignFlags // This holds the value for align attribute
102}
103
104// HeaderData contains fields relevant to a Header node type.
105type HeaderData struct {
106	Level        int    // This holds the heading level number
107	HeaderID     string // This might hold header ID, if present
108	IsTitleblock bool   // Specifies whether it's a title block
109}
110
111// Node is a single element in the abstract syntax tree of the parsed document.
112// It holds connections to the structurally neighboring nodes and, for certain
113// types of nodes, additional information that might be needed when rendering.
114type Node struct {
115	Type       NodeType // Determines the type of the node
116	Parent     *Node    // Points to the parent
117	FirstChild *Node    // Points to the first child, if any
118	LastChild  *Node    // Points to the last child, if any
119	Prev       *Node    // Previous sibling; nil if it's the first child
120	Next       *Node    // Next sibling; nil if it's the last child
121
122	Literal []byte // Text contents of the leaf nodes
123
124	HeaderData    // Populated if Type is Header
125	ListData      // Populated if Type is List
126	CodeBlockData // Populated if Type is CodeBlock
127	LinkData      // Populated if Type is Link
128	TableCellData // Populated if Type is TableCell
129
130	content []byte // Markdown content of the block nodes
131	open    bool   // Specifies an open block node that has not been finished to process yet
132}
133
134// NewNode allocates a node of a specified type.
135func NewNode(typ NodeType) *Node {
136	return &Node{
137		Type: typ,
138		open: true,
139	}
140}
141
142func (n *Node) String() string {
143	ellipsis := ""
144	snippet := n.Literal
145	if len(snippet) > 16 {
146		snippet = snippet[:16]
147		ellipsis = "..."
148	}
149	return fmt.Sprintf("%s: '%s%s'", n.Type, snippet, ellipsis)
150}
151
152// Unlink removes node 'n' from the tree.
153// It panics if the node is nil.
154func (n *Node) Unlink() {
155	if n.Prev != nil {
156		n.Prev.Next = n.Next
157	} else if n.Parent != nil {
158		n.Parent.FirstChild = n.Next
159	}
160	if n.Next != nil {
161		n.Next.Prev = n.Prev
162	} else if n.Parent != nil {
163		n.Parent.LastChild = n.Prev
164	}
165	n.Parent = nil
166	n.Next = nil
167	n.Prev = nil
168}
169
170// AppendChild adds a node 'child' as a child of 'n'.
171// It panics if either node is nil.
172func (n *Node) AppendChild(child *Node) {
173	child.Unlink()
174	child.Parent = n
175	if n.LastChild != nil {
176		n.LastChild.Next = child
177		child.Prev = n.LastChild
178		n.LastChild = child
179	} else {
180		n.FirstChild = child
181		n.LastChild = child
182	}
183}
184
185// InsertBefore inserts 'sibling' immediately before 'n'.
186// It panics if either node is nil.
187func (n *Node) InsertBefore(sibling *Node) {
188	sibling.Unlink()
189	sibling.Prev = n.Prev
190	if sibling.Prev != nil {
191		sibling.Prev.Next = sibling
192	}
193	sibling.Next = n
194	n.Prev = sibling
195	sibling.Parent = n.Parent
196	if sibling.Prev == nil {
197		sibling.Parent.FirstChild = sibling
198	}
199}
200
201func (n *Node) isContainer() bool {
202	switch n.Type {
203	case Document:
204		fallthrough
205	case BlockQuote:
206		fallthrough
207	case List:
208		fallthrough
209	case Item:
210		fallthrough
211	case Paragraph:
212		fallthrough
213	case Header:
214		fallthrough
215	case Emph:
216		fallthrough
217	case Strong:
218		fallthrough
219	case Del:
220		fallthrough
221	case Link:
222		fallthrough
223	case Image:
224		fallthrough
225	case Table:
226		fallthrough
227	case TableHead:
228		fallthrough
229	case TableBody:
230		fallthrough
231	case TableRow:
232		fallthrough
233	case TableCell:
234		return true
235	default:
236		return false
237	}
238}
239
240func (n *Node) canContain(t NodeType) bool {
241	if n.Type == List {
242		return t == Item
243	}
244	if n.Type == Document || n.Type == BlockQuote || n.Type == Item {
245		return t != Item
246	}
247	if n.Type == Table {
248		return t == TableHead || t == TableBody
249	}
250	if n.Type == TableHead || n.Type == TableBody {
251		return t == TableRow
252	}
253	if n.Type == TableRow {
254		return t == TableCell
255	}
256	return false
257}
258
259// WalkStatus allows NodeVisitor to have some control over the tree traversal.
260// It is returned from NodeVisitor and different values allow Node.Walk to
261// decide which node to go to next.
262type WalkStatus int
263
264const (
265	// GoToNext is the default traversal of every node.
266	GoToNext WalkStatus = iota
267	// SkipChildren tells walker to skip all children of current node.
268	SkipChildren
269	// Terminate tells walker to terminate the traversal.
270	Terminate
271)
272
273// NodeVisitor is a callback to be called when traversing the syntax tree.
274// Called twice for every node: once with entering=true when the branch is
275// first visited, then with entering=false after all the children are done.
276type NodeVisitor func(node *Node, entering bool) WalkStatus
277
278// Walk is a convenience method that instantiates a walker and starts a
279// traversal of subtree rooted at n.
280func (root *Node) Walk(visitor NodeVisitor) {
281	w := newNodeWalker(root)
282	for w.current != nil {
283		status := visitor(w.current, w.entering)
284		switch status {
285		case GoToNext:
286			w.next()
287		case SkipChildren:
288			w.entering = false
289			w.next()
290		case Terminate:
291			return
292		}
293	}
294}
295
296type nodeWalker struct {
297	current  *Node
298	root     *Node
299	entering bool
300}
301
302func newNodeWalker(root *Node) *nodeWalker {
303	return &nodeWalker{
304		current:  root,
305		root:     root,
306		entering: true,
307	}
308}
309
310func (nw *nodeWalker) next() {
311	if !nw.entering && nw.current == nw.root {
312		nw.current = nil
313		return
314	}
315	if nw.entering && nw.current.isContainer() {
316		if nw.current.FirstChild != nil {
317			nw.current = nw.current.FirstChild
318			nw.entering = true
319		} else {
320			nw.entering = false
321		}
322	} else if nw.current.Next == nil {
323		nw.current = nw.current.Parent
324		nw.entering = false
325	} else {
326		nw.current = nw.current.Next
327		nw.entering = true
328	}
329}
330
331func dump(ast *Node) {
332	fmt.Println(dumpString(ast))
333}
334
335func dumpR(ast *Node, depth int) string {
336	if ast == nil {
337		return ""
338	}
339	indent := bytes.Repeat([]byte("\t"), depth)
340	content := ast.Literal
341	if content == nil {
342		content = ast.content
343	}
344	result := fmt.Sprintf("%s%s(%q)\n", indent, ast.Type, content)
345	for n := ast.FirstChild; n != nil; n = n.Next {
346		result += dumpR(n, depth+1)
347	}
348	return result
349}
350
351func dumpString(ast *Node) string {
352	return dumpR(ast, 0)
353}