all repos — grayfriday @ df341a4ed85b3aa2ee8cd16874520f2f789e4568

blackfriday fork with a few changes

node.go (view raw)

  1package blackfriday
  2
  3import (
  4	"bytes"
  5	"fmt"
  6)
  7
  8// NodeType specifies a type of a single node of a syntax tree. Usually one
  9// node (and its type) corresponds to a single markdown feature, e.g. emphasis
 10// or code block.
 11type NodeType int
 12
 13// Constants for identifying different types of nodes. See NodeType.
 14const (
 15	Document NodeType = iota
 16	BlockQuote
 17	List
 18	Item
 19	Paragraph
 20	Heading
 21	HorizontalRule
 22	Emph
 23	Strong
 24	Del
 25	Link
 26	Image
 27	Text
 28	HTMLBlock
 29	CodeBlock
 30	Softbreak
 31	Hardbreak
 32	Code
 33	HTMLSpan
 34	Table
 35	TableCell
 36	TableHead
 37	TableBody
 38	TableRow
 39)
 40
 41var nodeTypeNames = []string{
 42	Document:       "Document",
 43	BlockQuote:     "BlockQuote",
 44	List:           "List",
 45	Item:           "Item",
 46	Paragraph:      "Paragraph",
 47	Heading:        "Heading",
 48	HorizontalRule: "HorizontalRule",
 49	Emph:           "Emph",
 50	Strong:         "Strong",
 51	Del:            "Del",
 52	Link:           "Link",
 53	Image:          "Image",
 54	Text:           "Text",
 55	HTMLBlock:      "HTMLBlock",
 56	CodeBlock:      "CodeBlock",
 57	Softbreak:      "Softbreak",
 58	Hardbreak:      "Hardbreak",
 59	Code:           "Code",
 60	HTMLSpan:       "HTMLSpan",
 61	Table:          "Table",
 62	TableCell:      "TableCell",
 63	TableHead:      "TableHead",
 64	TableBody:      "TableBody",
 65	TableRow:       "TableRow",
 66}
 67
 68func (t NodeType) String() string {
 69	return nodeTypeNames[t]
 70}
 71
 72// ListData contains fields relevant to a List and Item node type.
 73type ListData struct {
 74	ListFlags       ListType
 75	Tight           bool   // Skip <p>s around list item data if true
 76	BulletChar      byte   // '*', '+' or '-' in bullet lists
 77	Delimiter       byte   // '.' or ')' after the number in ordered lists
 78	RefLink         []byte // If not nil, turns this list item into a footnote item and triggers different rendering
 79	IsFootnotesList bool   // This is a list of footnotes
 80}
 81
 82// LinkData contains fields relevant to a Link node type.
 83type LinkData struct {
 84	Destination []byte // Destination is what goes into a href
 85	Title       []byte // Title is the tooltip thing that goes in a title attribute
 86	NoteID      int    // NoteID contains a serial number of a footnote, zero if it's not a footnote
 87	Footnote    *Node  // If it's a footnote, this is a direct link to the footnote Node. Otherwise nil.
 88}
 89
 90// CodeBlockData contains fields relevant to a CodeBlock node type.
 91type CodeBlockData struct {
 92	IsFenced    bool   // Specifies whether it's a fenced code block or an indented one
 93	Info        []byte // This holds the info string
 94	FenceChar   byte
 95	FenceLength int
 96	FenceOffset int
 97}
 98
 99// TableCellData contains fields relevant to a TableCell node type.
100type TableCellData struct {
101	IsHeader bool           // This tells if it's under the header row
102	Align    CellAlignFlags // This holds the value for align attribute
103}
104
105// HeadingData contains fields relevant to a Heading node type.
106type HeadingData struct {
107	Level        int    // This holds the heading level number
108	HeadingID    string // This might hold heading ID, if present
109	IsTitleblock bool   // Specifies whether it's a title block
110}
111
112// Node is a single element in the abstract syntax tree of the parsed document.
113// It holds connections to the structurally neighboring nodes and, for certain
114// types of nodes, additional information that might be needed when rendering.
115type Node struct {
116	Type       NodeType // Determines the type of the node
117	Parent     *Node    // Points to the parent
118	FirstChild *Node    // Points to the first child, if any
119	LastChild  *Node    // Points to the last child, if any
120	Prev       *Node    // Previous sibling; nil if it's the first child
121	Next       *Node    // Next sibling; nil if it's the last child
122
123	Literal []byte // Text contents of the leaf nodes
124
125	HeadingData   // Populated if Type is Heading
126	ListData      // Populated if Type is List
127	CodeBlockData // Populated if Type is CodeBlock
128	LinkData      // Populated if Type is Link
129	TableCellData // Populated if Type is TableCell
130
131	content []byte // Markdown content of the block nodes
132	open    bool   // Specifies an open block node that has not been finished to process yet
133}
134
135// NewNode allocates a node of a specified type.
136func NewNode(typ NodeType) *Node {
137	return &Node{
138		Type: typ,
139		open: true,
140	}
141}
142
143func (n *Node) String() string {
144	ellipsis := ""
145	snippet := n.Literal
146	if len(snippet) > 16 {
147		snippet = snippet[:16]
148		ellipsis = "..."
149	}
150	return fmt.Sprintf("%s: '%s%s'", n.Type, snippet, ellipsis)
151}
152
153// Unlink removes node 'n' from the tree.
154// It panics if the node is nil.
155func (n *Node) Unlink() {
156	if n.Prev != nil {
157		n.Prev.Next = n.Next
158	} else if n.Parent != nil {
159		n.Parent.FirstChild = n.Next
160	}
161	if n.Next != nil {
162		n.Next.Prev = n.Prev
163	} else if n.Parent != nil {
164		n.Parent.LastChild = n.Prev
165	}
166	n.Parent = nil
167	n.Next = nil
168	n.Prev = nil
169}
170
171// AppendChild adds a node 'child' as a child of 'n'.
172// It panics if either node is nil.
173func (n *Node) AppendChild(child *Node) {
174	child.Unlink()
175	child.Parent = n
176	if n.LastChild != nil {
177		n.LastChild.Next = child
178		child.Prev = n.LastChild
179		n.LastChild = child
180	} else {
181		n.FirstChild = child
182		n.LastChild = child
183	}
184}
185
186// InsertBefore inserts 'sibling' immediately before 'n'.
187// It panics if either node is nil.
188func (n *Node) InsertBefore(sibling *Node) {
189	sibling.Unlink()
190	sibling.Prev = n.Prev
191	if sibling.Prev != nil {
192		sibling.Prev.Next = sibling
193	}
194	sibling.Next = n
195	n.Prev = sibling
196	sibling.Parent = n.Parent
197	if sibling.Prev == nil {
198		sibling.Parent.FirstChild = sibling
199	}
200}
201
202// IsContainer returns true if 'n' can contain children.
203func (n *Node) IsContainer() bool {
204	switch n.Type {
205	case Document:
206		fallthrough
207	case BlockQuote:
208		fallthrough
209	case List:
210		fallthrough
211	case Item:
212		fallthrough
213	case Paragraph:
214		fallthrough
215	case Heading:
216		fallthrough
217	case Emph:
218		fallthrough
219	case Strong:
220		fallthrough
221	case Del:
222		fallthrough
223	case Link:
224		fallthrough
225	case Image:
226		fallthrough
227	case Table:
228		fallthrough
229	case TableHead:
230		fallthrough
231	case TableBody:
232		fallthrough
233	case TableRow:
234		fallthrough
235	case TableCell:
236		return true
237	default:
238		return false
239	}
240}
241
242// IsLeaf returns true if 'n' is a leaf node.
243func (n *Node) IsLeaf() bool {
244	return !n.IsContainer()
245}
246
247func (n *Node) canContain(t NodeType) bool {
248	if n.Type == List {
249		return t == Item
250	}
251	if n.Type == Document || n.Type == BlockQuote || n.Type == Item {
252		return t != Item
253	}
254	if n.Type == Table {
255		return t == TableHead || t == TableBody
256	}
257	if n.Type == TableHead || n.Type == TableBody {
258		return t == TableRow
259	}
260	if n.Type == TableRow {
261		return t == TableCell
262	}
263	return false
264}
265
266// WalkStatus allows NodeVisitor to have some control over the tree traversal.
267// It is returned from NodeVisitor and different values allow Node.Walk to
268// decide which node to go to next.
269type WalkStatus int
270
271const (
272	// GoToNext is the default traversal of every node.
273	GoToNext WalkStatus = iota
274	// SkipChildren tells walker to skip all children of current node.
275	SkipChildren
276	// Terminate tells walker to terminate the traversal.
277	Terminate
278)
279
280// NodeVisitor is a callback to be called when traversing the syntax tree.
281// Called twice for every node: once with entering=true when the branch is
282// first visited, then with entering=false after all the children are done.
283type NodeVisitor func(node *Node, entering bool) WalkStatus
284
285// Walk is a convenience method that instantiates a walker and starts a
286// traversal of subtree rooted at n.
287func (n *Node) Walk(visitor NodeVisitor) {
288	w := newNodeWalker(n)
289	for w.current != nil {
290		status := visitor(w.current, w.entering)
291		switch status {
292		case GoToNext:
293			w.next()
294		case SkipChildren:
295			w.entering = false
296			w.next()
297		case Terminate:
298			return
299		}
300	}
301}
302
303type nodeWalker struct {
304	current  *Node
305	root     *Node
306	entering bool
307}
308
309func newNodeWalker(root *Node) *nodeWalker {
310	return &nodeWalker{
311		current:  root,
312		root:     root,
313		entering: true,
314	}
315}
316
317func (nw *nodeWalker) next() {
318	if (!nw.current.IsContainer() || !nw.entering) && nw.current == nw.root {
319		nw.current = nil
320		return
321	}
322	if nw.entering && nw.current.IsContainer() {
323		if nw.current.FirstChild != nil {
324			nw.current = nw.current.FirstChild
325			nw.entering = true
326		} else {
327			nw.entering = false
328		}
329	} else if nw.current.Next == nil {
330		nw.current = nw.current.Parent
331		nw.entering = false
332	} else {
333		nw.current = nw.current.Next
334		nw.entering = true
335	}
336}
337
338func dump(ast *Node) {
339	fmt.Println(dumpString(ast))
340}
341
342func dumpR(ast *Node, depth int) string {
343	if ast == nil {
344		return ""
345	}
346	indent := bytes.Repeat([]byte("\t"), depth)
347	content := ast.Literal
348	if content == nil {
349		content = ast.content
350	}
351	result := fmt.Sprintf("%s%s(%q)\n", indent, ast.Type, content)
352	for n := ast.FirstChild; n != nil; n = n.Next {
353		result += dumpR(n, depth+1)
354	}
355	return result
356}
357
358func dumpString(ast *Node) string {
359	return dumpR(ast, 0)
360}