all repos — grayfriday @ 8ca36f24b78b58fd3e812987cfd45975fd9ed1af

blackfriday fork with a few changes

node.go (view raw)

  1package blackfriday
  2
  3import (
  4	"bytes"
  5	"fmt"
  6)
  7
  8// NodeType specifies a type of a single node of a syntax tree. Usually one
  9// node (and its type) corresponds to a single markdown feature, e.g. emphasis
 10// or code block.
 11type NodeType int
 12
 13// Constants for identifying different types of nodes. See NodeType.
 14const (
 15	Document NodeType = iota
 16	BlockQuote
 17	List
 18	Item
 19	Paragraph
 20	Heading
 21	HorizontalRule
 22	Emph
 23	Strong
 24	Del
 25	Link
 26	Image
 27	Text
 28	HTMLBlock
 29	CodeBlock
 30	Softbreak
 31	Hardbreak
 32	Code
 33	HTMLSpan
 34	Table
 35	TableCell
 36	TableHead
 37	TableBody
 38	TableRow
 39)
 40
 41var nodeTypeNames = []string{
 42	Document:       "Document",
 43	BlockQuote:     "BlockQuote",
 44	List:           "List",
 45	Item:           "Item",
 46	Paragraph:      "Paragraph",
 47	Heading:        "Heading",
 48	HorizontalRule: "HorizontalRule",
 49	Emph:           "Emph",
 50	Strong:         "Strong",
 51	Del:            "Del",
 52	Link:           "Link",
 53	Image:          "Image",
 54	Text:           "Text",
 55	HTMLBlock:      "HTMLBlock",
 56	CodeBlock:      "CodeBlock",
 57	Softbreak:      "Softbreak",
 58	Hardbreak:      "Hardbreak",
 59	Code:           "Code",
 60	HTMLSpan:       "HTMLSpan",
 61	Table:          "Table",
 62	TableCell:      "TableCell",
 63	TableHead:      "TableHead",
 64	TableBody:      "TableBody",
 65	TableRow:       "TableRow",
 66}
 67
 68func (t NodeType) String() string {
 69	return nodeTypeNames[t]
 70}
 71
 72// ListData contains fields relevant to a List and Item node type.
 73type ListData struct {
 74	ListFlags       ListType
 75	Tight           bool   // Skip <p>s around list item data if true
 76	BulletChar      byte   // '*', '+' or '-' in bullet lists
 77	Delimiter       byte   // '.' or ')' after the number in ordered lists
 78	RefLink         []byte // If not nil, turns this list item into a footnote item and triggers different rendering
 79	IsFootnotesList bool   // This is a list of footnotes
 80}
 81
 82// LinkData contains fields relevant to a Link node type.
 83type LinkData struct {
 84	Destination []byte // Destination is what goes into a href
 85	Title       []byte // Title is the tooltip thing that goes in a title attribute
 86	NoteID      int    // NoteID contains a serial number of a footnote, zero if it's not a footnote
 87	Width       int    // Width attribute for an img
 88	Height      int    // Height attribute for an img
 89	Footnote    *Node  // If it's a footnote, this is a direct link to the footnote Node. Otherwise nil.
 90}
 91
 92// CodeBlockData contains fields relevant to a CodeBlock node type.
 93type CodeBlockData struct {
 94	IsFenced    bool   // Specifies whether it's a fenced code block or an indented one
 95	Info        []byte // This holds the info string
 96	FenceChar   byte
 97	FenceLength int
 98	FenceOffset int
 99}
100
101// TableCellData contains fields relevant to a TableCell node type.
102type TableCellData struct {
103	IsHeader bool           // This tells if it's under the header row
104	Align    CellAlignFlags // This holds the value for align attribute
105}
106
107// HeadingData contains fields relevant to a Heading node type.
108type HeadingData struct {
109	Level        int    // This holds the heading level number
110	HeadingID    string // This might hold heading ID, if present
111	IsTitleblock bool   // Specifies whether it's a title block
112}
113
114// Node is a single element in the abstract syntax tree of the parsed document.
115// It holds connections to the structurally neighboring nodes and, for certain
116// types of nodes, additional information that might be needed when rendering.
117type Node struct {
118	Type       NodeType // Determines the type of the node
119	Parent     *Node    // Points to the parent
120	FirstChild *Node    // Points to the first child, if any
121	LastChild  *Node    // Points to the last child, if any
122	Prev       *Node    // Previous sibling; nil if it's the first child
123	Next       *Node    // Next sibling; nil if it's the last child
124
125	Literal []byte // Text contents of the leaf nodes
126
127	HeadingData   // Populated if Type is Heading
128	ListData      // Populated if Type is List
129	CodeBlockData // Populated if Type is CodeBlock
130	LinkData      // Populated if Type is Link
131	TableCellData // Populated if Type is TableCell
132
133	content []byte // Markdown content of the block nodes
134	open    bool   // Specifies an open block node that has not been finished to process yet
135}
136
137// NewNode allocates a node of a specified type.
138func NewNode(typ NodeType) *Node {
139	return &Node{
140		Type: typ,
141		open: true,
142	}
143}
144
145func (n *Node) String() string {
146	ellipsis := ""
147	snippet := n.Literal
148	if len(snippet) > 16 {
149		snippet = snippet[:16]
150		ellipsis = "..."
151	}
152	return fmt.Sprintf("%s: '%s%s'", n.Type, snippet, ellipsis)
153}
154
155// Unlink removes node 'n' from the tree.
156// It panics if the node is nil.
157func (n *Node) Unlink() {
158	if n.Prev != nil {
159		n.Prev.Next = n.Next
160	} else if n.Parent != nil {
161		n.Parent.FirstChild = n.Next
162	}
163	if n.Next != nil {
164		n.Next.Prev = n.Prev
165	} else if n.Parent != nil {
166		n.Parent.LastChild = n.Prev
167	}
168	n.Parent = nil
169	n.Next = nil
170	n.Prev = nil
171}
172
173// AppendChild adds a node 'child' as a child of 'n'.
174// It panics if either node is nil.
175func (n *Node) AppendChild(child *Node) {
176	child.Unlink()
177	child.Parent = n
178	if n.LastChild != nil {
179		n.LastChild.Next = child
180		child.Prev = n.LastChild
181		n.LastChild = child
182	} else {
183		n.FirstChild = child
184		n.LastChild = child
185	}
186}
187
188// InsertBefore inserts 'sibling' immediately before 'n'.
189// It panics if either node is nil.
190func (n *Node) InsertBefore(sibling *Node) {
191	sibling.Unlink()
192	sibling.Prev = n.Prev
193	if sibling.Prev != nil {
194		sibling.Prev.Next = sibling
195	}
196	sibling.Next = n
197	n.Prev = sibling
198	sibling.Parent = n.Parent
199	if sibling.Prev == nil {
200		sibling.Parent.FirstChild = sibling
201	}
202}
203
204// IsContainer returns true if 'n' can contain children.
205func (n *Node) IsContainer() bool {
206	switch n.Type {
207	case Document:
208		fallthrough
209	case BlockQuote:
210		fallthrough
211	case List:
212		fallthrough
213	case Item:
214		fallthrough
215	case Paragraph:
216		fallthrough
217	case Heading:
218		fallthrough
219	case Emph:
220		fallthrough
221	case Strong:
222		fallthrough
223	case Del:
224		fallthrough
225	case Link:
226		fallthrough
227	case Image:
228		fallthrough
229	case Table:
230		fallthrough
231	case TableHead:
232		fallthrough
233	case TableBody:
234		fallthrough
235	case TableRow:
236		fallthrough
237	case TableCell:
238		return true
239	default:
240		return false
241	}
242}
243
244// IsLeaf returns true if 'n' is a leaf node.
245func (n *Node) IsLeaf() bool {
246	return !n.IsContainer()
247}
248
249func (n *Node) canContain(t NodeType) bool {
250	if n.Type == List {
251		return t == Item
252	}
253	if n.Type == Document || n.Type == BlockQuote || n.Type == Item {
254		return t != Item
255	}
256	if n.Type == Table {
257		return t == TableHead || t == TableBody
258	}
259	if n.Type == TableHead || n.Type == TableBody {
260		return t == TableRow
261	}
262	if n.Type == TableRow {
263		return t == TableCell
264	}
265	return false
266}
267
268// WalkStatus allows NodeVisitor to have some control over the tree traversal.
269// It is returned from NodeVisitor and different values allow Node.Walk to
270// decide which node to go to next.
271type WalkStatus int
272
273const (
274	// GoToNext is the default traversal of every node.
275	GoToNext WalkStatus = iota
276	// SkipChildren tells walker to skip all children of current node.
277	SkipChildren
278	// Terminate tells walker to terminate the traversal.
279	Terminate
280)
281
282// NodeVisitor is a callback to be called when traversing the syntax tree.
283// Called twice for every node: once with entering=true when the branch is
284// first visited, then with entering=false after all the children are done.
285type NodeVisitor func(node *Node, entering bool) WalkStatus
286
287// Walk is a convenience method that instantiates a walker and starts a
288// traversal of subtree rooted at n.
289func (n *Node) Walk(visitor NodeVisitor) {
290	w := newNodeWalker(n)
291	for w.current != nil {
292		status := visitor(w.current, w.entering)
293		switch status {
294		case GoToNext:
295			w.next()
296		case SkipChildren:
297			w.entering = false
298			w.next()
299		case Terminate:
300			return
301		}
302	}
303}
304
305type nodeWalker struct {
306	current  *Node
307	root     *Node
308	entering bool
309}
310
311func newNodeWalker(root *Node) *nodeWalker {
312	return &nodeWalker{
313		current:  root,
314		root:     root,
315		entering: true,
316	}
317}
318
319func (nw *nodeWalker) next() {
320	if (!nw.current.IsContainer() || !nw.entering) && nw.current == nw.root {
321		nw.current = nil
322		return
323	}
324	if nw.entering && nw.current.IsContainer() {
325		if nw.current.FirstChild != nil {
326			nw.current = nw.current.FirstChild
327			nw.entering = true
328		} else {
329			nw.entering = false
330		}
331	} else if nw.current.Next == nil {
332		nw.current = nw.current.Parent
333		nw.entering = false
334	} else {
335		nw.current = nw.current.Next
336		nw.entering = true
337	}
338}
339
340func dump(ast *Node) {
341	fmt.Println(dumpString(ast))
342}
343
344func dumpR(ast *Node, depth int) string {
345	if ast == nil {
346		return ""
347	}
348	indent := bytes.Repeat([]byte("\t"), depth)
349	content := ast.Literal
350	if content == nil {
351		content = ast.content
352	}
353	result := fmt.Sprintf("%s%s(%q)\n", indent, ast.Type, content)
354	for n := ast.FirstChild; n != nil; n = n.Next {
355		result += dumpR(n, depth+1)
356	}
357	return result
358}
359
360func dumpString(ast *Node) string {
361	return dumpR(ast, 0)
362}