all repos — grayfriday @ 3d1baecb3d2a8f7db23de78dfd69d2408a6494ac

blackfriday fork with a few changes

node.go (view raw)

  1package blackfriday
  2
  3import (
  4	"bytes"
  5	"fmt"
  6)
  7
  8// NodeType specifies a type of a single node of a syntax tree. Usually one
  9// node (and its type) corresponds to a single markdown feature, e.g. emphasis
 10// or code block.
 11type NodeType int
 12
 13// Constants for identifying different types of nodes. See NodeType.
 14const (
 15	Document NodeType = iota
 16	BlockQuote
 17	List
 18	Item
 19	Paragraph
 20	Header
 21	HorizontalRule
 22	Emph
 23	Strong
 24	Del
 25	Link
 26	Image
 27	Text
 28	HTMLBlock
 29	CodeBlock
 30	Softbreak
 31	Hardbreak
 32	Code
 33	HTMLSpan
 34	Table
 35	TableCell
 36	TableHead
 37	TableBody
 38	TableRow
 39)
 40
 41var nodeTypeNames = []string{
 42	Document:       "Document",
 43	BlockQuote:     "BlockQuote",
 44	List:           "List",
 45	Item:           "Item",
 46	Paragraph:      "Paragraph",
 47	Header:         "Header",
 48	HorizontalRule: "HorizontalRule",
 49	Emph:           "Emph",
 50	Strong:         "Strong",
 51	Del:            "Del",
 52	Link:           "Link",
 53	Image:          "Image",
 54	Text:           "Text",
 55	HTMLBlock:      "HTMLBlock",
 56	CodeBlock:      "CodeBlock",
 57	Softbreak:      "Softbreak",
 58	Hardbreak:      "Hardbreak",
 59	Code:           "Code",
 60	HTMLSpan:       "HTMLSpan",
 61	Table:          "Table",
 62	TableCell:      "TableCell",
 63	TableHead:      "TableHead",
 64	TableBody:      "TableBody",
 65	TableRow:       "TableRow",
 66}
 67
 68func (t NodeType) String() string {
 69	return nodeTypeNames[t]
 70}
 71
 72// ListData contains fields relevant to a List and Item node type.
 73type ListData struct {
 74	ListFlags       ListType
 75	Tight           bool   // Skip <p>s around list item data if true
 76	BulletChar      byte   // '*', '+' or '-' in bullet lists
 77	Delimiter       byte   // '.' or ')' after the number in ordered lists
 78	RefLink         []byte // If not nil, turns this list item into a footnote item and triggers different rendering
 79	IsFootnotesList bool   // This is a list of footnotes
 80}
 81
 82// LinkData contains fields relevant to a Link node type.
 83type LinkData struct {
 84	Destination []byte // Destination is what goes into a href
 85	Title       []byte // Title is the tooltip thing that goes in a title attribute
 86	NoteID      int    // NoteID contains a serial number of a footnote, zero if it's not a footnote
 87	Footnote    *Node  // If it's a footnote, this is a direct link to the footnote Node. Otherwise nil.
 88}
 89
 90// CodeBlockData contains fields relevant to a CodeBlock node type.
 91type CodeBlockData struct {
 92	IsFenced    bool   // Specifies whether it's a fenced code block or an indented one
 93	Info        []byte // This holds the info string
 94	FenceChar   byte
 95	FenceLength int
 96	FenceOffset int
 97}
 98
 99// TableCellData contains fields relevant to a TableCell node type.
100type TableCellData struct {
101	IsHeader bool           // This tells if it's under the header row
102	Align    CellAlignFlags // This holds the value for align attribute
103}
104
105// HeaderData contains fields relevant to a Header node type.
106type HeaderData struct {
107	Level        int    // This holds the heading level number
108	HeaderID     string // This might hold header ID, if present
109	IsTitleblock bool   // Specifies whether it's a title block
110}
111
112// Node is a single element in the abstract syntax tree of the parsed document.
113// It holds connections to the structurally neighboring nodes and, for certain
114// types of nodes, additional information that might be needed when rendering.
115type Node struct {
116	Type       NodeType // Determines the type of the node
117	Parent     *Node    // Points to the parent
118	FirstChild *Node    // Points to the first child, if any
119	LastChild  *Node    // Points to the last child, if any
120	Prev       *Node    // Previous sibling; nil if it's the first child
121	Next       *Node    // Next sibling; nil if it's the last child
122
123	Literal []byte // Text contents of the leaf nodes
124
125	HeaderData    // Populated if Type is Header
126	ListData      // Populated if Type is List
127	CodeBlockData // Populated if Type is CodeBlock
128	LinkData      // Populated if Type is Link
129	TableCellData // Populated if Type is TableCell
130
131	content []byte // Markdown content of the block nodes
132	open    bool   // Specifies an open block node that has not been finished to process yet
133}
134
135// NewNode allocates a node of a specified type.
136func NewNode(typ NodeType) *Node {
137	return &Node{
138		Type: typ,
139		open: true,
140	}
141}
142
143func (n *Node) String() string {
144	ellipsis := ""
145	snippet := n.Literal
146	if len(snippet) > 16 {
147		snippet = snippet[:16]
148		ellipsis = "..."
149	}
150	return fmt.Sprintf("%s: '%s%s'", n.Type, snippet, ellipsis)
151}
152
153// Unlink removes node 'n' from the tree.
154// It panics if the node is nil.
155func (n *Node) Unlink() {
156	if n.Prev != nil {
157		n.Prev.Next = n.Next
158	} else if n.Parent != nil {
159		n.Parent.FirstChild = n.Next
160	}
161	if n.Next != nil {
162		n.Next.Prev = n.Prev
163	} else if n.Parent != nil {
164		n.Parent.LastChild = n.Prev
165	}
166	n.Parent = nil
167	n.Next = nil
168	n.Prev = nil
169}
170
171// AppendChild adds a node 'child' as a child of 'n'.
172// It panics if either node is nil.
173func (n *Node) AppendChild(child *Node) {
174	child.Unlink()
175	child.Parent = n
176	if n.LastChild != nil {
177		n.LastChild.Next = child
178		child.Prev = n.LastChild
179		n.LastChild = child
180	} else {
181		n.FirstChild = child
182		n.LastChild = child
183	}
184}
185
186// InsertBefore inserts 'sibling' immediately before 'n'.
187// It panics if either node is nil.
188func (n *Node) InsertBefore(sibling *Node) {
189	sibling.Unlink()
190	sibling.Prev = n.Prev
191	if sibling.Prev != nil {
192		sibling.Prev.Next = sibling
193	}
194	sibling.Next = n
195	n.Prev = sibling
196	sibling.Parent = n.Parent
197	if sibling.Prev == nil {
198		sibling.Parent.FirstChild = sibling
199	}
200}
201
202func (n *Node) isContainer() bool {
203	switch n.Type {
204	case Document:
205		fallthrough
206	case BlockQuote:
207		fallthrough
208	case List:
209		fallthrough
210	case Item:
211		fallthrough
212	case Paragraph:
213		fallthrough
214	case Header:
215		fallthrough
216	case Emph:
217		fallthrough
218	case Strong:
219		fallthrough
220	case Del:
221		fallthrough
222	case Link:
223		fallthrough
224	case Image:
225		fallthrough
226	case Table:
227		fallthrough
228	case TableHead:
229		fallthrough
230	case TableBody:
231		fallthrough
232	case TableRow:
233		fallthrough
234	case TableCell:
235		return true
236	default:
237		return false
238	}
239}
240
241func (n *Node) canContain(t NodeType) bool {
242	if n.Type == List {
243		return t == Item
244	}
245	if n.Type == Document || n.Type == BlockQuote || n.Type == Item {
246		return t != Item
247	}
248	if n.Type == Table {
249		return t == TableHead || t == TableBody
250	}
251	if n.Type == TableHead || n.Type == TableBody {
252		return t == TableRow
253	}
254	if n.Type == TableRow {
255		return t == TableCell
256	}
257	return false
258}
259
260// WalkStatus allows NodeVisitor to have some control over the tree traversal.
261// It is returned from NodeVisitor and different values allow Node.Walk to
262// decide which node to go to next.
263type WalkStatus int
264
265const (
266	// GoToNext is the default traversal of every node.
267	GoToNext WalkStatus = iota
268	// SkipChildren tells walker to skip all children of current node.
269	SkipChildren
270	// Terminate tells walker to terminate the traversal.
271	Terminate
272)
273
274// NodeVisitor is a callback to be called when traversing the syntax tree.
275// Called twice for every node: once with entering=true when the branch is
276// first visited, then with entering=false after all the children are done.
277type NodeVisitor func(node *Node, entering bool) WalkStatus
278
279// Walk is a convenience method that instantiates a walker and starts a
280// traversal of subtree rooted at n.
281func (n *Node) Walk(visitor NodeVisitor) {
282	w := newNodeWalker(n)
283	for w.current != nil {
284		status := visitor(w.current, w.entering)
285		switch status {
286		case GoToNext:
287			w.next()
288		case SkipChildren:
289			w.entering = false
290			w.next()
291		case Terminate:
292			return
293		}
294	}
295}
296
297type nodeWalker struct {
298	current  *Node
299	root     *Node
300	entering bool
301}
302
303func newNodeWalker(root *Node) *nodeWalker {
304	return &nodeWalker{
305		current:  root,
306		root:     root,
307		entering: true,
308	}
309}
310
311func (nw *nodeWalker) next() {
312	if (!nw.current.isContainer() || !nw.entering) && nw.current == nw.root {
313		nw.current = nil
314		return
315	}
316	if nw.entering && nw.current.isContainer() {
317		if nw.current.FirstChild != nil {
318			nw.current = nw.current.FirstChild
319			nw.entering = true
320		} else {
321			nw.entering = false
322		}
323	} else if nw.current.Next == nil {
324		nw.current = nw.current.Parent
325		nw.entering = false
326	} else {
327		nw.current = nw.current.Next
328		nw.entering = true
329	}
330}
331
332func dump(ast *Node) {
333	fmt.Println(dumpString(ast))
334}
335
336func dumpR(ast *Node, depth int) string {
337	if ast == nil {
338		return ""
339	}
340	indent := bytes.Repeat([]byte("\t"), depth)
341	content := ast.Literal
342	if content == nil {
343		content = ast.content
344	}
345	result := fmt.Sprintf("%s%s(%q)\n", indent, ast.Type, content)
346	for n := ast.FirstChild; n != nil; n = n.Next {
347		result += dumpR(n, depth+1)
348	}
349	return result
350}
351
352func dumpString(ast *Node) string {
353	return dumpR(ast, 0)
354}