all repos — grayfriday @ a5270b6f564501a64972e7d7fb907e9937102c35

blackfriday fork with a few changes

node.go (view raw)

  1package blackfriday
  2
  3import (
  4	"bytes"
  5	"fmt"
  6)
  7
  8// NodeType specifies a type of a single node of a syntax tree. Usually one
  9// node (and its type) corresponds to a single markdown feature, e.g. emphasis
 10// or code block.
 11type NodeType int
 12
 13// Constants for identifying different types of nodes. See NodeType.
 14const (
 15	Document NodeType = iota
 16	BlockQuote
 17	List
 18	Item
 19	Paragraph
 20	Header
 21	HorizontalRule
 22	Emph
 23	Strong
 24	Del
 25	Link
 26	Image
 27	Text
 28	HTMLBlock
 29	CodeBlock
 30	Softbreak
 31	Hardbreak
 32	Code
 33	HTMLSpan
 34	Table
 35	TableCell
 36	TableHead
 37	TableBody
 38	TableRow
 39)
 40
 41var nodeTypeNames = []string{
 42	Document:       "Document",
 43	BlockQuote:     "BlockQuote",
 44	List:           "List",
 45	Item:           "Item",
 46	Paragraph:      "Paragraph",
 47	Header:         "Header",
 48	HorizontalRule: "HorizontalRule",
 49	Emph:           "Emph",
 50	Strong:         "Strong",
 51	Del:            "Del",
 52	Link:           "Link",
 53	Image:          "Image",
 54	Text:           "Text",
 55	HTMLBlock:      "HTMLBlock",
 56	CodeBlock:      "CodeBlock",
 57	Softbreak:      "Softbreak",
 58	Hardbreak:      "Hardbreak",
 59	Code:           "Code",
 60	HTMLSpan:       "HTMLSpan",
 61	Table:          "Table",
 62	TableCell:      "TableCell",
 63	TableHead:      "TableHead",
 64	TableBody:      "TableBody",
 65	TableRow:       "TableRow",
 66}
 67
 68func (t NodeType) String() string {
 69	return nodeTypeNames[t]
 70}
 71
 72// ListData contains fields relevant to a List node type.
 73type ListData struct {
 74	ListFlags  ListType
 75	Tight      bool   // Skip <p>s around list item data if true
 76	BulletChar byte   // '*', '+' or '-' in bullet lists
 77	Delimiter  byte   // '.' or ')' after the number in ordered lists
 78	RefLink    []byte // If not nil, turns this list item into a footnote item and triggers different rendering
 79}
 80
 81// LinkData contains fields relevant to a Link node type.
 82type LinkData struct {
 83	Destination []byte
 84	Title       []byte
 85	NoteID      int
 86}
 87
 88// CodeBlockData contains fields relevant to a CodeBlock node type.
 89type CodeBlockData struct {
 90	IsFenced    bool   // Specifies whether it's a fenced code block or an indented one
 91	Info        []byte // This holds the info string
 92	FenceChar   byte
 93	FenceLength int
 94	FenceOffset int
 95}
 96
 97// TableCellData contains fields relevant to a TableCell node type.
 98type TableCellData struct {
 99	IsHeader bool           // This tells if it's under the header row
100	Align    CellAlignFlags // This holds the value for align attribute
101}
102
103// HeaderData contains fields relevant to a Header node type.
104type HeaderData struct {
105	Level        int    // This holds the heading level number
106	HeaderID     string // This might hold header ID, if present
107	IsTitleblock bool   // Specifies whether it's a title block
108}
109
110// Node is a single element in the abstract syntax tree of the parsed document.
111// It holds connections to the structurally neighboring nodes and, for certain
112// types of nodes, additional information that might be needed when rendering.
113type Node struct {
114	Type       NodeType // Determines the type of the node
115	Parent     *Node    // Points to the parent
116	FirstChild *Node    // Points to the first child, if any
117	LastChild  *Node    // Points to the last child, if any
118	Prev       *Node    // Previous sibling; nil if it's the first child
119	Next       *Node    // Next sibling; nil if it's the last child
120
121	Literal []byte // Text contents of the leaf nodes
122
123	HeaderData    // Populated if Type is Header
124	ListData      // Populated if Type is List
125	CodeBlockData // Populated if Type is CodeBlock
126	LinkData      // Populated if Type is Link
127	TableCellData // Populated if Type is TableCell
128
129	content []byte // Markdown content of the block nodes
130	open    bool   // Specifies an open block node that has not been finished to process yet
131}
132
133// NewNode allocates a node of a specified type.
134func NewNode(typ NodeType) *Node {
135	return &Node{
136		Type: typ,
137		open: true,
138	}
139}
140
141func (n *Node) String() string {
142	ellipsis := ""
143	snippet := n.Literal
144	if len(snippet) > 16 {
145		snippet = snippet[:16]
146		ellipsis = "..."
147	}
148	return fmt.Sprintf("%s: '%s%s'", n.Type, snippet, ellipsis)
149}
150
151func (n *Node) unlink() {
152	if n.Prev != nil {
153		n.Prev.Next = n.Next
154	} else if n.Parent != nil {
155		n.Parent.FirstChild = n.Next
156	}
157	if n.Next != nil {
158		n.Next.Prev = n.Prev
159	} else if n.Parent != nil {
160		n.Parent.LastChild = n.Prev
161	}
162	n.Parent = nil
163	n.Next = nil
164	n.Prev = nil
165}
166
167func (n *Node) appendChild(child *Node) {
168	child.unlink()
169	child.Parent = n
170	if n.LastChild != nil {
171		n.LastChild.Next = child
172		child.Prev = n.LastChild
173		n.LastChild = child
174	} else {
175		n.FirstChild = child
176		n.LastChild = child
177	}
178}
179
180func (n *Node) insertBefore(sibling *Node) {
181	sibling.unlink()
182	sibling.Prev = n.Prev
183	if sibling.Prev != nil {
184		sibling.Prev.Next = sibling
185	}
186	sibling.Next = n
187	n.Prev = sibling
188	sibling.Parent = n.Parent
189	if sibling.Prev == nil {
190		sibling.Parent.FirstChild = sibling
191	}
192}
193
194func (n *Node) isContainer() bool {
195	switch n.Type {
196	case Document:
197		fallthrough
198	case BlockQuote:
199		fallthrough
200	case List:
201		fallthrough
202	case Item:
203		fallthrough
204	case Paragraph:
205		fallthrough
206	case Header:
207		fallthrough
208	case Emph:
209		fallthrough
210	case Strong:
211		fallthrough
212	case Del:
213		fallthrough
214	case Link:
215		fallthrough
216	case Image:
217		fallthrough
218	case Table:
219		fallthrough
220	case TableHead:
221		fallthrough
222	case TableBody:
223		fallthrough
224	case TableRow:
225		fallthrough
226	case TableCell:
227		return true
228	default:
229		return false
230	}
231}
232
233func (n *Node) canContain(t NodeType) bool {
234	if n.Type == List {
235		return t == Item
236	}
237	if n.Type == Document || n.Type == BlockQuote || n.Type == Item {
238		return t != Item
239	}
240	if n.Type == Table {
241		return t == TableHead || t == TableBody
242	}
243	if n.Type == TableHead || n.Type == TableBody {
244		return t == TableRow
245	}
246	if n.Type == TableRow {
247		return t == TableCell
248	}
249	return false
250}
251
252// WalkStatus allows NodeVisitor to have some control over the tree traversal.
253// It is returned from NodeVisitor and different values allow Node.Walk to
254// decide which node to go to next.
255type WalkStatus int
256
257const (
258	// GoToNext is the default traversal of every node.
259	GoToNext WalkStatus = iota
260	// SkipChildren tells walker to skip all children of current node.
261	SkipChildren
262	// Terminate tells walker to terminate the traversal.
263	Terminate
264)
265
266// NodeVisitor is a callback to be called when traversing the syntax tree.
267// Called twice for every node: once with entering=true when the branch is
268// first visited, then with entering=false after all the children are done.
269type NodeVisitor func(node *Node, entering bool) WalkStatus
270
271// Walk is a convenience method that instantiates a walker and starts a
272// traversal of subtree rooted at n.
273func (n *Node) Walk(visitor NodeVisitor) {
274	walker := newNodeWalker(n)
275	node, entering := walker.next()
276	for node != nil {
277		status := visitor(node, entering)
278		switch status {
279		case GoToNext:
280			node, entering = walker.next()
281		case SkipChildren:
282			node, entering = walker.resumeAt(node, false)
283		case Terminate:
284			return
285		}
286	}
287}
288
289type nodeWalker struct {
290	current  *Node
291	root     *Node
292	entering bool
293}
294
295func newNodeWalker(root *Node) *nodeWalker {
296	return &nodeWalker{
297		current:  root,
298		root:     nil,
299		entering: true,
300	}
301}
302
303func (nw *nodeWalker) next() (*Node, bool) {
304	if nw.current == nil {
305		return nil, false
306	}
307	if nw.root == nil {
308		nw.root = nw.current
309		return nw.current, nw.entering
310	}
311	if nw.entering && nw.current.isContainer() {
312		if nw.current.FirstChild != nil {
313			nw.current = nw.current.FirstChild
314			nw.entering = true
315		} else {
316			nw.entering = false
317		}
318	} else if nw.current.Next == nil {
319		nw.current = nw.current.Parent
320		nw.entering = false
321	} else {
322		nw.current = nw.current.Next
323		nw.entering = true
324	}
325	if nw.current == nw.root {
326		return nil, false
327	}
328	return nw.current, nw.entering
329}
330
331func (nw *nodeWalker) resumeAt(node *Node, entering bool) (*Node, bool) {
332	nw.current = node
333	nw.entering = entering
334	return nw.next()
335}
336
337func dump(ast *Node) {
338	fmt.Println(dumpString(ast))
339}
340
341func dumpR(ast *Node, depth int) string {
342	if ast == nil {
343		return ""
344	}
345	indent := bytes.Repeat([]byte("\t"), depth)
346	content := ast.Literal
347	if content == nil {
348		content = ast.content
349	}
350	result := fmt.Sprintf("%s%s(%q)\n", indent, ast.Type, content)
351	for n := ast.FirstChild; n != nil; n = n.Next {
352		result += dumpR(n, depth+1)
353	}
354	return result
355}
356
357func dumpString(ast *Node) string {
358	return dumpR(ast, 0)
359}