all repos — grayfriday @ efa77da18b0fa3413ebafb99ef6afad578ad6593

blackfriday fork with a few changes

node.go (view raw)

  1package blackfriday
  2
  3import (
  4	"bytes"
  5	"fmt"
  6)
  7
  8// NodeType specifies a type of a single node of a syntax tree. Usually one
  9// node (and its type) corresponds to a single markdown feature, e.g. emphasis
 10// or code block.
 11type NodeType int
 12
 13// Constants for identifying different types of nodes. See NodeType.
 14const (
 15	Document NodeType = iota
 16	BlockQuote
 17	List
 18	Item
 19	Paragraph
 20	Header
 21	HorizontalRule
 22	Emph
 23	Strong
 24	Del
 25	Link
 26	Image
 27	Text
 28	HTMLBlock
 29	CodeBlock
 30	Softbreak
 31	Hardbreak
 32	Code
 33	HTMLSpan
 34	Table
 35	TableCell
 36	TableHead
 37	TableBody
 38	TableRow
 39)
 40
 41var nodeTypeNames = []string{
 42	Document:       "Document",
 43	BlockQuote:     "BlockQuote",
 44	List:           "List",
 45	Item:           "Item",
 46	Paragraph:      "Paragraph",
 47	Header:         "Header",
 48	HorizontalRule: "HorizontalRule",
 49	Emph:           "Emph",
 50	Strong:         "Strong",
 51	Del:            "Del",
 52	Link:           "Link",
 53	Image:          "Image",
 54	Text:           "Text",
 55	HTMLBlock:      "HTMLBlock",
 56	CodeBlock:      "CodeBlock",
 57	Softbreak:      "Softbreak",
 58	Hardbreak:      "Hardbreak",
 59	Code:           "Code",
 60	HTMLSpan:       "HTMLSpan",
 61	Table:          "Table",
 62	TableCell:      "TableCell",
 63	TableHead:      "TableHead",
 64	TableBody:      "TableBody",
 65	TableRow:       "TableRow",
 66}
 67
 68func (t NodeType) String() string {
 69	return nodeTypeNames[t]
 70}
 71
 72// ListData contains fields relevant to a List node type.
 73type ListData struct {
 74	ListFlags  ListType
 75	Tight      bool   // Skip <p>s around list item data if true
 76	BulletChar byte   // '*', '+' or '-' in bullet lists
 77	Delimiter  byte   // '.' or ')' after the number in ordered lists
 78	RefLink    []byte // If not nil, turns this list item into a footnote item and triggers different rendering
 79}
 80
 81// LinkData contains fields relevant to a Link node type.
 82type LinkData struct {
 83	Destination []byte
 84	Title       []byte
 85	NoteID      int
 86}
 87
 88// CodeBlockData contains fields relevant to a CodeBlock node type.
 89type CodeBlockData struct {
 90	IsFenced    bool   // Specifies whether it's a fenced code block or an indented one
 91	Info        []byte // This holds the info string
 92	FenceChar   byte
 93	FenceLength int
 94	FenceOffset int
 95}
 96
 97// TableCellData contains fields relevant to a TableCell node type.
 98type TableCellData struct {
 99	IsHeader bool           // This tells if it's under the header row
100	Align    CellAlignFlags // This holds the value for align attribute
101}
102
103// HeaderData contains fields relevant to a Header node type.
104type HeaderData struct {
105	Level        int    // This holds the heading level number
106	HeaderID     string // This might hold header ID, if present
107	IsTitleblock bool   // Specifies whether it's a title block
108}
109
110// Node is a single element in the abstract syntax tree of the parsed document.
111// It holds connections to the structurally neighboring nodes and, for certain
112// types of nodes, additional information that might be needed when rendering.
113type Node struct {
114	Type       NodeType // Determines the type of the node
115	Parent     *Node    // Points to the parent
116	FirstChild *Node    // Points to the first child, if any
117	LastChild  *Node    // Points to the last child, if any
118	Prev       *Node    // Previous sibling; nil if it's the first child
119	Next       *Node    // Next sibling; nil if it's the last child
120
121	Literal []byte // Text contents of the leaf nodes
122
123	HeaderData    // Populated if Type is Header
124	ListData      // Populated if Type is List
125	CodeBlockData // Populated if Type is CodeBlock
126	LinkData      // Populated if Type is Link
127	TableCellData // Populated if Type is TableCell
128
129	content []byte // Markdown content of the block nodes
130	open    bool   // Specifies an open block node that has not been finished to process yet
131}
132
133// NewNode allocates a node of a specified type.
134func NewNode(typ NodeType) *Node {
135	return &Node{
136		Type: typ,
137		open: true,
138	}
139}
140
141func (n *Node) String() string {
142	ellipsis := ""
143	snippet := n.Literal
144	if len(snippet) > 16 {
145		snippet = snippet[:16]
146		ellipsis = "..."
147	}
148	return fmt.Sprintf("%s: '%s%s'", n.Type, snippet, ellipsis)
149}
150
151// Unlink removes node 'n' from the tree.
152// It panics if the node is nil.
153func (n *Node) Unlink() {
154	if n.Prev != nil {
155		n.Prev.Next = n.Next
156	} else if n.Parent != nil {
157		n.Parent.FirstChild = n.Next
158	}
159	if n.Next != nil {
160		n.Next.Prev = n.Prev
161	} else if n.Parent != nil {
162		n.Parent.LastChild = n.Prev
163	}
164	n.Parent = nil
165	n.Next = nil
166	n.Prev = nil
167}
168
169// AppendChild adds a node 'child' as a child of 'n'.
170// It panics if either node is nil.
171func (n *Node) AppendChild(child *Node) {
172	child.Unlink()
173	child.Parent = n
174	if n.LastChild != nil {
175		n.LastChild.Next = child
176		child.Prev = n.LastChild
177		n.LastChild = child
178	} else {
179		n.FirstChild = child
180		n.LastChild = child
181	}
182}
183
184// InsertBefore inserts 'sibling' immediately before 'n'.
185// It panics if either node is nil.
186func (n *Node) InsertBefore(sibling *Node) {
187	sibling.Unlink()
188	sibling.Prev = n.Prev
189	if sibling.Prev != nil {
190		sibling.Prev.Next = sibling
191	}
192	sibling.Next = n
193	n.Prev = sibling
194	sibling.Parent = n.Parent
195	if sibling.Prev == nil {
196		sibling.Parent.FirstChild = sibling
197	}
198}
199
200func (n *Node) isContainer() bool {
201	switch n.Type {
202	case Document:
203		fallthrough
204	case BlockQuote:
205		fallthrough
206	case List:
207		fallthrough
208	case Item:
209		fallthrough
210	case Paragraph:
211		fallthrough
212	case Header:
213		fallthrough
214	case Emph:
215		fallthrough
216	case Strong:
217		fallthrough
218	case Del:
219		fallthrough
220	case Link:
221		fallthrough
222	case Image:
223		fallthrough
224	case Table:
225		fallthrough
226	case TableHead:
227		fallthrough
228	case TableBody:
229		fallthrough
230	case TableRow:
231		fallthrough
232	case TableCell:
233		return true
234	default:
235		return false
236	}
237}
238
239func (n *Node) canContain(t NodeType) bool {
240	if n.Type == List {
241		return t == Item
242	}
243	if n.Type == Document || n.Type == BlockQuote || n.Type == Item {
244		return t != Item
245	}
246	if n.Type == Table {
247		return t == TableHead || t == TableBody
248	}
249	if n.Type == TableHead || n.Type == TableBody {
250		return t == TableRow
251	}
252	if n.Type == TableRow {
253		return t == TableCell
254	}
255	return false
256}
257
258// WalkStatus allows NodeVisitor to have some control over the tree traversal.
259// It is returned from NodeVisitor and different values allow Node.Walk to
260// decide which node to go to next.
261type WalkStatus int
262
263const (
264	// GoToNext is the default traversal of every node.
265	GoToNext WalkStatus = iota
266	// SkipChildren tells walker to skip all children of current node.
267	SkipChildren
268	// Terminate tells walker to terminate the traversal.
269	Terminate
270)
271
272// NodeVisitor is a callback to be called when traversing the syntax tree.
273// Called twice for every node: once with entering=true when the branch is
274// first visited, then with entering=false after all the children are done.
275type NodeVisitor func(node *Node, entering bool) WalkStatus
276
277// Walk is a convenience method that instantiates a walker and starts a
278// traversal of subtree rooted at n.
279func (root *Node) Walk(visitor NodeVisitor) {
280	w := newNodeWalker(root)
281	for w.current != nil {
282		status := visitor(w.current, w.entering)
283		switch status {
284		case GoToNext:
285			w.next()
286		case SkipChildren:
287			w.entering = false
288			w.next()
289		case Terminate:
290			return
291		}
292	}
293}
294
295type nodeWalker struct {
296	current  *Node
297	root     *Node
298	entering bool
299}
300
301func newNodeWalker(root *Node) *nodeWalker {
302	return &nodeWalker{
303		current:  root,
304		root:     root,
305		entering: true,
306	}
307}
308
309func (nw *nodeWalker) next() {
310	if !nw.entering && nw.current == nw.root {
311		nw.current = nil
312		return
313	}
314	if nw.entering && nw.current.isContainer() {
315		if nw.current.FirstChild != nil {
316			nw.current = nw.current.FirstChild
317			nw.entering = true
318		} else {
319			nw.entering = false
320		}
321	} else if nw.current.Next == nil {
322		nw.current = nw.current.Parent
323		nw.entering = false
324	} else {
325		nw.current = nw.current.Next
326		nw.entering = true
327	}
328}
329
330func dump(ast *Node) {
331	fmt.Println(dumpString(ast))
332}
333
334func dumpR(ast *Node, depth int) string {
335	if ast == nil {
336		return ""
337	}
338	indent := bytes.Repeat([]byte("\t"), depth)
339	content := ast.Literal
340	if content == nil {
341		content = ast.content
342	}
343	result := fmt.Sprintf("%s%s(%q)\n", indent, ast.Type, content)
344	for n := ast.FirstChild; n != nil; n = n.Next {
345		result += dumpR(n, depth+1)
346	}
347	return result
348}
349
350func dumpString(ast *Node) string {
351	return dumpR(ast, 0)
352}