mirror of
https://codeberg.org/forgejo/forgejo.git
synced 2025-01-07 14:12:21 +01:00
27757714d0
* Move to goldmark Markdown rendering moved from blackfriday to the goldmark. Multiple subtle changes required to the goldmark extensions to keep current rendering and defaults. Can go further with goldmark linkify and have this work within markdown rendering making the link processor unnecessary. Need to think about how to go about allowing extensions - at present it seems that these would be hard to do without recompilation. * linter fixes Co-authored-by: Lauris BH <lauris@nix.lv>
1211 lines
32 KiB
Go
1211 lines
32 KiB
Go
// Package parser contains the types and functions used to parse Markdown text.
|
|
package parser
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"sync"
|
|
|
|
"github.com/yuin/goldmark/ast"
|
|
"github.com/yuin/goldmark/text"
|
|
"github.com/yuin/goldmark/util"
|
|
)
|
|
|
|
// Reference describes a link reference definition found in Markdown text.
type Reference interface {
	// String returns a printable form of the reference (fmt.Stringer).
	String() string

	// Label returns the label of the reference.
	Label() []byte

	// Destination returns the destination (URL) of the reference.
	Destination() []byte

	// Title returns the title of the reference.
	Title() []byte
}

// reference is the Reference implementation returned by NewReference.
type reference struct {
	label, destination, title []byte
}

// NewReference returns a new Reference made of the given label, destination
// and title. The slices are stored as given; they are not copied.
func NewReference(label, destination, title []byte) Reference {
	return &reference{
		label:       label,
		destination: destination,
		title:       title,
	}
}

// Label returns the stored label.
func (r *reference) Label() []byte { return r.label }

// Destination returns the stored destination.
func (r *reference) Destination() []byte { return r.destination }

// Title returns the stored title.
func (r *reference) Title() []byte { return r.title }

// String formats the three parts of the reference for debugging output.
func (r *reference) String() string {
	const layout = "Reference{Label:%s, Destination:%s, Title:%s}"
	return fmt.Sprintf(layout, r.label, r.destination, r.title)
}
|
|
|
|
// An IDs interface is a collection of the element ids.
// The implementation in this file (ids) remembers every id it has generated
// or been given so that generated ids do not repeat.
type IDs interface {
	// Generate generates a new element id from value for a node of the
	// given kind.
	Generate(value []byte, kind ast.NodeKind) []byte

	// Put puts a given element id to the used ids table.
	Put(value []byte)
}
|
|
|
|
type ids struct {
|
|
values map[string]bool
|
|
}
|
|
|
|
func newIDs() IDs {
|
|
return &ids{
|
|
values: map[string]bool{},
|
|
}
|
|
}
|
|
|
|
func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
|
|
value = util.TrimLeftSpace(value)
|
|
value = util.TrimRightSpace(value)
|
|
result := []byte{}
|
|
for i := 0; i < len(value); {
|
|
v := value[i]
|
|
l := util.UTF8Len(v)
|
|
i += int(l)
|
|
if l != 1 {
|
|
continue
|
|
}
|
|
if util.IsAlphaNumeric(v) {
|
|
if 'A' <= v && v <= 'Z' {
|
|
v += 'a' - 'A'
|
|
}
|
|
result = append(result, v)
|
|
} else if util.IsSpace(v) || v == '-' || v == '_' {
|
|
result = append(result, '-')
|
|
}
|
|
}
|
|
if len(result) == 0 {
|
|
if kind == ast.KindHeading {
|
|
result = []byte("heading")
|
|
} else {
|
|
result = []byte("id")
|
|
}
|
|
}
|
|
if _, ok := s.values[util.BytesToReadOnlyString(result)]; !ok {
|
|
s.values[util.BytesToReadOnlyString(result)] = true
|
|
return result
|
|
}
|
|
for i := 1; ; i++ {
|
|
newResult := fmt.Sprintf("%s-%d", result, i)
|
|
if _, ok := s.values[newResult]; !ok {
|
|
s.values[newResult] = true
|
|
return []byte(newResult)
|
|
}
|
|
|
|
}
|
|
}
|
|
|
|
func (s *ids) Put(value []byte) {
|
|
s.values[util.BytesToReadOnlyString(value)] = true
|
|
}
|
|
|
|
// ContextKey identifies a slot for an arbitrary value stored in a Context.
type ContextKey int

// ContextKeyMax is the largest ContextKey that NewContextKey has returned so
// far (zero if none has been created).
var ContextKeyMax ContextKey

// NewContextKey returns a new ContextKey value: it advances ContextKeyMax by
// one and returns the new maximum. The counter is a plain package variable
// with no locking.
func NewContextKey() ContextKey {
	next := ContextKeyMax + 1
	ContextKeyMax = next
	return next
}
|
|
|
|
// A Context interface holds the information that is necessary to parse
// Markdown text.
type Context interface {
	// String implements Stringer.
	String() string

	// Get returns a value associated with the given key.
	Get(ContextKey) interface{}

	// Set sets the given value to the context.
	Set(ContextKey, interface{})

	// AddReference adds the given reference to this context.
	AddReference(Reference)

	// Reference returns (a reference, true) if a reference associated with
	// the given label exists, otherwise (nil, false).
	Reference(label string) (Reference, bool)

	// References returns a list of references.
	References() []Reference

	// IDs returns a collection of the element ids.
	IDs() IDs

	// BlockOffset returns a first non-space character position on current line.
	// This value is valid only for BlockParser.Open.
	// BlockOffset returns -1 if current line is blank.
	BlockOffset() int

	// SetBlockOffset sets a first non-space character position on current line.
	// This value is valid only for BlockParser.Open.
	SetBlockOffset(int)

	// BlockIndent returns an indent width on current line.
	// This value is valid only for BlockParser.Open.
	// BlockIndent returns -1 if current line is blank.
	BlockIndent() int

	// SetBlockIndent sets an indent width on current line.
	// This value is valid only for BlockParser.Open.
	SetBlockIndent(int)

	// FirstDelimiter returns a first delimiter of the current delimiter list.
	FirstDelimiter() *Delimiter

	// LastDelimiter returns a last delimiter of the current delimiter list.
	LastDelimiter() *Delimiter

	// PushDelimiter appends the given delimiter to the tail of the current
	// delimiter list.
	PushDelimiter(delimiter *Delimiter)

	// RemoveDelimiter removes the given delimiter from the current delimiter list.
	RemoveDelimiter(d *Delimiter)

	// ClearDelimiters clears the current delimiter list.
	// The implementation in this file walks backwards from the last
	// delimiter and stops when it reaches bottom.
	ClearDelimiters(bottom ast.Node)

	// OpenedBlocks returns a list of nodes that are currently in parsing.
	OpenedBlocks() []Block

	// SetOpenedBlocks sets a list of nodes that are currently in parsing.
	SetOpenedBlocks([]Block)

	// LastOpenedBlock returns a last node that is currently in parsing.
	LastOpenedBlock() Block

	// IsInLinkLabel returns true if current position seems to be in link label.
	IsInLinkLabel() bool
}
|
|
|
|
// A ContextConfig struct is a data structure that holds configuration of the Context.
|
|
type ContextConfig struct {
|
|
IDs IDs
|
|
}
|
|
|
|
// An ContextOption is a functional option type for the Context.
|
|
type ContextOption func(*ContextConfig)
|
|
|
|
// WithIDs is a functional option for the Context.
|
|
func WithIDs(ids IDs) ContextOption {
|
|
return func(c *ContextConfig) {
|
|
c.IDs = ids
|
|
}
|
|
}
|
|
|
|
type parseContext struct {
|
|
store []interface{}
|
|
ids IDs
|
|
refs map[string]Reference
|
|
blockOffset int
|
|
blockIndent int
|
|
delimiters *Delimiter
|
|
lastDelimiter *Delimiter
|
|
openedBlocks []Block
|
|
}
|
|
|
|
// NewContext returns a new Context.
|
|
func NewContext(options ...ContextOption) Context {
|
|
cfg := &ContextConfig{
|
|
IDs: newIDs(),
|
|
}
|
|
for _, option := range options {
|
|
option(cfg)
|
|
}
|
|
|
|
return &parseContext{
|
|
store: make([]interface{}, ContextKeyMax+1),
|
|
refs: map[string]Reference{},
|
|
ids: cfg.IDs,
|
|
blockOffset: -1,
|
|
blockIndent: -1,
|
|
delimiters: nil,
|
|
lastDelimiter: nil,
|
|
openedBlocks: []Block{},
|
|
}
|
|
}
|
|
|
|
func (p *parseContext) Get(key ContextKey) interface{} {
|
|
return p.store[key]
|
|
}
|
|
|
|
func (p *parseContext) Set(key ContextKey, value interface{}) {
|
|
p.store[key] = value
|
|
}
|
|
|
|
// IDs returns the element id collection of this context.
func (p *parseContext) IDs() IDs {
	return p.ids
}

// BlockOffset returns the value last passed to SetBlockOffset
// (-1 for a new context).
func (p *parseContext) BlockOffset() int {
	return p.blockOffset
}

// SetBlockOffset records v as the block offset of the current line.
func (p *parseContext) SetBlockOffset(v int) {
	p.blockOffset = v
}

// BlockIndent returns the value last passed to SetBlockIndent
// (-1 for a new context).
func (p *parseContext) BlockIndent() int {
	return p.blockIndent
}

// SetBlockIndent records v as the indent width of the current line.
func (p *parseContext) SetBlockIndent(v int) {
	p.blockIndent = v
}

// LastDelimiter returns the tail of the delimiter list, or nil if it is empty.
func (p *parseContext) LastDelimiter() *Delimiter {
	return p.lastDelimiter
}

// FirstDelimiter returns the head of the delimiter list, or nil if it is empty.
func (p *parseContext) FirstDelimiter() *Delimiter {
	return p.delimiters
}
|
|
|
|
func (p *parseContext) PushDelimiter(d *Delimiter) {
|
|
if p.delimiters == nil {
|
|
p.delimiters = d
|
|
p.lastDelimiter = d
|
|
} else {
|
|
l := p.lastDelimiter
|
|
p.lastDelimiter = d
|
|
l.NextDelimiter = d
|
|
d.PreviousDelimiter = l
|
|
}
|
|
}
|
|
|
|
// RemoveDelimiter unlinks d from the delimiter list and takes it out of the
// AST: a delimiter whose Length is not 0 is turned back into text via
// ast.MergeOrReplaceTextSegment, and one whose Length is 0 is removed from
// its parent.
func (p *parseContext) RemoveDelimiter(d *Delimiter) {
	// Unlink d from its predecessor (or move the head if d was first).
	if d.PreviousDelimiter == nil {
		p.delimiters = d.NextDelimiter
	} else {
		d.PreviousDelimiter.NextDelimiter = d.NextDelimiter
		if d.NextDelimiter != nil {
			d.NextDelimiter.PreviousDelimiter = d.PreviousDelimiter
		}
	}
	// Move the tail if d was last.
	if d.NextDelimiter == nil {
		p.lastDelimiter = d.PreviousDelimiter
	}
	// Make sure the new head and tail do not point outside the list.
	if p.delimiters != nil {
		p.delimiters.PreviousDelimiter = nil
	}
	if p.lastDelimiter != nil {
		p.lastDelimiter.NextDelimiter = nil
	}
	d.NextDelimiter = nil
	d.PreviousDelimiter = nil
	if d.Length != 0 {
		ast.MergeOrReplaceTextSegment(d.Parent(), d, d.Segment)
	} else {
		d.Parent().RemoveChild(d.Parent(), d)
	}
}
|
|
|
|
func (p *parseContext) ClearDelimiters(bottom ast.Node) {
|
|
if p.lastDelimiter == nil {
|
|
return
|
|
}
|
|
var c ast.Node
|
|
for c = p.lastDelimiter; c != nil && c != bottom; {
|
|
prev := c.PreviousSibling()
|
|
if d, ok := c.(*Delimiter); ok {
|
|
p.RemoveDelimiter(d)
|
|
}
|
|
c = prev
|
|
}
|
|
}
|
|
|
|
func (p *parseContext) AddReference(ref Reference) {
|
|
key := util.ToLinkReference(ref.Label())
|
|
if _, ok := p.refs[key]; !ok {
|
|
p.refs[key] = ref
|
|
}
|
|
}
|
|
|
|
func (p *parseContext) Reference(label string) (Reference, bool) {
|
|
v, ok := p.refs[label]
|
|
return v, ok
|
|
}
|
|
|
|
func (p *parseContext) References() []Reference {
|
|
ret := make([]Reference, 0, len(p.refs))
|
|
for _, v := range p.refs {
|
|
ret = append(ret, v)
|
|
}
|
|
return ret
|
|
}
|
|
|
|
func (p *parseContext) String() string {
|
|
refs := []string{}
|
|
for _, r := range p.refs {
|
|
refs = append(refs, r.String())
|
|
}
|
|
|
|
return fmt.Sprintf("Context{Store:%#v, Refs:%s}", p.store, strings.Join(refs, ","))
|
|
}
|
|
|
|
func (p *parseContext) OpenedBlocks() []Block {
|
|
return p.openedBlocks
|
|
}
|
|
|
|
func (p *parseContext) SetOpenedBlocks(v []Block) {
|
|
p.openedBlocks = v
|
|
}
|
|
|
|
func (p *parseContext) LastOpenedBlock() Block {
|
|
if l := len(p.openedBlocks); l != 0 {
|
|
return p.openedBlocks[l-1]
|
|
}
|
|
return Block{}
|
|
}
|
|
|
|
func (p *parseContext) IsInLinkLabel() bool {
|
|
tlist := p.Get(linkLabelStateKey)
|
|
return tlist != nil
|
|
}
|
|
|
|
// State represents the parser's state.
// State values are bit flags and can be combined with |.
type State int

const (
	// none occupies the lowest bit so that the exported flags start at 2.
	none State = 1 << 0

	// Continue indicates parser can continue parsing.
	Continue State = 1 << 1

	// Close indicates parser cannot parse anymore.
	Close State = 1 << 2

	// HasChildren indicates parser may have child blocks.
	HasChildren State = 1 << 3

	// NoChildren indicates parser does not have child blocks.
	NoChildren State = 1 << 4

	// RequireParagraph indicates parser requires that the last node
	// must be a paragraph and is not converted to other nodes by
	// ParagraphTransformers.
	RequireParagraph State = 1 << 5
)
|
|
|
|
// A Config struct is a data structure that holds configuration of the Parser.
|
|
type Config struct {
|
|
Options map[OptionName]interface{}
|
|
BlockParsers util.PrioritizedSlice /*<BlockParser>*/
|
|
InlineParsers util.PrioritizedSlice /*<InlineParser>*/
|
|
ParagraphTransformers util.PrioritizedSlice /*<ParagraphTransformer>*/
|
|
ASTTransformers util.PrioritizedSlice /*<ASTTransformer>*/
|
|
}
|
|
|
|
// NewConfig returns a new Config.
|
|
func NewConfig() *Config {
|
|
return &Config{
|
|
Options: map[OptionName]interface{}{},
|
|
BlockParsers: util.PrioritizedSlice{},
|
|
InlineParsers: util.PrioritizedSlice{},
|
|
ParagraphTransformers: util.PrioritizedSlice{},
|
|
ASTTransformers: util.PrioritizedSlice{},
|
|
}
|
|
}
|
|
|
|
// An Option interface is a functional option type for the Parser.
type Option interface {
	// SetParserOption applies this option to the given Config.
	SetParserOption(*Config)
}

// OptionName is a name of parser options.
type OptionName string
|
|
|
|
// Attribute is an option name that spacify attributes of elements.
|
|
const optAttribute OptionName = "Attribute"
|
|
|
|
type withAttribute struct {
|
|
}
|
|
|
|
func (o *withAttribute) SetParserOption(c *Config) {
|
|
c.Options[optAttribute] = true
|
|
}
|
|
|
|
// WithAttribute is a functional option that enables custom attributes.
|
|
func WithAttribute() Option {
|
|
return &withAttribute{}
|
|
}
|
|
|
|
// A Parser interface parses Markdown text into AST nodes.
type Parser interface {
	// Parse parses the given Markdown text into AST nodes.
	Parse(reader text.Reader, opts ...ParseOption) ast.Node

	// AddOptions adds the given options to this parser.
	AddOptions(...Option)
}
|
|
|
|
// A SetOptioner interface sets the given option to the object.
// Parsers and transformers that implement it receive every configured
// parser option when they are registered.
type SetOptioner interface {
	// SetOption sets the given option to the object.
	// Unacceptable options may be passed.
	// Thus implementations must ignore unacceptable options.
	SetOption(name OptionName, value interface{})
}
|
|
|
|
// A BlockParser interface parses a block level element like Paragraph, List,
// Blockquote etc.
type BlockParser interface {
	// Trigger returns a list of characters that trigger the Open method of
	// this parser.
	// If Trigger returns nil, Open will be called with any lines.
	Trigger() []byte

	// Open parses the current line and returns a result of parsing.
	//
	// Open must not parse beyond the current line.
	// If Open has been able to parse the current line, Open must advance a reader
	// position by consumed byte length.
	//
	// If Open has not been able to parse the current line, Open should return
	// (nil, NoChildren). If Open has been able to parse the current line, Open
	// should return a new Block node together with HasChildren or NoChildren.
	Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State)

	// Continue parses the current line and returns a result of parsing.
	//
	// Continue must not parse beyond the current line.
	// If Continue has been able to parse the current line, Continue must advance
	// a reader position by consumed byte length.
	//
	// If Continue has not been able to parse the current line, Continue should
	// return Close. If Continue has been able to parse the current line,
	// Continue should return (Continue | NoChildren) or
	// (Continue | HasChildren).
	Continue(node ast.Node, reader text.Reader, pc Context) State

	// Close will be called when the parser returns Close.
	Close(node ast.Node, reader text.Reader, pc Context)

	// CanInterruptParagraph returns true if the parser can interrupt paragraphs,
	// otherwise false.
	CanInterruptParagraph() bool

	// CanAcceptIndentedLine returns true if the parser can open a new node when
	// the given line is indented by more than 3 spaces.
	CanAcceptIndentedLine() bool
}
|
|
|
|
// An InlineParser interface parses an inline level element like CodeSpan, Link etc.
type InlineParser interface {
	// Trigger returns a list of characters that trigger the Parse method of
	// this parser.
	// Trigger characters must be a punctuation or a halfspace.
	// A halfspace triggers this parser when the character is any space
	// character or the head of a line.
	Trigger() []byte

	// Parse parses the given block into an inline node.
	//
	// Parse can parse beyond the current line.
	// If Parse has been able to parse the current line, it must advance a reader
	// position by consumed byte length.
	Parse(parent ast.Node, block text.Reader, pc Context) ast.Node
}
|
|
|
|
// A CloseBlocker interface is a callback that will be called when a block
// is closed in the inline parsing. Inline parsers that implement it are
// collected when they are registered.
type CloseBlocker interface {
	// CloseBlock will be called when a block is closed.
	CloseBlock(parent ast.Node, block text.Reader, pc Context)
}
|
|
|
|
// A ParagraphTransformer transforms parsed Paragraph nodes.
// For example, link references are searched in parsed Paragraphs.
// A transformer that detaches the paragraph from its parent stops the
// remaining transformers from running on it.
type ParagraphTransformer interface {
	// Transform transforms the given paragraph.
	Transform(node *ast.Paragraph, reader text.Reader, pc Context)
}
|
|
|
|
// ASTTransformer transforms the entire Markdown document AST.
// Transformers run at the end of Parse, after block and inline parsing.
type ASTTransformer interface {
	// Transform transforms the given AST tree.
	Transform(node *ast.Document, reader text.Reader, pc Context)
}
|
|
|
|
// DefaultBlockParsers returns a new list of default BlockParsers.
|
|
// Priorities of default BlockParsers are:
|
|
//
|
|
// SetextHeadingParser, 100
|
|
// ThematicBreakParser, 200
|
|
// ListParser, 300
|
|
// ListItemParser, 400
|
|
// CodeBlockParser, 500
|
|
// ATXHeadingParser, 600
|
|
// FencedCodeBlockParser, 700
|
|
// BlockquoteParser, 800
|
|
// HTMLBlockParser, 900
|
|
// ParagraphParser, 1000
|
|
func DefaultBlockParsers() []util.PrioritizedValue {
|
|
return []util.PrioritizedValue{
|
|
util.Prioritized(NewSetextHeadingParser(), 100),
|
|
util.Prioritized(NewThematicBreakParser(), 200),
|
|
util.Prioritized(NewListParser(), 300),
|
|
util.Prioritized(NewListItemParser(), 400),
|
|
util.Prioritized(NewCodeBlockParser(), 500),
|
|
util.Prioritized(NewATXHeadingParser(), 600),
|
|
util.Prioritized(NewFencedCodeBlockParser(), 700),
|
|
util.Prioritized(NewBlockquoteParser(), 800),
|
|
util.Prioritized(NewHTMLBlockParser(), 900),
|
|
util.Prioritized(NewParagraphParser(), 1000),
|
|
}
|
|
}
|
|
|
|
// DefaultInlineParsers returns a new list of default InlineParsers.
|
|
// Priorities of default InlineParsers are:
|
|
//
|
|
// CodeSpanParser, 100
|
|
// LinkParser, 200
|
|
// AutoLinkParser, 300
|
|
// RawHTMLParser, 400
|
|
// EmphasisParser, 500
|
|
func DefaultInlineParsers() []util.PrioritizedValue {
|
|
return []util.PrioritizedValue{
|
|
util.Prioritized(NewCodeSpanParser(), 100),
|
|
util.Prioritized(NewLinkParser(), 200),
|
|
util.Prioritized(NewAutoLinkParser(), 300),
|
|
util.Prioritized(NewRawHTMLParser(), 400),
|
|
util.Prioritized(NewEmphasisParser(), 500),
|
|
}
|
|
}
|
|
|
|
// DefaultParagraphTransformers returns a new list of default ParagraphTransformers.
|
|
// Priorities of default ParagraphTransformers are:
|
|
//
|
|
// LinkReferenceParagraphTransformer, 100
|
|
func DefaultParagraphTransformers() []util.PrioritizedValue {
|
|
return []util.PrioritizedValue{
|
|
util.Prioritized(LinkReferenceParagraphTransformer, 100),
|
|
}
|
|
}
|
|
|
|
// A Block struct holds a node and its corresponding parser as a pair.
// The zero Block (nil Node and nil Parser) stands for "no block".
type Block struct {
	// Node is a BlockNode.
	Node ast.Node
	// Parser is the BlockParser that opened Node.
	Parser BlockParser
}
|
|
|
|
// parser is the Parser implementation returned by NewParser.
type parser struct {
	// options is initialised to an empty map by NewParser.
	options map[OptionName]interface{}
	// blockParsers lists, per trigger byte, the block parsers to try.
	blockParsers [256][]BlockParser
	// freeBlockParsers are the block parsers whose Trigger returns nil.
	freeBlockParsers []BlockParser
	// inlineParsers lists, per trigger byte, the inline parsers to try.
	inlineParsers [256][]InlineParser
	// closeBlockers are the inline parsers that implement CloseBlocker.
	closeBlockers []CloseBlocker
	// paragraphTransformers and astTransformers are kept in priority order.
	paragraphTransformers []ParagraphTransformer
	astTransformers       []ASTTransformer
	// config collects options until the first Parse, which consumes it and
	// sets it to nil.
	config *Config
	// initSync makes the one-time setup in Parse run exactly once.
	initSync sync.Once
}
|
|
|
|
type withBlockParsers struct {
|
|
value []util.PrioritizedValue
|
|
}
|
|
|
|
func (o *withBlockParsers) SetParserOption(c *Config) {
|
|
c.BlockParsers = append(c.BlockParsers, o.value...)
|
|
}
|
|
|
|
// WithBlockParsers is a functional option that allow you to add
|
|
// BlockParsers to the parser.
|
|
func WithBlockParsers(bs ...util.PrioritizedValue) Option {
|
|
return &withBlockParsers{bs}
|
|
}
|
|
|
|
type withInlineParsers struct {
|
|
value []util.PrioritizedValue
|
|
}
|
|
|
|
func (o *withInlineParsers) SetParserOption(c *Config) {
|
|
c.InlineParsers = append(c.InlineParsers, o.value...)
|
|
}
|
|
|
|
// WithInlineParsers is a functional option that allow you to add
|
|
// InlineParsers to the parser.
|
|
func WithInlineParsers(bs ...util.PrioritizedValue) Option {
|
|
return &withInlineParsers{bs}
|
|
}
|
|
|
|
type withParagraphTransformers struct {
|
|
value []util.PrioritizedValue
|
|
}
|
|
|
|
func (o *withParagraphTransformers) SetParserOption(c *Config) {
|
|
c.ParagraphTransformers = append(c.ParagraphTransformers, o.value...)
|
|
}
|
|
|
|
// WithParagraphTransformers is a functional option that allow you to add
|
|
// ParagraphTransformers to the parser.
|
|
func WithParagraphTransformers(ps ...util.PrioritizedValue) Option {
|
|
return &withParagraphTransformers{ps}
|
|
}
|
|
|
|
type withASTTransformers struct {
|
|
value []util.PrioritizedValue
|
|
}
|
|
|
|
func (o *withASTTransformers) SetParserOption(c *Config) {
|
|
c.ASTTransformers = append(c.ASTTransformers, o.value...)
|
|
}
|
|
|
|
// WithASTTransformers is a functional option that allow you to add
|
|
// ASTTransformers to the parser.
|
|
func WithASTTransformers(ps ...util.PrioritizedValue) Option {
|
|
return &withASTTransformers{ps}
|
|
}
|
|
|
|
type withOption struct {
|
|
name OptionName
|
|
value interface{}
|
|
}
|
|
|
|
func (o *withOption) SetParserOption(c *Config) {
|
|
c.Options[o.name] = o.value
|
|
}
|
|
|
|
// WithOption is a functional option that allow you to set
|
|
// an arbitrary option to the parser.
|
|
func WithOption(name OptionName, value interface{}) Option {
|
|
return &withOption{name, value}
|
|
}
|
|
|
|
// NewParser returns a new Parser with given options.
|
|
func NewParser(options ...Option) Parser {
|
|
config := NewConfig()
|
|
for _, opt := range options {
|
|
opt.SetParserOption(config)
|
|
}
|
|
|
|
p := &parser{
|
|
options: map[OptionName]interface{}{},
|
|
config: config,
|
|
}
|
|
|
|
return p
|
|
}
|
|
|
|
func (p *parser) AddOptions(opts ...Option) {
|
|
for _, opt := range opts {
|
|
opt.SetParserOption(p.config)
|
|
}
|
|
}
|
|
|
|
func (p *parser) addBlockParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
|
|
bp, ok := v.Value.(BlockParser)
|
|
if !ok {
|
|
panic(fmt.Sprintf("%v is not a BlockParser", v.Value))
|
|
}
|
|
tcs := bp.Trigger()
|
|
so, ok := v.Value.(SetOptioner)
|
|
if ok {
|
|
for oname, ovalue := range options {
|
|
so.SetOption(oname, ovalue)
|
|
}
|
|
}
|
|
if tcs == nil {
|
|
p.freeBlockParsers = append(p.freeBlockParsers, bp)
|
|
} else {
|
|
for _, tc := range tcs {
|
|
if p.blockParsers[tc] == nil {
|
|
p.blockParsers[tc] = []BlockParser{}
|
|
}
|
|
p.blockParsers[tc] = append(p.blockParsers[tc], bp)
|
|
}
|
|
}
|
|
}
|
|
|
|
func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
|
|
ip, ok := v.Value.(InlineParser)
|
|
if !ok {
|
|
panic(fmt.Sprintf("%v is not a InlineParser", v.Value))
|
|
}
|
|
tcs := ip.Trigger()
|
|
so, ok := v.Value.(SetOptioner)
|
|
if ok {
|
|
for oname, ovalue := range options {
|
|
so.SetOption(oname, ovalue)
|
|
}
|
|
}
|
|
if cb, ok := ip.(CloseBlocker); ok {
|
|
p.closeBlockers = append(p.closeBlockers, cb)
|
|
}
|
|
for _, tc := range tcs {
|
|
if p.inlineParsers[tc] == nil {
|
|
p.inlineParsers[tc] = []InlineParser{}
|
|
}
|
|
p.inlineParsers[tc] = append(p.inlineParsers[tc], ip)
|
|
}
|
|
}
|
|
|
|
func (p *parser) addParagraphTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
|
|
pt, ok := v.Value.(ParagraphTransformer)
|
|
if !ok {
|
|
panic(fmt.Sprintf("%v is not a ParagraphTransformer", v.Value))
|
|
}
|
|
so, ok := v.Value.(SetOptioner)
|
|
if ok {
|
|
for oname, ovalue := range options {
|
|
so.SetOption(oname, ovalue)
|
|
}
|
|
}
|
|
p.paragraphTransformers = append(p.paragraphTransformers, pt)
|
|
}
|
|
|
|
func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
|
|
at, ok := v.Value.(ASTTransformer)
|
|
if !ok {
|
|
panic(fmt.Sprintf("%v is not a ASTTransformer", v.Value))
|
|
}
|
|
so, ok := v.Value.(SetOptioner)
|
|
if ok {
|
|
for oname, ovalue := range options {
|
|
so.SetOption(oname, ovalue)
|
|
}
|
|
}
|
|
p.astTransformers = append(p.astTransformers, at)
|
|
}
|
|
|
|
// A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
|
|
type ParseConfig struct {
|
|
Context Context
|
|
}
|
|
|
|
// A ParseOption is a functional option type for the Parser.Parse.
|
|
type ParseOption func(c *ParseConfig)
|
|
|
|
// WithContext is a functional option that allow you to override
|
|
// a default context.
|
|
func WithContext(context Context) ParseOption {
|
|
return func(c *ParseConfig) {
|
|
c.Context = context
|
|
}
|
|
}
|
|
|
|
func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
|
|
p.initSync.Do(func() {
|
|
p.config.BlockParsers.Sort()
|
|
for _, v := range p.config.BlockParsers {
|
|
p.addBlockParser(v, p.config.Options)
|
|
}
|
|
for i := range p.blockParsers {
|
|
if p.blockParsers[i] != nil {
|
|
p.blockParsers[i] = append(p.blockParsers[i], p.freeBlockParsers...)
|
|
}
|
|
}
|
|
|
|
p.config.InlineParsers.Sort()
|
|
for _, v := range p.config.InlineParsers {
|
|
p.addInlineParser(v, p.config.Options)
|
|
}
|
|
p.config.ParagraphTransformers.Sort()
|
|
for _, v := range p.config.ParagraphTransformers {
|
|
p.addParagraphTransformer(v, p.config.Options)
|
|
}
|
|
p.config.ASTTransformers.Sort()
|
|
for _, v := range p.config.ASTTransformers {
|
|
p.addASTTransformer(v, p.config.Options)
|
|
}
|
|
p.config = nil
|
|
})
|
|
c := &ParseConfig{}
|
|
for _, opt := range opts {
|
|
opt(c)
|
|
}
|
|
if c.Context == nil {
|
|
c.Context = NewContext()
|
|
}
|
|
pc := c.Context
|
|
root := ast.NewDocument()
|
|
p.parseBlocks(root, reader, pc)
|
|
|
|
blockReader := text.NewBlockReader(reader.Source(), nil)
|
|
p.walkBlock(root, func(node ast.Node) {
|
|
p.parseBlock(blockReader, node, pc)
|
|
})
|
|
for _, at := range p.astTransformers {
|
|
at.Transform(root, reader, pc)
|
|
}
|
|
// root.Dump(reader.Source(), 0)
|
|
return root
|
|
}
|
|
|
|
func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) bool {
|
|
for _, pt := range p.paragraphTransformers {
|
|
pt.Transform(node, reader, pc)
|
|
if node.Parent() == nil {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) {
|
|
blocks := pc.OpenedBlocks()
|
|
for i := from; i >= to; i-- {
|
|
node := blocks[i].Node
|
|
blocks[i].Parser.Close(blocks[i].Node, reader, pc)
|
|
paragraph, ok := node.(*ast.Paragraph)
|
|
if ok && node.Parent() != nil {
|
|
p.transformParagraph(paragraph, reader, pc)
|
|
}
|
|
}
|
|
if from == len(blocks)-1 {
|
|
blocks = blocks[0:to]
|
|
} else {
|
|
blocks = append(blocks[0:to], blocks[from+1:]...)
|
|
}
|
|
pc.SetOpenedBlocks(blocks)
|
|
}
|
|
|
|
// blockOpenResult is the outcome reported by openBlocks.
type blockOpenResult int

const (
	// The zero value is deliberately unused; results start at 1.
	_ blockOpenResult = iota
	// paragraphContinuation: the line continues the last opened paragraph.
	paragraphContinuation
	// newBlocksOpened: at least one new block was opened.
	newBlocksOpened
	// noBlocksOpened: nothing was opened and nothing was continued.
	noBlocksOpened
)
|
|
|
|
// openBlocks tries to open new blocks under parent for the current line.
//
// It returns newBlocksOpened if at least one block was opened,
// paragraphContinuation if nothing was opened but the last opened block is
// a paragraph whose parser accepted the line, and noBlocksOpened otherwise.
// blankLine is recorded on each opened node via SetBlankPreviousLines.
func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader, pc Context) blockOpenResult {
	result := blockOpenResult(noBlocksOpened)
	// continuable: the last opened block is a paragraph, so the line may
	// simply continue it.
	continuable := false
	lastBlock := pc.LastOpenedBlock()
	if lastBlock.Node != nil {
		continuable = ast.IsParagraph(lastBlock.Node)
	}
retry:
	var bps []BlockParser
	line, _ := reader.PeekLine()
	w, pos := util.IndentWidth(line, reader.LineOffset())
	if w >= len(line) {
		// Nothing but indentation on this line.
		pc.SetBlockOffset(-1)
		pc.SetBlockIndent(-1)
	} else {
		pc.SetBlockOffset(pos)
		pc.SetBlockIndent(w)
	}
	if line == nil || line[0] == '\n' {
		goto continuable
	}
	// Pick the parsers registered for the first non-space byte, falling
	// back to the trigger-less parsers.
	bps = p.freeBlockParsers
	if pos < len(line) {
		bps = p.blockParsers[line[pos]]
		if bps == nil {
			bps = p.freeBlockParsers
		}
	}
	if bps == nil {
		goto continuable
	}

	for _, bp := range bps {
		if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() {
			continue
		}
		if w > 3 && !bp.CanAcceptIndentedLine() {
			continue
		}
		lastBlock = pc.LastOpenedBlock()
		last := lastBlock.Node
		node, state := bp.Open(parent, reader, pc)
		if node != nil {
			// Parser requires last node to be a paragraph.
			// With table extension:
			//
			//     0
			//     -:
			//     -
			//
			// '-' on 3rd line seems a Setext heading because 1st and 2nd lines
			// are being paragraph when the Setext heading parser tries to parse the 3rd
			// line.
			// But 1st line and 2nd line are a table. Thus this paragraph will be transformed
			// by a paragraph transformer. So this text should be converted to a table and
			// an empty list.
			if state&RequireParagraph != 0 {
				if last == parent.LastChild() {
					// Opened paragraph may be transformed by ParagraphTransformers in
					// closeBlocks().
					lastBlock.Parser.Close(last, reader, pc)
					blocks := pc.OpenedBlocks()
					pc.SetOpenedBlocks(blocks[0 : len(blocks)-1])
					if p.transformParagraph(last.(*ast.Paragraph), reader, pc) {
						// Paragraph has been transformed.
						// So this parser is considered as failing.
						continuable = false
						goto retry
					}
				}
			}
			node.SetBlankPreviousLines(blankLine)
			// The previous block was detached while opening this one:
			// close it before the new node is attached.
			if last != nil && last.Parent() == nil {
				lastPos := len(pc.OpenedBlocks()) - 1
				p.closeBlocks(lastPos, lastPos, reader, pc)
			}
			parent.AppendChild(parent, node)
			result = newBlocksOpened
			be := Block{node, bp}
			pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be))
			if state&HasChildren != 0 {
				parent = node
				goto retry // try child block
			}
			break // no children, can not open more blocks on this line
		}
	}

continuable:

	if result == noBlocksOpened && continuable {
		state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc)
		if state&Continue != 0 {
			result = paragraphContinuation
		}
	}
	return result
}
|
|
|
|
// lineStat records, for one opened-block level on one line, whether the
// line was blank.
type lineStat struct {
	// lineNum is the line the record belongs to.
	lineNum int
	// level is the index of the opened block the record was made for.
	level int
	// isBlank tells whether the line was blank.
	isBlank bool
}

// isBlankLine looks up in stats whether line lineNum was blank at the given
// level.
//
// The search runs backwards, starting level entries before the end of
// stats. If there is nothing to search, the answer is true. A record for
// lineNum at exactly this level decides the answer; a blank record for
// lineNum at a lower level also yields true. Reaching a record of an
// earlier line, or running out of records, yields false.
func isBlankLine(lineNum, level int, stats []lineStat) bool {
	start := len(stats) - 1 - level
	if start < 0 {
		return true
	}
	for idx := start; idx >= 0; idx-- {
		stat := stats[idx]
		switch {
		case stat.lineNum < lineNum:
			return false
		case stat.lineNum > lineNum:
			continue
		case stat.level == level:
			return stat.isBlank
		case stat.level < level && stat.isBlank:
			return true
		}
	}
	return false
}
|
|
|
|
// parseBlocks builds the block structure of the document under parent.
//
// The outer loop skips blank lines and opens the first blocks of each
// section; the inner loop then feeds the following lines to the opened
// blocks until none is left open. It returns when the reader has no more
// lines or when no block can be opened.
func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) {
	pc.SetOpenedBlocks([]Block{})
	// blankLines records per line and opened-block level whether the line
	// was blank; isBlankLine consults it.
	blankLines := make([]lineStat, 0, 128)
	isBlank := false
	for { // process blocks separated by blank lines
		_, lines, ok := reader.SkipBlankLines()
		if !ok {
			return
		}
		lineNum, _ := reader.Position()
		if lines != 0 {
			// Blank lines were skipped: start over and mark the previous
			// line as blank for every block that is still open.
			blankLines = blankLines[0:0]
			l := len(pc.OpenedBlocks())
			for i := 0; i < l; i++ {
				blankLines = append(blankLines, lineStat{lineNum - 1, i, lines != 0})
			}
		}
		isBlank = isBlankLine(lineNum-1, 0, blankLines)
		// first, we try to open blocks
		if p.openBlocks(parent, isBlank, reader, pc) != newBlocksOpened {
			return
		}
		reader.AdvanceLine()
		for { // process opened blocks line by line
			openedBlocks := pc.OpenedBlocks()
			l := len(openedBlocks)
			if l == 0 {
				break
			}
			lastIndex := l - 1
			for i := 0; i < l; i++ {
				be := openedBlocks[i]
				line, _ := reader.PeekLine()
				if line == nil {
					// End of input: close everything that is still open.
					p.closeBlocks(lastIndex, 0, reader, pc)
					reader.AdvanceLine()
					return
				}
				lineNum, _ := reader.Position()
				blankLines = append(blankLines, lineStat{lineNum, i, util.IsBlank(line)})
				// If node is a paragraph, p.openBlocks determines whether it is continuable.
				// So we do not process paragraphs here.
				if !ast.IsParagraph(be.Node) {
					state := be.Parser.Continue(be.Node, reader, pc)
					if state&Continue != 0 {
						// When current node is a container block and has no children,
						// we try to open new child nodes
						if state&HasChildren != 0 && i == lastIndex {
							isBlank = isBlankLine(lineNum-1, i, blankLines)
							p.openBlocks(be.Node, isBlank, reader, pc)
							break
						}
						continue
					}
				}
				// current node may be closed or lazy continuation
				isBlank = isBlankLine(lineNum-1, i, blankLines)
				thisParent := parent
				if i != 0 {
					thisParent = openedBlocks[i-1].Node
				}
				lastNode := openedBlocks[lastIndex].Node
				result := p.openBlocks(thisParent, isBlank, reader, pc)
				if result != paragraphContinuation {
					// lastNode is a paragraph and was transformed by the paragraph
					// transformers.
					if openedBlocks[lastIndex].Node != lastNode {
						lastIndex--
					}
					p.closeBlocks(lastIndex, i, reader, pc)
				}
				break
			}

			reader.AdvanceLine()
		}
	}
}
|
|
|
|
func (p *parser) walkBlock(block ast.Node, cb func(node ast.Node)) {
|
|
for c := block.FirstChild(); c != nil; c = c.NextSibling() {
|
|
p.walkBlock(c, cb)
|
|
}
|
|
cb(block)
|
|
}
|
|
|
|
// parseBlock runs inline parsing over the lines of parent and appends the
// resulting inline nodes and text segments to it. Raw blocks are skipped.
// When all lines are done, delimiters are processed and the registered
// CloseBlockers are notified.
func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) {
	if parent.IsRaw() {
		return
	}
	// escaped is true while the previous byte was an unescaped backslash.
	escaped := false
	source := block.Source()
	block.Reset(parent.Lines())
	for {
	retry:
		line, _ := block.PeekLine()
		if line == nil {
			break
		}
		lineLength := len(line)
		hardlineBreak := false
		softLinebreak := line[lineLength-1] == '\n'
		// Detect a hard line break and cut its marker off the scanned range.
		if lineLength >= 2 && line[lineLength-2] == '\\' && softLinebreak { // ends with \\n
			lineLength -= 2
			hardlineBreak = true

		} else if lineLength >= 3 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r' && softLinebreak { // ends with \\r\n
			lineLength -= 3
			hardlineBreak = true
		} else if lineLength >= 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' && softLinebreak { // ends with [space][space]\n
			lineLength -= 3
			hardlineBreak = true
		} else if lineLength >= 4 && line[lineLength-4] == ' ' && line[lineLength-3] == ' ' && line[lineLength-2] == '\r' && softLinebreak { // ends with [space][space]\r\n
			lineLength -= 4
			hardlineBreak = true
		}

		l, startPosition := block.Position()
		// n counts the bytes scanned since the reader was last advanced.
		n := 0
		for i := 0; i < lineLength; i++ {
			c := line[i]
			if c == '\n' {
				break
			}
			isSpace := util.IsSpace(c)
			isPunct := util.IsPunct(c)
			if (isPunct && !escaped) || isSpace || i == 0 {
				// Spaces and a non-punctuation byte at the head of the line
				// are looked up under ' '.
				parserChar := c
				if isSpace || (i == 0 && !isPunct) {
					parserChar = ' '
				}
				ips := p.inlineParsers[parserChar]
				if ips != nil {
					block.Advance(n)
					n = 0
					savedLine, savedPosition := block.Position()
					if i != 0 {
						// Emit the plain text scanned so far.
						_, currentPosition := block.Position()
						ast.MergeOrAppendTextSegment(parent, startPosition.Between(currentPosition))
						_, startPosition = block.Position()
					}
					var inlineNode ast.Node
					for _, ip := range ips {
						inlineNode = ip.Parse(parent, block, pc)
						if inlineNode != nil {
							break
						}
						// The parser failed: rewind before trying the next one.
						block.SetPosition(savedLine, savedPosition)
					}
					if inlineNode != nil {
						parent.AppendChild(parent, inlineNode)
						goto retry
					}
				}
			}
			if escaped {
				escaped = false
				n++
				continue
			}

			if c == '\\' {
				escaped = true
				n++
				continue
			}

			escaped = false
			n++
		}
		if n != 0 {
			block.Advance(n)
		}
		currentL, currentPosition := block.Position()
		if l != currentL {
			// The reader is no longer on the line this pass started on.
			continue
		}
		// Emit the rest of the line as a text segment carrying the line
		// break flags.
		diff := startPosition.Between(currentPosition)
		stop := diff.Stop
		rest := diff.WithStop(stop)
		text := ast.NewTextSegment(rest.TrimRightSpace(source))
		text.SetSoftLineBreak(softLinebreak)
		text.SetHardLineBreak(hardlineBreak)
		parent.AppendChild(parent, text)
		block.AdvanceLine()
	}

	ProcessDelimiters(nil, pc)
	for _, ip := range p.closeBlockers {
		ip.CloseBlock(parent, block, pc)
	}
}
|