Move stripLinebreak from a function parameter to a struct field (keepLinebreak, with inverted meaning) in the HTML parser

This commit is contained in:
Tulir Asokan 2019-04-09 01:03:10 +03:00
parent 887e2e232a
commit 927be9bdcd

View File

@@ -39,6 +39,8 @@ var matrixToURL = regexp.MustCompile("^(?:https?://)?(?:www\\.)?matrix\\.to/#/([
type htmlParser struct { type htmlParser struct {
room *rooms.Room room *rooms.Room
keepLinebreak bool
} }
func AdjustStyleBold(style tcell.Style) tcell.Style { func AdjustStyleBold(style tcell.Style) tcell.Style {
@@ -78,8 +80,8 @@ func (parser *htmlParser) getAttribute(node *html.Node, attribute string) string
return "" return ""
} }
func (parser *htmlParser) listToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity { func (parser *htmlParser) listToEntity(node *html.Node) messages.HTMLEntity {
children := parser.nodeToEntities(node.FirstChild, stripLinebreak) children := parser.nodeToEntities(node.FirstChild)
ordered := node.Data == "ol" ordered := node.Data == "ol"
start := 1 start := 1
if ordered { if ordered {
@@ -100,10 +102,10 @@ func (parser *htmlParser) listToEntity(node *html.Node, stripLinebreak bool) mes
return messages.NewListEntity(ordered, start, listItems) return messages.NewListEntity(ordered, start, listItems)
} }
func (parser *htmlParser) basicFormatToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity { func (parser *htmlParser) basicFormatToEntity(node *html.Node) messages.HTMLEntity {
entity := &messages.BaseHTMLEntity{ entity := &messages.BaseHTMLEntity{
Tag: node.Data, Tag: node.Data,
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak), Children: parser.nodeToEntities(node.FirstChild),
} }
switch node.Data { switch node.Data {
case "b", "strong": case "b", "strong":
@@ -149,24 +151,24 @@ func (parser *htmlParser) parseColor(node *html.Node, mainName, altName string)
return tcell.NewRGBColor(int32(r), int32(g), int32(b)), true return tcell.NewRGBColor(int32(r), int32(g), int32(b)), true
} }
func (parser *htmlParser) headerToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity { func (parser *htmlParser) headerToEntity(node *html.Node) messages.HTMLEntity {
length := int(node.Data[1] - '0') length := int(node.Data[1] - '0')
prefix := strings.Repeat("#", length) + " " prefix := strings.Repeat("#", length) + " "
return (&messages.BaseHTMLEntity{ return (&messages.BaseHTMLEntity{
Tag: node.Data, Tag: node.Data,
Text: prefix, Text: prefix,
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak), Children: parser.nodeToEntities(node.FirstChild),
}).AdjustStyle(AdjustStyleBold) }).AdjustStyle(AdjustStyleBold)
} }
func (parser *htmlParser) blockquoteToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity { func (parser *htmlParser) blockquoteToEntity(node *html.Node) messages.HTMLEntity {
return messages.NewBlockquoteEntity(parser.nodeToEntities(node.FirstChild, stripLinebreak)) return messages.NewBlockquoteEntity(parser.nodeToEntities(node.FirstChild))
} }
func (parser *htmlParser) linkToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity { func (parser *htmlParser) linkToEntity(node *html.Node) messages.HTMLEntity {
entity := &messages.BaseHTMLEntity{ entity := &messages.BaseHTMLEntity{
Tag: "a", Tag: "a",
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak), Children: parser.nodeToEntities(node.FirstChild),
} }
href := parser.getAttribute(node, "href") href := parser.getAttribute(node, "href")
if len(href) == 0 { if len(href) == 0 {
@@ -263,26 +265,28 @@ func (parser *htmlParser) codeblockToEntity(node *html.Node) messages.HTMLEntity
} }
} }
} }
parser.keepLinebreak = true
text := (&messages.BaseHTMLEntity{ text := (&messages.BaseHTMLEntity{
Children: parser.nodeToEntities(node.FirstChild, false), Children: parser.nodeToEntities(node.FirstChild),
}).PlainText() }).PlainText()
parser.keepLinebreak = false
return parser.syntaxHighlight(text, lang) return parser.syntaxHighlight(text, lang)
} }
func (parser *htmlParser) tagNodeToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity { func (parser *htmlParser) tagNodeToEntity(node *html.Node) messages.HTMLEntity {
switch node.Data { switch node.Data {
case "blockquote": case "blockquote":
return parser.blockquoteToEntity(node, stripLinebreak) return parser.blockquoteToEntity(node)
case "ol", "ul": case "ol", "ul":
return parser.listToEntity(node, stripLinebreak) return parser.listToEntity(node)
case "h1", "h2", "h3", "h4", "h5", "h6": case "h1", "h2", "h3", "h4", "h5", "h6":
return parser.headerToEntity(node, stripLinebreak) return parser.headerToEntity(node)
case "br": case "br":
return messages.NewBreakEntity() return messages.NewBreakEntity()
case "b", "strong", "i", "em", "s", "del", "u", "ins", "font": case "b", "strong", "i", "em", "s", "del", "u", "ins", "font":
return parser.basicFormatToEntity(node, stripLinebreak) return parser.basicFormatToEntity(node)
case "a": case "a":
return parser.linkToEntity(node, stripLinebreak) return parser.linkToEntity(node)
case "img": case "img":
return parser.imageToEntity(node) return parser.imageToEntity(node)
case "pre": case "pre":
@@ -290,16 +294,16 @@ func (parser *htmlParser) tagNodeToEntity(node *html.Node, stripLinebreak bool)
default: default:
return &messages.BaseHTMLEntity{ return &messages.BaseHTMLEntity{
Tag: node.Data, Tag: node.Data,
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak), Children: parser.nodeToEntities(node.FirstChild),
Block: parser.isBlockTag(node.Data), Block: parser.isBlockTag(node.Data),
} }
} }
} }
func (parser *htmlParser) singleNodeToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity { func (parser *htmlParser) singleNodeToEntity(node *html.Node) messages.HTMLEntity {
switch node.Type { switch node.Type {
case html.TextNode: case html.TextNode:
if stripLinebreak { if !parser.keepLinebreak {
node.Data = strings.ReplaceAll(node.Data, "\n", "") node.Data = strings.ReplaceAll(node.Data, "\n", "")
} }
return &messages.BaseHTMLEntity{ return &messages.BaseHTMLEntity{
@@ -307,14 +311,14 @@ func (parser *htmlParser) singleNodeToEntity(node *html.Node, stripLinebreak boo
Text: node.Data, Text: node.Data,
} }
case html.ElementNode: case html.ElementNode:
return parser.tagNodeToEntity(node, stripLinebreak) return parser.tagNodeToEntity(node)
case html.DocumentNode: case html.DocumentNode:
if node.FirstChild.Data == "html" && node.FirstChild.NextSibling == nil { if node.FirstChild.Data == "html" && node.FirstChild.NextSibling == nil {
return parser.singleNodeToEntity(node.FirstChild, stripLinebreak) return parser.singleNodeToEntity(node.FirstChild)
} }
return &messages.BaseHTMLEntity{ return &messages.BaseHTMLEntity{
Tag: "html", Tag: "html",
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak), Children: parser.nodeToEntities(node.FirstChild),
Block: true, Block: true,
} }
default: default:
@@ -322,9 +326,9 @@ func (parser *htmlParser) singleNodeToEntity(node *html.Node, stripLinebreak boo
} }
} }
func (parser *htmlParser) nodeToEntities(node *html.Node, stripLinebreak bool) (entities []messages.HTMLEntity) { func (parser *htmlParser) nodeToEntities(node *html.Node) (entities []messages.HTMLEntity) {
for ; node != nil; node = node.NextSibling { for ; node != nil; node = node.NextSibling {
if entity := parser.singleNodeToEntity(node, stripLinebreak); entity != nil { if entity := parser.singleNodeToEntity(node); entity != nil {
entities = append(entities, entity) entities = append(entities, entity)
} }
} }
@@ -344,7 +348,7 @@ func (parser *htmlParser) isBlockTag(tag string) bool {
func (parser *htmlParser) Parse(htmlData string) messages.HTMLEntity { func (parser *htmlParser) Parse(htmlData string) messages.HTMLEntity {
node, _ := html.Parse(strings.NewReader(htmlData)) node, _ := html.Parse(strings.NewReader(htmlData))
return parser.singleNodeToEntity(node, true) return parser.singleNodeToEntity(node)
} }
// ParseHTMLMessage parses a HTML-formatted Matrix event into a UIMessage. // ParseHTMLMessage parses a HTML-formatted Matrix event into a UIMessage.
@@ -352,7 +356,7 @@ func ParseHTMLMessage(room *rooms.Room, evt *mautrix.Event, senderDisplayname st
htmlData := evt.Content.FormattedBody htmlData := evt.Content.FormattedBody
htmlData = strings.Replace(htmlData, "\t", " ", -1) htmlData = strings.Replace(htmlData, "\t", " ", -1)
parser := htmlParser{room} parser := htmlParser{room: room}
root := parser.Parse(htmlData) root := parser.Parse(htmlData)
root.(*messages.BaseHTMLEntity).Block = false root.(*messages.BaseHTMLEntity).Block = false