gomuks/ui/messages/parser/htmlparser.go

384 lines
11 KiB
Go
Raw Normal View History

2018-04-13 23:34:25 +02:00
// gomuks - A terminal Matrix client written in Go.
2019-01-17 13:13:25 +01:00
// Copyright (C) 2019 Tulir Asokan
2018-04-13 23:34:25 +02:00
//
// This program is free software: you can redistribute it and/or modify
2019-01-17 13:13:25 +01:00
// it under the terms of the GNU Affero General Public License as published by
2018-04-13 23:34:25 +02:00
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2019-01-17 13:13:25 +01:00
// GNU Affero General Public License for more details.
2018-04-13 23:34:25 +02:00
//
2019-01-17 13:13:25 +01:00
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
2018-04-13 23:34:25 +02:00
package parser
2018-04-13 23:34:25 +02:00
import (
"regexp"
2019-01-17 13:13:25 +01:00
"strconv"
2018-04-13 23:34:25 +02:00
"strings"
2019-04-07 19:13:23 +02:00
"github.com/alecthomas/chroma"
"github.com/alecthomas/chroma/lexers"
"github.com/alecthomas/chroma/styles"
"github.com/lucasb-eyer/go-colorful"
2018-06-01 23:44:21 +02:00
"golang.org/x/net/html"
2019-01-17 13:13:25 +01:00
"maunium.net/go/mautrix"
"maunium.net/go/tcell"
"maunium.net/go/gomuks/matrix/rooms"
"maunium.net/go/gomuks/ui/messages"
"maunium.net/go/gomuks/ui/widget"
2018-04-13 23:34:25 +02:00
)
// matrixToURL matches matrix.to permalinks, with an optional http(s) scheme
// and an optional "www." prefix. Capture group 1 holds everything after
// "matrix.to/#/", which must begin with '#', '@' or '!'.
var matrixToURL = regexp.MustCompile("^(?:https?://)?(?:www\\.)?matrix\\.to/#/([#@!].*)")
2018-04-13 23:34:25 +02:00
2018-05-31 15:59:40 +02:00
// htmlParser converts parsed HTML nodes into messages.HTMLEntity trees.
type htmlParser struct {
// room is consulted (via GetMember) when a matrix.to link pointing at a
// user is turned into a display-name pill.
room *rooms.Room
}
func AdjustStyleBold(style tcell.Style) tcell.Style {
2018-05-31 15:59:40 +02:00
return style.Bold(true)
}
func AdjustStyleItalic(style tcell.Style) tcell.Style {
2018-05-31 15:59:40 +02:00
return style.Italic(true)
}
func AdjustStyleUnderline(style tcell.Style) tcell.Style {
2018-05-31 15:59:40 +02:00
return style.Underline(true)
}
func AdjustStyleStrikethrough(style tcell.Style) tcell.Style {
2018-05-31 15:59:40 +02:00
return style.Strikethrough(true)
2018-04-13 23:34:25 +02:00
}
2019-03-26 21:09:10 +01:00
// AdjustStyleTextColor builds a style adjuster that sets the foreground of
// the style it receives to color.
func AdjustStyleTextColor(color tcell.Color) func(tcell.Style) tcell.Style {
	setForeground := func(base tcell.Style) tcell.Style {
		return base.Foreground(color)
	}
	return setForeground
}
2019-03-26 21:09:10 +01:00
// AdjustStyleBackgroundColor builds a style adjuster that sets the background
// of the style it receives to color.
func AdjustStyleBackgroundColor(color tcell.Color) func(tcell.Style) tcell.Style {
	setBackground := func(base tcell.Style) tcell.Style {
		return base.Background(color)
	}
	return setBackground
}
2018-06-01 23:28:21 +02:00
// getAttribute returns the value of the first attribute on node whose key
// equals attribute, or the empty string when the node has no such attribute.
func (parser *htmlParser) getAttribute(node *html.Node, attribute string) string {
	for i := range node.Attr {
		if candidate := &node.Attr[i]; candidate.Key == attribute {
			return candidate.Val
		}
	}
	return ""
}
func (parser *htmlParser) listToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
children := parser.nodeToEntities(node.FirstChild, stripLinebreak)
2018-05-31 15:59:40 +02:00
ordered := node.Data == "ol"
start := 1
2018-05-31 15:59:40 +02:00
if ordered {
if startRaw := parser.getAttribute(node, "start"); len(startRaw) > 0 {
var err error
start, err = strconv.Atoi(startRaw)
if err != nil {
start = 1
}
2018-06-01 23:28:21 +02:00
}
}
listItems := children[:0]
for _, child := range children {
if child.GetTag() == "li" {
listItems = append(listItems, child)
2018-05-31 15:59:40 +02:00
}
2019-04-07 02:22:51 +02:00
}
return messages.NewListEntity(ordered, start, listItems)
2018-04-13 23:34:25 +02:00
}
func (parser *htmlParser) basicFormatToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
entity := &messages.BaseHTMLEntity{
2019-04-07 02:22:51 +02:00
Tag: node.Data,
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
}
2018-05-31 15:59:40 +02:00
switch node.Data {
case "b", "strong":
2019-04-07 02:22:51 +02:00
entity.AdjustStyle(AdjustStyleBold)
2018-05-31 15:59:40 +02:00
case "i", "em":
2019-04-07 02:22:51 +02:00
entity.AdjustStyle(AdjustStyleItalic)
2018-05-31 15:59:40 +02:00
case "s", "del":
2019-04-07 02:22:51 +02:00
entity.AdjustStyle(AdjustStyleStrikethrough)
2018-05-31 15:59:40 +02:00
case "u", "ins":
2019-04-07 02:22:51 +02:00
entity.AdjustStyle(AdjustStyleUnderline)
case "font":
fgColor, ok := parser.parseColor(node, "data-mx-color", "color")
if ok {
entity.AdjustStyle(AdjustStyleTextColor(fgColor))
}
bgColor, ok := parser.parseColor(node, "data-mx-bg-color", "background-color")
if ok {
entity.AdjustStyle(AdjustStyleBackgroundColor(bgColor))
}
}
2019-04-07 02:22:51 +02:00
return entity
}
2019-03-26 21:09:10 +01:00
func (parser *htmlParser) parseColor(node *html.Node, mainName, altName string) (color tcell.Color, ok bool) {
hex := parser.getAttribute(node, mainName)
if len(hex) == 0 {
2019-03-26 21:09:10 +01:00
hex = parser.getAttribute(node, altName)
if len(hex) == 0 {
2019-03-26 21:09:10 +01:00
return
}
}
2019-03-26 21:09:10 +01:00
cful, err := colorful.Hex(hex)
if err != nil {
2019-03-26 21:09:10 +01:00
color2, found := ColorMap[strings.ToLower(hex)]
if !found {
return
}
2019-03-26 21:09:10 +01:00
cful, _ = colorful.MakeColor(color2)
}
2019-03-26 21:09:10 +01:00
r, g, b := cful.RGB255()
return tcell.NewRGBColor(int32(r), int32(g), int32(b)), true
}
func (parser *htmlParser) headerToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
2018-05-31 15:59:40 +02:00
length := int(node.Data[1] - '0')
prefix := strings.Repeat("#", length) + " "
return (&messages.BaseHTMLEntity{
2019-04-07 02:22:51 +02:00
Tag: node.Data,
Text: prefix,
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
}).AdjustStyle(AdjustStyleBold)
2018-05-31 15:59:40 +02:00
}
func (parser *htmlParser) blockquoteToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
return messages.NewBlockquoteEntity(parser.nodeToEntities(node.FirstChild, stripLinebreak))
2018-05-31 15:59:40 +02:00
}
func (parser *htmlParser) linkToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
entity := &messages.BaseHTMLEntity{
2019-04-07 02:22:51 +02:00
Tag: "a",
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
}
2018-06-01 23:28:21 +02:00
href := parser.getAttribute(node, "href")
2018-05-31 15:59:40 +02:00
if len(href) == 0 {
2019-04-07 02:22:51 +02:00
return entity
2018-05-31 15:59:40 +02:00
}
match := matrixToURL.FindStringSubmatch(href)
if len(match) == 2 {
2019-04-07 02:22:51 +02:00
entity.Children = nil
2018-05-31 15:59:40 +02:00
pillTarget := match[1]
2019-04-07 02:22:51 +02:00
entity.Text = pillTarget
2018-05-31 15:59:40 +02:00
if pillTarget[0] == '@' {
if member := parser.room.GetMember(pillTarget); member != nil {
2019-04-07 02:22:51 +02:00
entity.Text = member.Displayname
entity.Style = entity.Style.Foreground(widget.GetHashColor(pillTarget))
2018-05-31 15:59:40 +02:00
}
}
}
2019-04-07 19:13:23 +02:00
// TODO add click action and underline on hover for links
2019-04-07 02:22:51 +02:00
return entity
2018-04-13 23:34:25 +02:00
}
func (parser *htmlParser) imageToEntity(node *html.Node) messages.HTMLEntity {
2019-04-07 19:13:23 +02:00
alt := parser.getAttribute(node, "alt")
if len(alt) == 0 {
alt = parser.getAttribute(node, "title")
if len(alt) == 0 {
alt = "[inline image]"
}
}
entity := &messages.BaseHTMLEntity{
2019-04-07 19:13:23 +02:00
Tag: "img",
Text: alt,
}
// TODO add click action and underline on hover for inline images
return entity
}
// colourToColor converts a chroma colour into a tcell colour. A colour that
// is not set maps to tcell.ColorDefault.
func colourToColor(colour chroma.Colour) tcell.Color {
	if colour.IsSet() {
		red, green, blue := colour.Red(), colour.Green(), colour.Blue()
		return tcell.NewRGBColor(int32(red), int32(green), int32(blue))
	}
	return tcell.ColorDefault
}
// styleEntryToStyle converts a chroma style entry into a tcell style. Bold,
// italic and underline are enabled only when the entry sets them to
// chroma.Yes; colours are converted with colourToColor.
func styleEntryToStyle(se chroma.StyleEntry) tcell.Style {
	result := tcell.StyleDefault
	result = result.Bold(se.Bold == chroma.Yes)
	result = result.Italic(se.Italic == chroma.Yes)
	result = result.Underline(se.Underline == chroma.Yes)
	result = result.Foreground(colourToColor(se.Colour))
	result = result.Background(colourToColor(se.Background))
	return result
}
func (parser *htmlParser) syntaxHighlight(text, language string) messages.HTMLEntity {
2019-04-07 19:13:23 +02:00
lexer := lexers.Get(language)
if lexer == nil {
return nil
}
iter, err := lexer.Tokenise(nil, text)
if err != nil {
return nil
}
style := styles.SolarizedDark
tokens := iter.Tokens()
children := make([]messages.HTMLEntity, len(tokens))
2019-04-07 19:13:23 +02:00
for i, token := range tokens {
if token.Value == "\n" {
children[i] = &messages.BaseHTMLEntity{Block: true, Tag: "br"}
2019-04-07 19:13:23 +02:00
} else {
children[i] = &messages.BaseHTMLEntity{
2019-04-07 19:13:23 +02:00
Tag: token.Type.String(),
Text: token.Value,
Style: styleEntryToStyle(style.Get(token.Type)),
DefaultHeight: 1,
}
}
}
return &messages.BaseHTMLEntity{
2019-04-07 02:22:51 +02:00
Tag: "pre",
Block: true,
2019-04-07 19:13:23 +02:00
Children: children,
}
}
func (parser *htmlParser) codeblockToEntity(node *html.Node) messages.HTMLEntity {
entity := &messages.BaseHTMLEntity{
2019-04-07 19:13:23 +02:00
Tag: "pre",
Block: true,
}
// TODO allow disabling syntax highlighting
if node.FirstChild.Type == html.ElementNode && node.FirstChild.Data == "code" {
text := (&messages.BaseHTMLEntity{
2019-04-07 19:13:23 +02:00
Children: parser.nodeToEntities(node.FirstChild.FirstChild, false),
}).PlainText()
attr := parser.getAttribute(node.FirstChild, "class")
var lang string
for _, class := range strings.Split(attr, " ") {
if strings.HasPrefix(class, "language-") {
lang = class[len("language-"):]
break
}
}
if len(lang) != 0 {
if parsed := parser.syntaxHighlight(text, lang); parsed != nil {
return parsed
}
}
2019-04-07 02:22:51 +02:00
}
2019-04-07 19:13:23 +02:00
entity.Children = parser.nodeToEntities(node.FirstChild, false)
return entity
2019-04-07 02:22:51 +02:00
}
func (parser *htmlParser) tagNodeToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
2018-05-31 15:59:40 +02:00
switch node.Data {
case "blockquote":
2019-04-07 02:22:51 +02:00
return parser.blockquoteToEntity(node, stripLinebreak)
2018-05-31 15:59:40 +02:00
case "ol", "ul":
return parser.listToEntity(node, stripLinebreak)
case "h1", "h2", "h3", "h4", "h5", "h6":
2019-04-07 02:22:51 +02:00
return parser.headerToEntity(node, stripLinebreak)
2018-05-31 15:59:40 +02:00
case "br":
return messages.NewBreakEntity()
2019-04-07 02:22:51 +02:00
case "b", "strong", "i", "em", "s", "del", "u", "ins", "font":
return parser.basicFormatToEntity(node, stripLinebreak)
case "a":
2019-04-07 02:22:51 +02:00
return parser.linkToEntity(node, stripLinebreak)
2019-04-07 19:13:23 +02:00
case "img":
return parser.imageToEntity(node)
2018-05-31 15:59:40 +02:00
case "pre":
2019-04-07 02:22:51 +02:00
return parser.codeblockToEntity(node)
2018-05-31 15:59:40 +02:00
default:
return &messages.BaseHTMLEntity{
2019-04-07 02:22:51 +02:00
Tag: node.Data,
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
Block: parser.isBlockTag(node.Data),
}
}
2018-04-13 23:34:25 +02:00
}
func (parser *htmlParser) singleNodeToEntity(node *html.Node, stripLinebreak bool) messages.HTMLEntity {
2018-05-31 15:59:40 +02:00
switch node.Type {
case html.TextNode:
if stripLinebreak {
node.Data = strings.Replace(node.Data, "\n", "", -1)
}
return &messages.BaseHTMLEntity{
2019-04-07 02:22:51 +02:00
Tag: "text",
Text: node.Data,
}
2018-05-31 15:59:40 +02:00
case html.ElementNode:
2019-04-07 02:22:51 +02:00
return parser.tagNodeToEntity(node, stripLinebreak)
2018-05-31 15:59:40 +02:00
case html.DocumentNode:
2019-04-07 17:21:38 +02:00
if node.FirstChild.Data == "html" && node.FirstChild.NextSibling == nil {
return parser.singleNodeToEntity(node.FirstChild, stripLinebreak)
}
return &messages.BaseHTMLEntity{
2019-04-07 02:22:51 +02:00
Tag: "html",
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
Block: true,
}
2018-05-31 15:59:40 +02:00
default:
2019-04-07 02:22:51 +02:00
return nil
}
2018-04-13 23:34:25 +02:00
}
func (parser *htmlParser) nodeToEntities(node *html.Node, stripLinebreak bool) (entities []messages.HTMLEntity) {
2018-05-31 15:59:40 +02:00
for ; node != nil; node = node.NextSibling {
2019-04-07 02:22:51 +02:00
if entity := parser.singleNodeToEntity(node, stripLinebreak); entity != nil {
entities = append(entities, entity)
}
}
2018-05-31 15:59:40 +02:00
return
}
// BlockTags lists the HTML tag names that isBlockTag reports as block-level.
var BlockTags = []string{"p", "h1", "h2", "h3", "h4", "h5", "h6", "ol", "ul", "li", "pre", "blockquote", "div", "hr", "table"}
2018-05-31 15:59:40 +02:00
func (parser *htmlParser) isBlockTag(tag string) bool {
for _, blockTag := range BlockTags {
if tag == blockTag {
return true
}
2018-05-31 15:59:40 +02:00
}
return false
}
func (parser *htmlParser) Parse(htmlData string) messages.HTMLEntity {
2018-05-31 15:59:40 +02:00
node, _ := html.Parse(strings.NewReader(htmlData))
2019-04-07 02:22:51 +02:00
return parser.singleNodeToEntity(node, true)
2018-04-13 23:34:25 +02:00
}
// ParseHTMLMessage parses a HTML-formatted Matrix event into a UIMessage.
func ParseHTMLMessage(room *rooms.Room, evt *mautrix.Event, senderDisplayname string) messages.HTMLEntity {
htmlData := evt.Content.FormattedBody
htmlData = strings.Replace(htmlData, "\t", " ", -1)
2018-04-13 23:34:25 +02:00
2018-05-31 15:59:40 +02:00
parser := htmlParser{room}
2019-04-07 02:22:51 +02:00
root := parser.Parse(htmlData)
root.(*messages.BaseHTMLEntity).Block = false
2018-04-13 23:34:25 +02:00
2018-11-13 23:00:35 +01:00
if evt.Content.MsgType == mautrix.MsgEmote {
root = &messages.BaseHTMLEntity{
2019-04-07 02:22:51 +02:00
Tag: "emote",
Children: []messages.HTMLEntity{
messages.NewHTMLTextEntity("* "),
messages.NewHTMLTextEntity("* ").AdjustStyle(AdjustStyleTextColor(widget.GetHashColor(evt.Sender))),
messages.NewHTMLTextEntity(" "),
2019-04-07 02:22:51 +02:00
root,
},
}
2018-05-31 15:59:40 +02:00
}
2019-04-07 02:22:51 +02:00
return root
2018-04-13 23:34:25 +02:00
}