2018-04-13 23:34:25 +02:00
|
|
|
// gomuks - A terminal Matrix client written in Go.
|
2019-01-17 13:13:25 +01:00
|
|
|
// Copyright (C) 2019 Tulir Asokan
|
2018-04-13 23:34:25 +02:00
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
2019-01-17 13:13:25 +01:00
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
2018-04-13 23:34:25 +02:00
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
2019-01-17 13:13:25 +01:00
|
|
|
// GNU Affero General Public License for more details.
|
2018-04-13 23:34:25 +02:00
|
|
|
//
|
2019-01-17 13:13:25 +01:00
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
2018-04-13 23:34:25 +02:00
|
|
|
|
2018-04-14 10:50:18 +02:00
|
|
|
package parser
|
2018-04-13 23:34:25 +02:00
|
|
|
|
|
|
|
import (
|
2018-04-14 10:44:07 +02:00
|
|
|
"fmt"
|
|
|
|
"math"
|
|
|
|
"regexp"
|
2019-01-17 13:13:25 +01:00
|
|
|
"strconv"
|
2018-04-13 23:34:25 +02:00
|
|
|
"strings"
|
|
|
|
|
2018-09-05 09:55:48 +02:00
|
|
|
"github.com/lucasb-eyer/go-colorful"
|
2018-06-01 23:44:21 +02:00
|
|
|
"golang.org/x/net/html"
|
2019-01-17 13:13:25 +01:00
|
|
|
|
2019-04-07 02:22:51 +02:00
|
|
|
"maunium.net/go/gomuks/ui/messages"
|
2019-01-17 13:13:25 +01:00
|
|
|
"maunium.net/go/mautrix"
|
|
|
|
"maunium.net/go/tcell"
|
|
|
|
|
2018-04-14 10:44:07 +02:00
|
|
|
"maunium.net/go/gomuks/matrix/rooms"
|
|
|
|
"maunium.net/go/gomuks/ui/widget"
|
2018-04-13 23:34:25 +02:00
|
|
|
)
|
|
|
|
|
2018-04-14 10:44:07 +02:00
|
|
|
// matrixToURL matches matrix.to permalinks (with optional scheme and "www."
// prefix) and captures the identifier after "#/", which must begin with
// '#', '@' or '!'.
var matrixToURL = regexp.MustCompile(`^(?:https?://)?(?:www\.)?matrix\.to/#/([#@!].*)`)
|
2018-04-13 23:34:25 +02:00
|
|
|
|
2018-05-31 15:59:40 +02:00
|
|
|
type htmlParser struct {
|
|
|
|
room *rooms.Room
|
|
|
|
}
|
2018-04-14 10:44:07 +02:00
|
|
|
|
2018-11-13 23:28:53 +01:00
|
|
|
func AdjustStyleBold(style tcell.Style) tcell.Style {
|
2018-05-31 15:59:40 +02:00
|
|
|
return style.Bold(true)
|
|
|
|
}
|
2018-04-14 10:44:07 +02:00
|
|
|
|
2018-11-13 23:28:53 +01:00
|
|
|
func AdjustStyleItalic(style tcell.Style) tcell.Style {
|
2018-05-31 15:59:40 +02:00
|
|
|
return style.Italic(true)
|
|
|
|
}
|
|
|
|
|
2018-11-13 23:28:53 +01:00
|
|
|
func AdjustStyleUnderline(style tcell.Style) tcell.Style {
|
2018-05-31 15:59:40 +02:00
|
|
|
return style.Underline(true)
|
|
|
|
}
|
|
|
|
|
2018-11-13 23:28:53 +01:00
|
|
|
func AdjustStyleStrikethrough(style tcell.Style) tcell.Style {
|
2018-05-31 15:59:40 +02:00
|
|
|
return style.Strikethrough(true)
|
2018-04-13 23:34:25 +02:00
|
|
|
}
|
|
|
|
|
2019-03-26 21:09:10 +01:00
|
|
|
func AdjustStyleTextColor(color tcell.Color) func(tcell.Style) tcell.Style {
|
2018-11-13 23:28:53 +01:00
|
|
|
return func(style tcell.Style) tcell.Style {
|
|
|
|
return style.Foreground(color)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-26 21:09:10 +01:00
|
|
|
func AdjustStyleBackgroundColor(color tcell.Color) func(tcell.Style) tcell.Style {
|
|
|
|
return func(style tcell.Style) tcell.Style {
|
|
|
|
return style.Background(color)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-06-01 23:28:21 +02:00
|
|
|
func (parser *htmlParser) getAttribute(node *html.Node, attribute string) string {
|
|
|
|
for _, attr := range node.Attr {
|
|
|
|
if attr.Key == attribute {
|
|
|
|
return attr.Val
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
|
|
|
|
// digits returns the number of decimal digits in num. Zero and negative
// numbers yield 0.
//
// NOTE(review): the count is derived from a float64 base-10 logarithm; confirm
// it is exact enough at exact powers of ten for the inputs used here.
func digits(num int) int {
	if num > 0 {
		magnitude := math.Floor(math.Log10(float64(num)))
		return int(magnitude + 1)
	}
	return 0
}
|
|
|
|
|
2019-04-07 02:22:51 +02:00
|
|
|
func (parser *htmlParser) listToTString(node *html.Node, stripLinebreak bool) *messages.HTMLEntity {
|
2018-05-31 15:59:40 +02:00
|
|
|
ordered := node.Data == "ol"
|
2019-04-07 02:22:51 +02:00
|
|
|
listItems := parser.nodeToEntities(node.FirstChild, stripLinebreak)
|
2018-06-01 23:28:21 +02:00
|
|
|
counter := 1
|
|
|
|
indentLength := 0
|
2018-05-31 15:59:40 +02:00
|
|
|
if ordered {
|
2018-06-01 23:28:21 +02:00
|
|
|
start := parser.getAttribute(node, "start")
|
|
|
|
if len(start) > 0 {
|
|
|
|
counter, _ = strconv.Atoi(start)
|
|
|
|
}
|
|
|
|
|
2019-04-07 02:22:51 +02:00
|
|
|
longestIndex := (counter - 1) + len(listItems)
|
2018-06-01 23:28:21 +02:00
|
|
|
indentLength = digits(longestIndex)
|
2018-04-14 10:44:07 +02:00
|
|
|
}
|
2019-04-07 02:22:51 +02:00
|
|
|
var children []*messages.HTMLEntity
|
|
|
|
for _, child := range listItems {
|
|
|
|
if child.Tag != "li" {
|
2018-05-31 15:59:40 +02:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
var prefix string
|
|
|
|
if ordered {
|
2018-06-01 23:28:21 +02:00
|
|
|
indexPadding := indentLength - digits(counter)
|
|
|
|
prefix = fmt.Sprintf("%d. %s", counter, strings.Repeat(" ", indexPadding))
|
2018-05-31 15:59:40 +02:00
|
|
|
} else {
|
|
|
|
prefix = "● "
|
|
|
|
}
|
2019-04-07 02:22:51 +02:00
|
|
|
child.Text = prefix + child.Text
|
|
|
|
child.Block = true
|
|
|
|
child.Indent = indentLength + 2
|
|
|
|
children = append(children, child)
|
2018-05-31 15:59:40 +02:00
|
|
|
counter++
|
|
|
|
}
|
2019-04-07 02:22:51 +02:00
|
|
|
return &messages.HTMLEntity{
|
|
|
|
Tag: node.Data,
|
|
|
|
Text: "",
|
|
|
|
Style: tcell.StyleDefault,
|
|
|
|
Children: children,
|
|
|
|
Block: true,
|
|
|
|
Indent: 0,
|
|
|
|
}
|
2018-04-13 23:34:25 +02:00
|
|
|
}
|
|
|
|
|
2019-04-07 02:22:51 +02:00
|
|
|
func (parser *htmlParser) basicFormatToEntity(node *html.Node, stripLinebreak bool) *messages.HTMLEntity {
|
|
|
|
entity := &messages.HTMLEntity{
|
|
|
|
Tag: node.Data,
|
|
|
|
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
|
|
|
|
}
|
2018-05-31 15:59:40 +02:00
|
|
|
switch node.Data {
|
|
|
|
case "b", "strong":
|
2019-04-07 02:22:51 +02:00
|
|
|
entity.AdjustStyle(AdjustStyleBold)
|
2018-05-31 15:59:40 +02:00
|
|
|
case "i", "em":
|
2019-04-07 02:22:51 +02:00
|
|
|
entity.AdjustStyle(AdjustStyleItalic)
|
2018-05-31 15:59:40 +02:00
|
|
|
case "s", "del":
|
2019-04-07 02:22:51 +02:00
|
|
|
entity.AdjustStyle(AdjustStyleStrikethrough)
|
2018-05-31 15:59:40 +02:00
|
|
|
case "u", "ins":
|
2019-04-07 02:22:51 +02:00
|
|
|
entity.AdjustStyle(AdjustStyleUnderline)
|
|
|
|
case "font":
|
|
|
|
fgColor, ok := parser.parseColor(node, "data-mx-color", "color")
|
|
|
|
if ok {
|
|
|
|
entity.AdjustStyle(AdjustStyleTextColor(fgColor))
|
|
|
|
}
|
|
|
|
bgColor, ok := parser.parseColor(node, "data-mx-bg-color", "background-color")
|
|
|
|
if ok {
|
|
|
|
entity.AdjustStyle(AdjustStyleBackgroundColor(bgColor))
|
|
|
|
}
|
2018-04-18 13:20:57 +02:00
|
|
|
}
|
2019-04-07 02:22:51 +02:00
|
|
|
return entity
|
2018-04-18 13:20:57 +02:00
|
|
|
}
|
2018-04-14 10:44:07 +02:00
|
|
|
|
2019-03-26 21:09:10 +01:00
|
|
|
func (parser *htmlParser) parseColor(node *html.Node, mainName, altName string) (color tcell.Color, ok bool) {
|
|
|
|
hex := parser.getAttribute(node, mainName)
|
2018-06-11 18:38:19 +02:00
|
|
|
if len(hex) == 0 {
|
2019-03-26 21:09:10 +01:00
|
|
|
hex = parser.getAttribute(node, altName)
|
2018-11-13 23:28:53 +01:00
|
|
|
if len(hex) == 0 {
|
2019-03-26 21:09:10 +01:00
|
|
|
return
|
2018-11-13 23:28:53 +01:00
|
|
|
}
|
2018-06-11 18:38:19 +02:00
|
|
|
}
|
|
|
|
|
2019-03-26 21:09:10 +01:00
|
|
|
cful, err := colorful.Hex(hex)
|
2018-06-11 18:38:19 +02:00
|
|
|
if err != nil {
|
2019-03-26 21:09:10 +01:00
|
|
|
color2, found := ColorMap[strings.ToLower(hex)]
|
|
|
|
if !found {
|
|
|
|
return
|
2018-11-13 23:28:53 +01:00
|
|
|
}
|
2019-03-26 21:09:10 +01:00
|
|
|
cful, _ = colorful.MakeColor(color2)
|
2018-06-11 18:38:19 +02:00
|
|
|
}
|
|
|
|
|
2019-03-26 21:09:10 +01:00
|
|
|
r, g, b := cful.RGB255()
|
|
|
|
return tcell.NewRGBColor(int32(r), int32(g), int32(b)), true
|
|
|
|
}
|
|
|
|
|
2019-04-07 02:22:51 +02:00
|
|
|
func (parser *htmlParser) headerToEntity(node *html.Node, stripLinebreak bool) *messages.HTMLEntity {
|
2018-05-31 15:59:40 +02:00
|
|
|
length := int(node.Data[1] - '0')
|
|
|
|
prefix := strings.Repeat("#", length) + " "
|
2019-04-07 02:22:51 +02:00
|
|
|
return (&messages.HTMLEntity{
|
|
|
|
Tag: node.Data,
|
|
|
|
Text: prefix,
|
|
|
|
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
|
|
|
|
}).AdjustStyle(AdjustStyleBold)
|
2018-05-31 15:59:40 +02:00
|
|
|
}
|
|
|
|
|
2019-04-07 02:22:51 +02:00
|
|
|
func (parser *htmlParser) blockquoteToEntity(node *html.Node, stripLinebreak bool) *messages.HTMLEntity {
|
|
|
|
return &messages.HTMLEntity{
|
|
|
|
Tag: "blockquote",
|
|
|
|
Text: ">",
|
|
|
|
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
|
|
|
|
Block: true,
|
|
|
|
Indent: 2,
|
2018-04-14 10:44:07 +02:00
|
|
|
}
|
2018-05-31 15:59:40 +02:00
|
|
|
}
|
2018-04-14 10:44:07 +02:00
|
|
|
|
2019-04-07 02:22:51 +02:00
|
|
|
func (parser *htmlParser) linkToEntity(node *html.Node, stripLinebreak bool) *messages.HTMLEntity {
|
|
|
|
entity := &messages.HTMLEntity{
|
|
|
|
Tag: "a",
|
|
|
|
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
|
|
|
|
}
|
2018-06-01 23:28:21 +02:00
|
|
|
href := parser.getAttribute(node, "href")
|
2018-05-31 15:59:40 +02:00
|
|
|
if len(href) == 0 {
|
2019-04-07 02:22:51 +02:00
|
|
|
return entity
|
2018-05-31 15:59:40 +02:00
|
|
|
}
|
|
|
|
match := matrixToURL.FindStringSubmatch(href)
|
|
|
|
if len(match) == 2 {
|
2019-04-07 02:22:51 +02:00
|
|
|
entity.Children = nil
|
2018-05-31 15:59:40 +02:00
|
|
|
pillTarget := match[1]
|
2019-04-07 02:22:51 +02:00
|
|
|
entity.Text = pillTarget
|
2018-05-31 15:59:40 +02:00
|
|
|
if pillTarget[0] == '@' {
|
|
|
|
if member := parser.room.GetMember(pillTarget); member != nil {
|
2019-04-07 02:22:51 +02:00
|
|
|
entity.Text = member.Displayname
|
|
|
|
entity.Style = entity.Style.Foreground(widget.GetHashColor(pillTarget))
|
2018-05-31 15:59:40 +02:00
|
|
|
}
|
|
|
|
}
|
2018-04-14 10:44:07 +02:00
|
|
|
}
|
2019-04-07 02:22:51 +02:00
|
|
|
// TODO add click action for links
|
|
|
|
return entity
|
2018-04-13 23:34:25 +02:00
|
|
|
}
|
|
|
|
|
2019-04-07 02:22:51 +02:00
|
|
|
func (parser *htmlParser) codeblockToEntity(node *html.Node) *messages.HTMLEntity {
|
|
|
|
return &messages.HTMLEntity{
|
|
|
|
Tag: "pre",
|
|
|
|
Children: parser.nodeToEntities(node.FirstChild, false),
|
|
|
|
Block: true,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (parser *htmlParser) tagNodeToEntity(node *html.Node, stripLinebreak bool) *messages.HTMLEntity {
|
2018-05-31 15:59:40 +02:00
|
|
|
switch node.Data {
|
|
|
|
case "blockquote":
|
2019-04-07 02:22:51 +02:00
|
|
|
return parser.blockquoteToEntity(node, stripLinebreak)
|
2018-05-31 15:59:40 +02:00
|
|
|
case "ol", "ul":
|
|
|
|
return parser.listToTString(node, stripLinebreak)
|
2018-04-14 10:44:07 +02:00
|
|
|
case "h1", "h2", "h3", "h4", "h5", "h6":
|
2019-04-07 02:22:51 +02:00
|
|
|
return parser.headerToEntity(node, stripLinebreak)
|
2018-05-31 15:59:40 +02:00
|
|
|
case "br":
|
2019-04-07 02:22:51 +02:00
|
|
|
return &messages.HTMLEntity{Tag: "br", Block: true}
|
|
|
|
case "b", "strong", "i", "em", "s", "del", "u", "ins", "font":
|
|
|
|
return parser.basicFormatToEntity(node, stripLinebreak)
|
2018-04-14 10:44:07 +02:00
|
|
|
case "a":
|
2019-04-07 02:22:51 +02:00
|
|
|
return parser.linkToEntity(node, stripLinebreak)
|
2018-05-31 15:59:40 +02:00
|
|
|
case "pre":
|
2019-04-07 02:22:51 +02:00
|
|
|
return parser.codeblockToEntity(node)
|
2018-05-31 15:59:40 +02:00
|
|
|
default:
|
2019-04-07 02:22:51 +02:00
|
|
|
return &messages.HTMLEntity{
|
|
|
|
Tag: node.Data,
|
|
|
|
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
|
|
|
|
Block: parser.isBlockTag(node.Data),
|
|
|
|
}
|
2018-04-14 10:44:07 +02:00
|
|
|
}
|
2018-04-13 23:34:25 +02:00
|
|
|
}
|
|
|
|
|
2019-04-07 02:22:51 +02:00
|
|
|
func (parser *htmlParser) singleNodeToEntity(node *html.Node, stripLinebreak bool) *messages.HTMLEntity {
|
2018-05-31 15:59:40 +02:00
|
|
|
switch node.Type {
|
|
|
|
case html.TextNode:
|
|
|
|
if stripLinebreak {
|
|
|
|
node.Data = strings.Replace(node.Data, "\n", "", -1)
|
|
|
|
}
|
2019-04-07 02:22:51 +02:00
|
|
|
return &messages.HTMLEntity{
|
|
|
|
Tag: "text",
|
|
|
|
Text: node.Data,
|
|
|
|
}
|
2018-05-31 15:59:40 +02:00
|
|
|
case html.ElementNode:
|
2019-04-07 02:22:51 +02:00
|
|
|
return parser.tagNodeToEntity(node, stripLinebreak)
|
2018-05-31 15:59:40 +02:00
|
|
|
case html.DocumentNode:
|
2019-04-07 02:22:51 +02:00
|
|
|
return &messages.HTMLEntity{
|
|
|
|
Tag: "html",
|
|
|
|
Children: parser.nodeToEntities(node.FirstChild, stripLinebreak),
|
|
|
|
Block: true,
|
|
|
|
}
|
2018-05-31 15:59:40 +02:00
|
|
|
default:
|
2019-04-07 02:22:51 +02:00
|
|
|
return nil
|
2018-04-14 10:44:07 +02:00
|
|
|
}
|
2018-04-13 23:34:25 +02:00
|
|
|
}
|
|
|
|
|
2019-04-07 02:22:51 +02:00
|
|
|
func (parser *htmlParser) nodeToEntities(node *html.Node, stripLinebreak bool) (entities []*messages.HTMLEntity) {
|
2018-05-31 15:59:40 +02:00
|
|
|
for ; node != nil; node = node.NextSibling {
|
2019-04-07 02:22:51 +02:00
|
|
|
if entity := parser.singleNodeToEntity(node, stripLinebreak); entity != nil {
|
|
|
|
entities = append(entities, entity)
|
|
|
|
}
|
2018-04-16 11:04:00 +02:00
|
|
|
}
|
2018-05-31 15:59:40 +02:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// BlockTags lists the HTML tag names that are treated as block elements.
var BlockTags = []string{
	"p",
	"h1", "h2", "h3", "h4", "h5", "h6",
	"ol", "ul",
	"pre",
	"blockquote",
	"div",
	"hr",
	"table",
}
|
2018-04-14 10:44:07 +02:00
|
|
|
|
2018-05-31 15:59:40 +02:00
|
|
|
func (parser *htmlParser) isBlockTag(tag string) bool {
|
|
|
|
for _, blockTag := range BlockTags {
|
|
|
|
if tag == blockTag {
|
|
|
|
return true
|
2018-04-14 10:44:07 +02:00
|
|
|
}
|
2018-05-31 15:59:40 +02:00
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2019-04-07 02:22:51 +02:00
|
|
|
func (parser *htmlParser) Parse(htmlData string) *messages.HTMLEntity {
|
2018-05-31 15:59:40 +02:00
|
|
|
node, _ := html.Parse(strings.NewReader(htmlData))
|
2019-04-07 02:22:51 +02:00
|
|
|
return parser.singleNodeToEntity(node, true)
|
2018-04-13 23:34:25 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// ParseHTMLMessage parses a HTML-formatted Matrix event into a UIMessage.
|
2019-04-07 02:22:51 +02:00
|
|
|
func ParseHTMLMessage(room *rooms.Room, evt *mautrix.Event, senderDisplayname string) *messages.HTMLEntity {
|
2018-09-05 09:55:48 +02:00
|
|
|
htmlData := evt.Content.FormattedBody
|
2018-04-24 21:08:57 +02:00
|
|
|
htmlData = strings.Replace(htmlData, "\t", " ", -1)
|
2018-04-13 23:34:25 +02:00
|
|
|
|
2018-05-31 15:59:40 +02:00
|
|
|
parser := htmlParser{room}
|
2019-04-07 02:22:51 +02:00
|
|
|
root := parser.Parse(htmlData)
|
|
|
|
root.Block = false
|
2018-04-13 23:34:25 +02:00
|
|
|
|
2018-11-13 23:00:35 +01:00
|
|
|
if evt.Content.MsgType == mautrix.MsgEmote {
|
2019-04-07 02:22:51 +02:00
|
|
|
root = &messages.HTMLEntity{
|
|
|
|
Tag: "emote",
|
|
|
|
Children: []*messages.HTMLEntity{
|
|
|
|
{Text: "* "},
|
|
|
|
{Text: senderDisplayname, Style: tcell.StyleDefault.Foreground(widget.GetHashColor(evt.Sender))},
|
|
|
|
{Text: " "},
|
|
|
|
root,
|
|
|
|
},
|
|
|
|
}
|
2018-05-31 15:59:40 +02:00
|
|
|
}
|
2018-04-14 10:44:07 +02:00
|
|
|
|
2019-04-07 02:22:51 +02:00
|
|
|
return root
|
2018-04-13 23:34:25 +02:00
|
|
|
}
|