gomuks/ui/messages/parser/htmlparser.go

269 lines
8.0 KiB
Go
Raw Normal View History

2018-04-13 23:34:25 +02:00
// gomuks - A terminal Matrix client written in Go.
// Copyright (C) 2018 Tulir Asokan
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package parser
2018-04-13 23:34:25 +02:00
import (
"fmt"
"math"
"regexp"
2018-04-13 23:34:25 +02:00
"strings"
2018-06-01 23:44:21 +02:00
"golang.org/x/net/html"
2018-04-13 23:34:25 +02:00
"maunium.net/go/gomatrix"
"maunium.net/go/gomuks/matrix/rooms"
2018-04-13 23:34:25 +02:00
"maunium.net/go/gomuks/ui/messages/tstring"
"maunium.net/go/gomuks/ui/widget"
2018-04-13 23:34:25 +02:00
"maunium.net/go/tcell"
2018-06-01 23:28:21 +02:00
"strconv"
2018-04-13 23:34:25 +02:00
)
// matrixToURL matches matrix.to links, with an optional http(s) scheme and an
// optional "www." prefix. Its single capture group holds everything after
// "#/", which has to begin with '#', '@' or '!'.
var matrixToURL = regexp.MustCompile(`^(?:https?://)?(?:www\.)?matrix\.to/#/([#@!].*)`)
2018-04-13 23:34:25 +02:00
2018-05-31 15:59:40 +02:00
// htmlParser converts parsed HTML nodes into TStrings.
type htmlParser struct {
// room is consulted to look up members when a link points at a matrix.to user.
room *rooms.Room
}
2018-05-31 15:59:40 +02:00
// taggedTString is a TString together with a tag naming the kind of node it
// was rendered from.
type taggedTString struct {
tstring.TString
// tag is the element name for element nodes, and "text", "html" or
// "unknown" for the other node types.
tag string
}
2018-05-31 15:59:40 +02:00
var AdjustStyleBold = func(style tcell.Style) tcell.Style {
return style.Bold(true)
}
2018-05-31 15:59:40 +02:00
var AdjustStyleItalic = func(style tcell.Style) tcell.Style {
return style.Italic(true)
}
var AdjustStyleUnderline = func(style tcell.Style) tcell.Style {
return style.Underline(true)
}
var AdjustStyleStrikethrough = func(style tcell.Style) tcell.Style {
return style.Strikethrough(true)
2018-04-13 23:34:25 +02:00
}
2018-06-01 23:28:21 +02:00
// getAttribute returns the value of the first attribute on node whose key
// equals attribute, or the empty string when the node has no such attribute.
func (parser *htmlParser) getAttribute(node *html.Node, attribute string) string {
	for i := range node.Attr {
		if candidate := node.Attr[i]; candidate.Key == attribute {
			return candidate.Val
		}
	}
	return ""
}
// digits returns the number of decimal digits needed to write the magnitude
// of num. Zero counts as one digit and the sign of a negative number is not
// counted.
//
// The count is obtained by repeated division rather than from a logarithm:
// the logarithm of zero is -Inf and that of a negative number is NaN, neither
// of which converts to a meaningful int, and a logarithm that lands just below
// a whole number at an exact power of ten would undercount by one.
func digits(num int) int {
	// Take the absolute value after converting to float64 so that the most
	// negative int cannot overflow on negation.
	magnitude := math.Abs(float64(num))
	count := 1
	for magnitude >= 10 {
		// Floor keeps the running value an integer, so rounding in the
		// division cannot accumulate across iterations.
		magnitude = math.Floor(magnitude / 10)
		count++
	}
	return count
}
2018-05-31 15:59:40 +02:00
func (parser *htmlParser) listToTString(node *html.Node, stripLinebreak bool) tstring.TString {
ordered := node.Data == "ol"
taggedChildren := parser.nodeToTaggedTStrings(node.FirstChild, stripLinebreak)
2018-06-01 23:28:21 +02:00
counter := 1
indentLength := 0
2018-05-31 15:59:40 +02:00
if ordered {
2018-06-01 23:28:21 +02:00
start := parser.getAttribute(node, "start")
if len(start) > 0 {
counter, _ = strconv.Atoi(start)
}
longestIndex := (counter - 1) + len(taggedChildren)
indentLength = digits(longestIndex)
}
2018-06-01 23:28:21 +02:00
indent := strings.Repeat(" ", indentLength+2)
2018-05-31 15:59:40 +02:00
var children []tstring.TString
for _, child := range taggedChildren {
if child.tag != "li" {
continue
}
var prefix string
if ordered {
2018-06-01 23:28:21 +02:00
indexPadding := indentLength - digits(counter)
prefix = fmt.Sprintf("%d. %s", counter, strings.Repeat(" ", indexPadding))
2018-05-31 15:59:40 +02:00
} else {
prefix = "● "
}
str := child.TString.Prepend(prefix)
counter++
parts := str.Split('\n')
for i, part := range parts[1:] {
2018-06-01 23:28:21 +02:00
parts[i+1] = part.Prepend(indent)
2018-05-31 15:59:40 +02:00
}
str = tstring.Join(parts, "\n")
children = append(children, str)
}
return tstring.Join(children, "\n")
2018-04-13 23:34:25 +02:00
}
2018-05-31 15:59:40 +02:00
func (parser *htmlParser) basicFormatToTString(node *html.Node, stripLinebreak bool) tstring.TString {
str := parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak)
switch node.Data {
case "b", "strong":
str.AdjustStyleFull(AdjustStyleBold)
case "i", "em":
str.AdjustStyleFull(AdjustStyleItalic)
case "s", "del":
str.AdjustStyleFull(AdjustStyleStrikethrough)
case "u", "ins":
str.AdjustStyleFull(AdjustStyleUnderline)
}
2018-05-31 15:59:40 +02:00
return str
}
2018-05-31 15:59:40 +02:00
// headerToTString renders a heading element as its concatenated children
// prefixed with one '#' per heading level and a space.
func (parser *htmlParser) headerToTString(node *html.Node, stripLinebreak bool) tstring.TString {
	parts := parser.nodeToTStrings(node.FirstChild, stripLinebreak)
	// The second byte of the element name ("h1".."h6") is the level digit.
	level := int(node.Data[1] - '0')
	marker := strings.Repeat("#", level) + " "
	heading := tstring.Join(parts, "")
	return heading.Prepend(marker)
}
func (parser *htmlParser) blockquoteToTString(node *html.Node, stripLinebreak bool) tstring.TString {
str := parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak)
childrenArr := str.TrimSpace().Split('\n')
for index, child := range childrenArr {
childrenArr[index] = child.Prepend("> ")
}
2018-05-31 15:59:40 +02:00
return tstring.Join(childrenArr, "\n")
}
2018-05-31 15:59:40 +02:00
func (parser *htmlParser) linkToTString(node *html.Node, stripLinebreak bool) tstring.TString {
str := parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak)
2018-06-01 23:28:21 +02:00
href := parser.getAttribute(node, "href")
2018-05-31 15:59:40 +02:00
if len(href) == 0 {
return str
}
match := matrixToURL.FindStringSubmatch(href)
if len(match) == 2 {
pillTarget := match[1]
if pillTarget[0] == '@' {
if member := parser.room.GetMember(pillTarget); member != nil {
return tstring.NewColorTString(member.DisplayName, widget.GetHashColor(member.UserID))
}
}
return tstring.NewTString(pillTarget)
}
2018-05-31 15:59:40 +02:00
return str.Append(fmt.Sprintf(" (%s)", href))
2018-04-13 23:34:25 +02:00
}
2018-05-31 15:59:40 +02:00
func (parser *htmlParser) tagToTString(node *html.Node, stripLinebreak bool) tstring.TString {
switch node.Data {
case "blockquote":
return parser.blockquoteToTString(node, stripLinebreak)
case "ol", "ul":
return parser.listToTString(node, stripLinebreak)
case "h1", "h2", "h3", "h4", "h5", "h6":
2018-05-31 15:59:40 +02:00
return parser.headerToTString(node, stripLinebreak)
case "br":
return tstring.NewTString("\n")
case "b", "strong", "i", "em", "s", "del", "u", "ins":
return parser.basicFormatToTString(node, stripLinebreak)
case "a":
2018-05-31 15:59:40 +02:00
return parser.linkToTString(node, stripLinebreak)
case "p":
return parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak).Append("\n")
case "pre":
return parser.nodeToTString(node.FirstChild, false)
default:
return parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak)
}
2018-04-13 23:34:25 +02:00
}
2018-05-31 15:59:40 +02:00
func (parser *htmlParser) singleNodeToTString(node *html.Node, stripLinebreak bool) taggedTString {
switch node.Type {
case html.TextNode:
if stripLinebreak {
node.Data = strings.Replace(node.Data, "\n", "", -1)
}
return taggedTString{tstring.NewTString(node.Data), "text"}
case html.ElementNode:
return taggedTString{parser.tagToTString(node, stripLinebreak), node.Data}
case html.DocumentNode:
return taggedTString{parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak), "html"}
default:
return taggedTString{tstring.NewBlankTString(), "unknown"}
}
2018-04-13 23:34:25 +02:00
}
2018-05-31 15:59:40 +02:00
func (parser *htmlParser) nodeToTaggedTStrings(node *html.Node, stripLinebreak bool) (strs []taggedTString) {
for ; node != nil; node = node.NextSibling {
strs = append(strs, parser.singleNodeToTString(node, stripLinebreak))
}
2018-05-31 15:59:40 +02:00
return
}
// BlockTags lists the element names that are rendered on lines of their own,
// i.e. with a line break inserted before and after their content.
var BlockTags = []string{
	"p",
	"h1", "h2", "h3", "h4", "h5", "h6",
	"ol", "ul",
	"pre",
	"blockquote",
	"div",
	"hr",
	"table",
}
2018-05-31 15:59:40 +02:00
func (parser *htmlParser) isBlockTag(tag string) bool {
for _, blockTag := range BlockTags {
if tag == blockTag {
return true
}
2018-05-31 15:59:40 +02:00
}
return false
}
func (parser *htmlParser) nodeToTagAwareTString(node *html.Node, stripLinebreak bool) tstring.TString {
strs := parser.nodeToTaggedTStrings(node, stripLinebreak)
output := tstring.NewBlankTString()
2018-06-01 23:28:21 +02:00
for _, str := range strs {
2018-05-31 15:59:40 +02:00
tstr := str.TString
2018-06-01 23:28:21 +02:00
if parser.isBlockTag(str.tag) {
tstr = tstr.Prepend("\n").Append("\n")
}
2018-05-31 15:59:40 +02:00
output = output.AppendTString(tstr)
2018-04-13 23:34:25 +02:00
}
2018-05-31 15:59:40 +02:00
return output.TrimSpace()
2018-04-13 23:34:25 +02:00
}
2018-05-31 15:59:40 +02:00
func (parser *htmlParser) nodeToTStrings(node *html.Node, stripLinebreak bool) (strs []tstring.TString) {
for ; node != nil; node = node.NextSibling {
strs = append(strs, parser.singleNodeToTString(node, stripLinebreak).TString)
}
2018-05-31 15:59:40 +02:00
return
2018-04-13 23:34:25 +02:00
}
2018-05-31 15:59:40 +02:00
// nodeToTString renders node and its following siblings and concatenates the
// results without any separator and without block-tag line breaks.
func (parser *htmlParser) nodeToTString(node *html.Node, stripLinebreak bool) tstring.TString {
	pieces := parser.nodeToTStrings(node, stripLinebreak)
	return tstring.Join(pieces, "")
}
func (parser *htmlParser) Parse(htmlData string) tstring.TString {
node, _ := html.Parse(strings.NewReader(htmlData))
return parser.nodeToTagAwareTString(node, true)
2018-04-13 23:34:25 +02:00
}
// ParseHTMLMessage parses a HTML-formatted Matrix event into a UIMessage.
func ParseHTMLMessage(room *rooms.Room, evt *gomatrix.Event, senderDisplayname string) tstring.TString {
2018-04-13 23:34:25 +02:00
htmlData, _ := evt.Content["formatted_body"].(string)
htmlData = strings.Replace(htmlData, "\t", " ", -1)
2018-04-13 23:34:25 +02:00
2018-05-31 15:59:40 +02:00
parser := htmlParser{room}
str := parser.Parse(htmlData)
2018-04-13 23:34:25 +02:00
2018-05-31 15:59:40 +02:00
msgtype, _ := evt.Content["msgtype"].(string)
if msgtype == "m.emote" {
str = tstring.Join([]tstring.TString{
tstring.NewTString("* "),
tstring.NewColorTString(senderDisplayname, widget.GetHashColor(evt.Sender)),
tstring.NewTString(" "),
str,
}, "")
}
2018-05-31 15:59:40 +02:00
return str
2018-04-13 23:34:25 +02:00
}