Rewrite HTML parser
This commit is contained in:
		@@ -1,3 +0,0 @@
 | 
			
		||||
// Package htmlparser contains an HTML parsing system similar to html.parser.HTMLParser in Python 3.
 | 
			
		||||
// The parser uses x/net/html.Tokenizer in the background.
 | 
			
		||||
package htmlparser
 | 
			
		||||
@@ -1,142 +0,0 @@
 | 
			
		||||
// gomuks - A terminal Matrix client written in Go.
 | 
			
		||||
// Copyright (C) 2018 Tulir Asokan
 | 
			
		||||
//
 | 
			
		||||
// This program is free software: you can redistribute it and/or modify
 | 
			
		||||
// it under the terms of the GNU General Public License as published by
 | 
			
		||||
// the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
// (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
//
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
 | 
			
		||||
package htmlparser
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"io"
 | 
			
		||||
	"strings"
 | 
			
		||||
 | 
			
		||||
	"golang.org/x/net/html"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// HTMLProcessor is the set of callbacks that HTMLParser.Process invokes
// while it walks the token stream.
type HTMLProcessor interface {
	// Preprocess runs once, before the first token is read.
	Preprocess()

	// HandleStartTag receives the tag name and attributes of a
	// StartTagToken, unless the tag is one of the always self-closing
	// tags, in which case HandleSelfClosingTag is used instead.
	HandleStartTag(tagName string, attrs map[string]string)
	// HandleSelfClosingTag receives the tag name and attributes of a
	// SelfClosingTagToken, and of any StartTagToken whose tag is always
	// self-closing.
	HandleSelfClosingTag(tagName string, attrs map[string]string)
	// HandleText receives the text of a TextToken.
	HandleText(text string)
	// HandleEndTag receives the tag name of an EndTagToken.
	HandleEndTag(tagName string)

	// ReceiveError receives the tokenizer error behind an ErrorToken,
	// provided that error is not io.EOF.
	ReceiveError(err error)

	// Postprocess runs once after the input has been consumed without
	// error. A failed parse ends with ReceiveError() instead.
	Postprocess()
}
 | 
			
		||||
 | 
			
		||||
// HTMLParser wraps a net/html.Tokenizer and a HTMLProcessor to call
 | 
			
		||||
// the HTMLProcessor with data from the Tokenizer.
 | 
			
		||||
type HTMLParser struct {
 | 
			
		||||
	*html.Tokenizer
 | 
			
		||||
	processor HTMLProcessor
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// NewHTMLParserFromTokenizer creates a new HTMLParser from an existing html Tokenizer.
 | 
			
		||||
func NewHTMLParserFromTokenizer(z *html.Tokenizer, processor HTMLProcessor) HTMLParser {
 | 
			
		||||
	return HTMLParser{
 | 
			
		||||
		z,
 | 
			
		||||
		processor,
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// NewHTMLParserFromReader creates a Tokenizer with the given io.Reader and
 | 
			
		||||
// then uses that to create a new HTMLParser.
 | 
			
		||||
func NewHTMLParserFromReader(reader io.Reader, processor HTMLProcessor) HTMLParser {
 | 
			
		||||
	return NewHTMLParserFromTokenizer(html.NewTokenizer(reader), processor)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// NewHTMLParserFromString creates a Tokenizer with a reader of the given
 | 
			
		||||
// string and then uses that to create a new HTMLParser.
 | 
			
		||||
func NewHTMLParserFromString(html string, processor HTMLProcessor) HTMLParser {
 | 
			
		||||
	return NewHTMLParserFromReader(strings.NewReader(html), processor)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// SelfClosingTags lists the tags that are always reported through
// HTMLProcessor.HandleSelfClosingTag(), even when the tokenizer emits them
// as a html.StartTagToken instead of a html.SelfClosingTagToken.
var SelfClosingTags = []string{
	"img",
	"br",
	"hr",
	"area",
	"base",
	"basefont",
	"input",
	"link",
	"meta",
}
 | 
			
		||||
 | 
			
		||||
func (parser HTMLParser) mapAttrs() map[string]string {
 | 
			
		||||
	attrs := make(map[string]string)
 | 
			
		||||
	hasMore := true
 | 
			
		||||
	for hasMore {
 | 
			
		||||
		var key, val []byte
 | 
			
		||||
		key, val, hasMore = parser.TagAttr()
 | 
			
		||||
		attrs[string(key)] = string(val)
 | 
			
		||||
	}
 | 
			
		||||
	return attrs
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (parser HTMLParser) isSelfClosing(tag string) bool {
 | 
			
		||||
	for _, selfClosingTag := range SelfClosingTags {
 | 
			
		||||
		if tag == selfClosingTag {
 | 
			
		||||
			return true
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return false
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Process parses the HTML using the tokenizer in this parser and
 | 
			
		||||
// calls the appropriate functions of the HTML processor.
 | 
			
		||||
func (parser HTMLParser) Process() {
 | 
			
		||||
	parser.processor.Preprocess()
 | 
			
		||||
Loop:
 | 
			
		||||
	for {
 | 
			
		||||
		tt := parser.Next()
 | 
			
		||||
		switch tt {
 | 
			
		||||
		case html.ErrorToken:
 | 
			
		||||
			if parser.Err() != io.EOF {
 | 
			
		||||
				parser.processor.ReceiveError(parser.Err())
 | 
			
		||||
				return
 | 
			
		||||
			}
 | 
			
		||||
			break Loop
 | 
			
		||||
		case html.TextToken:
 | 
			
		||||
			parser.processor.HandleText(string(parser.Text()))
 | 
			
		||||
		case html.StartTagToken, html.SelfClosingTagToken:
 | 
			
		||||
			tagb, _ := parser.TagName()
 | 
			
		||||
			attrs := parser.mapAttrs()
 | 
			
		||||
			tag := string(tagb)
 | 
			
		||||
 | 
			
		||||
			selfClosing := tt == html.SelfClosingTagToken || parser.isSelfClosing(tag)
 | 
			
		||||
 | 
			
		||||
			if selfClosing {
 | 
			
		||||
				parser.processor.HandleSelfClosingTag(tag, attrs)
 | 
			
		||||
			} else {
 | 
			
		||||
				parser.processor.HandleStartTag(tag, attrs)
 | 
			
		||||
			}
 | 
			
		||||
		case html.EndTagToken:
 | 
			
		||||
			tagb, _ := parser.TagName()
 | 
			
		||||
			parser.processor.HandleEndTag(string(tagb))
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	parser.processor.Postprocess()
 | 
			
		||||
}
 | 
			
		||||
@@ -18,185 +18,240 @@ package parser
 | 
			
		||||
 | 
			
		||||
import (
 | 
			
		||||
	"fmt"
 | 
			
		||||
	"io"
 | 
			
		||||
	"math"
 | 
			
		||||
	"regexp"
 | 
			
		||||
	"strings"
 | 
			
		||||
 | 
			
		||||
	"maunium.net/go/gomatrix"
 | 
			
		||||
	"maunium.net/go/gomuks/debug"
 | 
			
		||||
	"maunium.net/go/gomuks/lib/htmlparser"
 | 
			
		||||
	"maunium.net/go/gomuks/matrix/rooms"
 | 
			
		||||
	"maunium.net/go/gomuks/ui/messages/tstring"
 | 
			
		||||
	"maunium.net/go/gomuks/ui/widget"
 | 
			
		||||
	"maunium.net/go/tcell"
 | 
			
		||||
	"golang.org/x/net/html"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// matrixToURL matches matrix.to links (scheme and "www." optional) and
// captures everything after "matrix.to/#/", which must start with '#', '@'
// or '!'.
var matrixToURL = regexp.MustCompile(`^(?:https?://)?(?:www\.)?matrix\.to/#/([#@!].*)`)
 | 
			
		||||
 | 
			
		||||
type MatrixHTMLProcessor struct {
 | 
			
		||||
	text tstring.TString
 | 
			
		||||
 | 
			
		||||
	senderID string
 | 
			
		||||
	sender   string
 | 
			
		||||
	msgtype  string
 | 
			
		||||
 | 
			
		||||
	indent    string
 | 
			
		||||
	listType  string
 | 
			
		||||
	lineIsNew bool
 | 
			
		||||
	openTags  *TagArray
 | 
			
		||||
 | 
			
		||||
type htmlParser struct {
 | 
			
		||||
	room *rooms.Room
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (parser *MatrixHTMLProcessor) newline() {
 | 
			
		||||
	if !parser.lineIsNew {
 | 
			
		||||
		parser.text = parser.text.Append("\n" + parser.indent)
 | 
			
		||||
		parser.lineIsNew = true
 | 
			
		||||
	}
 | 
			
		||||
type taggedTString struct {
 | 
			
		||||
	tstring.TString
 | 
			
		||||
	tag string
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (parser *MatrixHTMLProcessor) Preprocess() {
 | 
			
		||||
	if parser.msgtype == "m.emote" {
 | 
			
		||||
		parser.text = tstring.NewColorTString(fmt.Sprintf("* %s ", parser.sender), widget.GetHashColor(parser.senderID))
 | 
			
		||||
	}
 | 
			
		||||
var AdjustStyleBold = func(style tcell.Style) tcell.Style {
 | 
			
		||||
	return style.Bold(true)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (parser *MatrixHTMLProcessor) HandleText(text string) {
 | 
			
		||||
	style := tcell.StyleDefault
 | 
			
		||||
	for _, tag := range *parser.openTags {
 | 
			
		||||
		switch tag.Tag {
 | 
			
		||||
		case "b", "strong":
 | 
			
		||||
			style = style.Bold(true)
 | 
			
		||||
		case "i", "em":
 | 
			
		||||
			style = style.Italic(true)
 | 
			
		||||
		case "s", "del":
 | 
			
		||||
			style = style.Strikethrough(true)
 | 
			
		||||
		case "u", "ins":
 | 
			
		||||
			style = style.Underline(true)
 | 
			
		||||
		case "a":
 | 
			
		||||
			tag.Text += text
 | 
			
		||||
			return
 | 
			
		||||
		}
 | 
			
		||||
var AdjustStyleItalic = func(style tcell.Style) tcell.Style {
 | 
			
		||||
	return style.Italic(true)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
	if !parser.openTags.Has("pre", "code") {
 | 
			
		||||
		text = strings.Replace(text, "\n", "", -1)
 | 
			
		||||
	}
 | 
			
		||||
	parser.text = parser.text.AppendStyle(text, style)
 | 
			
		||||
	parser.lineIsNew = false
 | 
			
		||||
var AdjustStyleUnderline = func(style tcell.Style) tcell.Style {
 | 
			
		||||
	return style.Underline(true)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (parser *MatrixHTMLProcessor) HandleStartTag(tagName string, attrs map[string]string) {
 | 
			
		||||
	tag := &TagWithMeta{Tag: tagName}
 | 
			
		||||
	switch tag.Tag {
 | 
			
		||||
	case "h1", "h2", "h3", "h4", "h5", "h6":
 | 
			
		||||
		length := int(tag.Tag[1] - '0')
 | 
			
		||||
		parser.text = parser.text.Append(strings.Repeat("#", length) + " ")
 | 
			
		||||
		parser.lineIsNew = false
 | 
			
		||||
	case "a":
 | 
			
		||||
		tag.Meta, _ = attrs["href"]
 | 
			
		||||
	case "ol", "ul":
 | 
			
		||||
		parser.listType = tag.Tag
 | 
			
		||||
	case "li":
 | 
			
		||||
		indentSize := 2
 | 
			
		||||
		if parser.listType == "ol" {
 | 
			
		||||
			list := parser.openTags.Get(parser.listType)
 | 
			
		||||
			list.Counter++
 | 
			
		||||
			parser.text = parser.text.Append(fmt.Sprintf("%d. ", list.Counter))
 | 
			
		||||
			indentSize = int(math.Log10(float64(list.Counter))+1) + len(". ")
 | 
			
		||||
var AdjustStyleStrikethrough = func(style tcell.Style) tcell.Style {
 | 
			
		||||
	return style.Strikethrough(true)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (parser *htmlParser) listToTString(node *html.Node, stripLinebreak bool) tstring.TString {
 | 
			
		||||
	ordered := node.Data == "ol"
 | 
			
		||||
	taggedChildren := parser.nodeToTaggedTStrings(node.FirstChild, stripLinebreak)
 | 
			
		||||
	paddingLength := 0
 | 
			
		||||
	if ordered {
 | 
			
		||||
		paddingLength = int(math.Floor(math.Log10(float64(len(taggedChildren)))) + 1)
 | 
			
		||||
	}
 | 
			
		||||
	padding := strings.Repeat(" ", paddingLength+2)
 | 
			
		||||
	var children []tstring.TString
 | 
			
		||||
	counter := 1
 | 
			
		||||
	for _, child := range taggedChildren {
 | 
			
		||||
		if child.tag != "li" {
 | 
			
		||||
			continue
 | 
			
		||||
		}
 | 
			
		||||
		var prefix string
 | 
			
		||||
		if ordered {
 | 
			
		||||
			prefix = fmt.Sprintf("%*d. ", paddingLength, counter)
 | 
			
		||||
		} else {
 | 
			
		||||
			parser.text = parser.text.Append("* ")
 | 
			
		||||
			prefix = "● "
 | 
			
		||||
		}
 | 
			
		||||
		parser.indent += strings.Repeat(" ", indentSize)
 | 
			
		||||
		parser.lineIsNew = false
 | 
			
		||||
	case "blockquote":
 | 
			
		||||
		parser.indent += "> "
 | 
			
		||||
		parser.text = parser.text.Append("> ")
 | 
			
		||||
		parser.lineIsNew = false
 | 
			
		||||
		str := child.TString.Prepend(prefix)
 | 
			
		||||
		counter++
 | 
			
		||||
		parts := str.Split('\n')
 | 
			
		||||
		for i, part := range parts[1:] {
 | 
			
		||||
			parts[i+1] = part.Prepend(padding)
 | 
			
		||||
		}
 | 
			
		||||
	parser.openTags.PushMeta(tag)
 | 
			
		||||
		str = tstring.Join(parts, "\n")
 | 
			
		||||
		children = append(children, str)
 | 
			
		||||
	}
 | 
			
		||||
	return tstring.Join(children, "\n")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (parser *MatrixHTMLProcessor) HandleSelfClosingTag(tagName string, attrs map[string]string) {
 | 
			
		||||
	if tagName == "br" {
 | 
			
		||||
		parser.newline()
 | 
			
		||||
func (parser *htmlParser) basicFormatToTString(node *html.Node, stripLinebreak bool) tstring.TString {
 | 
			
		||||
	str := parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak)
 | 
			
		||||
	switch node.Data {
 | 
			
		||||
	case "b", "strong":
 | 
			
		||||
		str.AdjustStyleFull(AdjustStyleBold)
 | 
			
		||||
	case "i", "em":
 | 
			
		||||
		str.AdjustStyleFull(AdjustStyleItalic)
 | 
			
		||||
	case "s", "del":
 | 
			
		||||
		str.AdjustStyleFull(AdjustStyleStrikethrough)
 | 
			
		||||
	case "u", "ins":
 | 
			
		||||
		str.AdjustStyleFull(AdjustStyleUnderline)
 | 
			
		||||
	}
 | 
			
		||||
	return str
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (parser *MatrixHTMLProcessor) HandleEndTag(tagName string) {
 | 
			
		||||
	tag := parser.openTags.Pop(tagName)
 | 
			
		||||
	if tag == nil {
 | 
			
		||||
		return
 | 
			
		||||
func (parser *htmlParser) headerToTString(node *html.Node, stripLinebreak bool) tstring.TString {
 | 
			
		||||
	children := parser.nodeToTStrings(node.FirstChild, stripLinebreak)
 | 
			
		||||
	length := int(node.Data[1] - '0')
 | 
			
		||||
	prefix := strings.Repeat("#", length) + " "
 | 
			
		||||
	return tstring.Join(children, "").Prepend(prefix)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
	switch tag.Tag {
 | 
			
		||||
	case "li", "blockquote":
 | 
			
		||||
		indentSize := 2
 | 
			
		||||
		if tag.Tag == "li" && parser.listType == "ol" {
 | 
			
		||||
			list := parser.openTags.Get(parser.listType)
 | 
			
		||||
			indentSize = int(math.Log10(float64(list.Counter))+1) + len(". ")
 | 
			
		||||
func (parser *htmlParser) blockquoteToTString(node *html.Node, stripLinebreak bool) tstring.TString {
 | 
			
		||||
	str := parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak)
 | 
			
		||||
	childrenArr := str.TrimSpace().Split('\n')
 | 
			
		||||
	for index, child := range childrenArr {
 | 
			
		||||
		childrenArr[index] = child.Prepend("> ")
 | 
			
		||||
	}
 | 
			
		||||
		if len(parser.indent) >= indentSize {
 | 
			
		||||
			parser.indent = parser.indent[0 : len(parser.indent)-indentSize]
 | 
			
		||||
	return tstring.Join(childrenArr, "\n")
 | 
			
		||||
}
 | 
			
		||||
		// TODO this newline is sometimes not good
 | 
			
		||||
		parser.newline()
 | 
			
		||||
	case "a":
 | 
			
		||||
		match := matrixToURL.FindStringSubmatch(tag.Meta)
 | 
			
		||||
 | 
			
		||||
func (parser *htmlParser) linkToTString(node *html.Node, stripLinebreak bool) tstring.TString {
 | 
			
		||||
	str := parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak)
 | 
			
		||||
	var href string
 | 
			
		||||
	for _, attr := range node.Attr {
 | 
			
		||||
		if attr.Key == "href" {
 | 
			
		||||
			href = attr.Val
 | 
			
		||||
			break
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	if len(href) == 0 {
 | 
			
		||||
		return str
 | 
			
		||||
	}
 | 
			
		||||
	match := matrixToURL.FindStringSubmatch(href)
 | 
			
		||||
	if len(match) == 2 {
 | 
			
		||||
		pillTarget := match[1]
 | 
			
		||||
		if pillTarget[0] == '@' {
 | 
			
		||||
			if member := parser.room.GetMember(pillTarget); member != nil {
 | 
			
		||||
					parser.text = parser.text.AppendColor(member.DisplayName, widget.GetHashColor(member.UserID))
 | 
			
		||||
				} else {
 | 
			
		||||
					parser.text = parser.text.Append(pillTarget)
 | 
			
		||||
				return tstring.NewColorTString(member.DisplayName, widget.GetHashColor(member.UserID))
 | 
			
		||||
			}
 | 
			
		||||
			} else {
 | 
			
		||||
				parser.text = parser.text.Append(pillTarget)
 | 
			
		||||
		}
 | 
			
		||||
		} else {
 | 
			
		||||
			// TODO make text clickable rather than printing URL
 | 
			
		||||
			parser.text = parser.text.Append(fmt.Sprintf("%s (%s)", tag.Text, tag.Meta))
 | 
			
		||||
		return tstring.NewTString(pillTarget)
 | 
			
		||||
	}
 | 
			
		||||
		parser.lineIsNew = false
 | 
			
		||||
	case "p", "pre", "ol", "ul", "h1", "h2", "h3", "h4", "h5", "h6", "div":
 | 
			
		||||
		// parser.newline()
 | 
			
		||||
	return str.Append(fmt.Sprintf(" (%s)", href))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (parser *htmlParser) tagToTString(node *html.Node, stripLinebreak bool) tstring.TString {
 | 
			
		||||
	switch node.Data {
 | 
			
		||||
	case "blockquote":
 | 
			
		||||
		return parser.blockquoteToTString(node, stripLinebreak)
 | 
			
		||||
	case "ol", "ul":
 | 
			
		||||
		return parser.listToTString(node, stripLinebreak)
 | 
			
		||||
	case "h1", "h2", "h3", "h4", "h5", "h6":
 | 
			
		||||
		return parser.headerToTString(node, stripLinebreak)
 | 
			
		||||
	case "br":
 | 
			
		||||
		return tstring.NewTString("\n")
 | 
			
		||||
	case "b", "strong", "i", "em", "s", "del", "u", "ins":
 | 
			
		||||
		return parser.basicFormatToTString(node, stripLinebreak)
 | 
			
		||||
	case "a":
 | 
			
		||||
		return parser.linkToTString(node, stripLinebreak)
 | 
			
		||||
	case "p":
 | 
			
		||||
		return parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak).Append("\n")
 | 
			
		||||
	case "pre":
 | 
			
		||||
		return parser.nodeToTString(node.FirstChild, false)
 | 
			
		||||
	default:
 | 
			
		||||
		return parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (parser *MatrixHTMLProcessor) ReceiveError(err error) {
 | 
			
		||||
	if err != io.EOF {
 | 
			
		||||
		debug.Print("Unexpected error parsing HTML:", err)
 | 
			
		||||
func (parser *htmlParser) singleNodeToTString(node *html.Node, stripLinebreak bool) taggedTString {
 | 
			
		||||
	switch node.Type {
 | 
			
		||||
	case html.TextNode:
 | 
			
		||||
		if stripLinebreak {
 | 
			
		||||
			node.Data = strings.Replace(node.Data, "\n", "", -1)
 | 
			
		||||
		}
 | 
			
		||||
		return taggedTString{tstring.NewTString(node.Data), "text"}
 | 
			
		||||
	case html.ElementNode:
 | 
			
		||||
		return taggedTString{parser.tagToTString(node, stripLinebreak), node.Data}
 | 
			
		||||
	case html.DocumentNode:
 | 
			
		||||
		return taggedTString{parser.nodeToTagAwareTString(node.FirstChild, stripLinebreak), "html"}
 | 
			
		||||
	default:
 | 
			
		||||
		return taggedTString{tstring.NewBlankTString(), "unknown"}
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (parser *MatrixHTMLProcessor) Postprocess() {
 | 
			
		||||
	if len(parser.text) > 0 && parser.text[len(parser.text)-1].Char == '\n' {
 | 
			
		||||
		parser.text = parser.text[:len(parser.text)-1]
 | 
			
		||||
func (parser *htmlParser) nodeToTaggedTStrings(node *html.Node, stripLinebreak bool) (strs []taggedTString) {
 | 
			
		||||
	for ; node != nil; node = node.NextSibling {
 | 
			
		||||
		strs = append(strs, parser.singleNodeToTString(node, stripLinebreak))
 | 
			
		||||
	}
 | 
			
		||||
	return
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
var BlockTags = []string{"p", "h1", "h2", "h3", "h4", "h5", "h6", "ol", "ul", "pre", "blockquote", "div", "hr", "table"}
 | 
			
		||||
 | 
			
		||||
func (parser *htmlParser) isBlockTag(tag string) bool {
 | 
			
		||||
	for _, blockTag := range BlockTags {
 | 
			
		||||
		if tag == blockTag {
 | 
			
		||||
			return true
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return false
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (parser *htmlParser) nodeToTagAwareTString(node *html.Node, stripLinebreak bool) tstring.TString {
 | 
			
		||||
	strs := parser.nodeToTaggedTStrings(node, stripLinebreak)
 | 
			
		||||
	output := tstring.NewBlankTString()
 | 
			
		||||
	for i, str := range strs {
 | 
			
		||||
		tstr := str.TString
 | 
			
		||||
		curIsBlock := parser.isBlockTag(str.tag)
 | 
			
		||||
		if i > 0 && curIsBlock {
 | 
			
		||||
			tstr = tstr.Prepend("\n")
 | 
			
		||||
		}
 | 
			
		||||
		if curIsBlock && len(strs) < i+1 {
 | 
			
		||||
			tstr = tstr.Append("\n")
 | 
			
		||||
		}
 | 
			
		||||
		output = output.AppendTString(tstr)
 | 
			
		||||
	}
 | 
			
		||||
	return output.TrimSpace()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (parser *htmlParser) nodeToTStrings(node *html.Node, stripLinebreak bool) (strs []tstring.TString) {
 | 
			
		||||
	for ; node != nil; node = node.NextSibling {
 | 
			
		||||
		strs = append(strs, parser.singleNodeToTString(node, stripLinebreak).TString)
 | 
			
		||||
	}
 | 
			
		||||
	return
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (parser *htmlParser) nodeToTString(node *html.Node, stripLinebreak bool) tstring.TString {
 | 
			
		||||
	return tstring.Join(parser.nodeToTStrings(node, stripLinebreak), "")
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (parser *htmlParser) Parse(htmlData string) tstring.TString {
 | 
			
		||||
	node, _ := html.Parse(strings.NewReader(htmlData))
 | 
			
		||||
	return parser.nodeToTagAwareTString(node, true)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// ParseHTMLMessage parses a HTML-formatted Matrix event into a UIMessage.
 | 
			
		||||
func ParseHTMLMessage(room *rooms.Room, evt *gomatrix.Event, senderDisplayname string) tstring.TString {
 | 
			
		||||
	htmlData, _ := evt.Content["formatted_body"].(string)
 | 
			
		||||
	htmlData = strings.Replace(htmlData, "\t", "    ", -1)
 | 
			
		||||
 | 
			
		||||
	parser := htmlParser{room}
 | 
			
		||||
	str := parser.Parse(htmlData)
 | 
			
		||||
 | 
			
		||||
	msgtype, _ := evt.Content["msgtype"].(string)
 | 
			
		||||
 | 
			
		||||
	processor := &MatrixHTMLProcessor{
 | 
			
		||||
		room:      room,
 | 
			
		||||
		text:      tstring.NewBlankTString(),
 | 
			
		||||
		msgtype:   msgtype,
 | 
			
		||||
		senderID:  evt.Sender,
 | 
			
		||||
		sender:    senderDisplayname,
 | 
			
		||||
		indent:    "",
 | 
			
		||||
		listType:  "",
 | 
			
		||||
		lineIsNew: true,
 | 
			
		||||
		openTags:  &TagArray{},
 | 
			
		||||
	if msgtype == "m.emote" {
 | 
			
		||||
		str = tstring.Join([]tstring.TString{
 | 
			
		||||
			tstring.NewTString("* "),
 | 
			
		||||
			tstring.NewColorTString(senderDisplayname, widget.GetHashColor(evt.Sender)),
 | 
			
		||||
			tstring.NewTString(" "),
 | 
			
		||||
			str,
 | 
			
		||||
		}, "")
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	parser := htmlparser.NewHTMLParserFromString(htmlData, processor)
 | 
			
		||||
	parser.Process()
 | 
			
		||||
 | 
			
		||||
	return processor.text
 | 
			
		||||
	return str
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,100 +0,0 @@
 | 
			
		||||
// gomuks - A terminal Matrix client written in Go.
 | 
			
		||||
// Copyright (C) 2018 Tulir Asokan
 | 
			
		||||
//
 | 
			
		||||
// This program is free software: you can redistribute it and/or modify
 | 
			
		||||
// it under the terms of the GNU General Public License as published by
 | 
			
		||||
// the Free Software Foundation, either version 3 of the License, or
 | 
			
		||||
// (at your option) any later version.
 | 
			
		||||
//
 | 
			
		||||
// This program is distributed in the hope that it will be useful,
 | 
			
		||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
			
		||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
			
		||||
// GNU General Public License for more details.
 | 
			
		||||
//
 | 
			
		||||
// You should have received a copy of the GNU General Public License
 | 
			
		||||
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
			
		||||
 | 
			
		||||
package parser
 | 
			
		||||
 | 
			
		||||
// TagWithMeta describes one open HTML tag plus per-tag bookkeeping
// (e.g. a list index or the href of a link).
type TagWithMeta struct {
	// Tag is the tag name.
	Tag string
	// Counter is a per-tag counter, e.g. the running item number of a list.
	Counter int
	// Meta is free-form metadata, e.g. the href value of an <a> tag.
	Meta string
	// Text is text collected while the tag is open.
	Text string
}

// BlankTag is an empty TagWithMeta. PushMeta uses it as a temporary
// placeholder while growing the array.
var BlankTag = &TagWithMeta{}

// TagArray remembers which HTML tags are currently open. It is kept in
// reverse order: the most recently pushed tag sits at index 0.
type TagArray []*TagWithMeta

// Push puts a new entry with the given tag name at the front of the array.
func (ta *TagArray) Push(tag string) {
	entry := &TagWithMeta{Tag: tag}
	ta.PushMeta(entry)
}

// PushMeta puts the given entry at the front of the array.
func (ta *TagArray) PushMeta(tag *TagWithMeta) {
	// Grow by one slot, shift everything one position to the right, then
	// drop the new entry into the freed first slot.
	grown := append(*ta, BlankTag)
	copy(grown[1:], grown)
	grown[0] = tag
	*ta = grown
}

// Pop removes the first entry with the given tag name from the array and
// returns it. It returns nil when no such entry exists.
func (ta *TagArray) Pop(tag string) (removed *TagWithMeta) {
	tags := *ta
	if len(tags) == 0 {
		return nil
	}
	// Closing the most recently opened tag is the common case and needs no
	// element shifting.
	if tags[0].Tag == tag {
		*ta = tags[1:]
		return tags[0]
	}
	index := ta.Index(tag)
	if index == -1 {
		return nil
	}
	removed = tags[index]
	*ta = append(tags[:index], tags[index+1:]...)
	return removed
}

// Index returns the first index holding the given tag, or -1 if the tag is
// not in the array.
func (ta *TagArray) Index(tag string) int {
	return ta.IndexAfter(tag, -1)
}

// IndexAfter returns the first index greater than after that holds the
// given tag, or -1 if there is none.
func (ta *TagArray) IndexAfter(tag string, after int) int {
	tags := *ta
	for i := after + 1; i < len(tags); i++ {
		if tags[i].Tag == tag {
			return i
		}
	}
	return -1
}

// Get returns the first entry with the given tag, or nil if the tag is not
// in the array.
func (ta *TagArray) Get(tag string) *TagWithMeta {
	return ta.GetAfter(tag, -1)
}

// GetAfter returns the first entry with the given tag located at an index
// greater than after, or nil if there is none.
func (ta *TagArray) GetAfter(tag string, after int) *TagWithMeta {
	if index := ta.IndexAfter(tag, after); index != -1 {
		return (*ta)[index]
	}
	return nil
}

// Has reports whether at least one of the given tags is in the array.
func (ta *TagArray) Has(tags ...string) bool {
	for _, tag := range tags {
		if ta.Index(tag) != -1 {
			return true
		}
	}
	return false
}
 | 
			
		||||
@@ -21,6 +21,7 @@ import (
 | 
			
		||||
 | 
			
		||||
	"github.com/mattn/go-runewidth"
 | 
			
		||||
	"maunium.net/go/tcell"
 | 
			
		||||
	"unicode"
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
// TString is a string represented as a sequence of Cells, one Cell per
// character, so that each character can carry its own style.
type TString []Cell
 | 
			
		||||
@@ -53,17 +54,66 @@ func NewStyleTString(str string, style tcell.Style) TString {
 | 
			
		||||
	return newStr
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) AppendTString(data TString) TString {
 | 
			
		||||
	return append(str, data...)
 | 
			
		||||
func Join(strings []TString, separator string) TString {
 | 
			
		||||
	if len(strings) == 0 {
 | 
			
		||||
		return NewBlankTString()
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	out := strings[0]
 | 
			
		||||
	strings = strings[1:]
 | 
			
		||||
 | 
			
		||||
	if len(separator) == 0 {
 | 
			
		||||
		return out.AppendTString(strings...)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	for _, str := range strings {
 | 
			
		||||
		out = append(out, str.Prepend(separator)...)
 | 
			
		||||
	}
 | 
			
		||||
	return out
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) AppendTString(dataList ...TString) TString {
 | 
			
		||||
	newStr := str
 | 
			
		||||
	for _, data := range dataList {
 | 
			
		||||
		newStr = append(newStr, data...)
 | 
			
		||||
	}
 | 
			
		||||
	return newStr
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) PrependTString(data TString) TString {
 | 
			
		||||
	return append(data, str...)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) Append(data string) TString {
 | 
			
		||||
	newStr := make(TString, len(str)+len(data))
 | 
			
		||||
	copy(newStr, str)
 | 
			
		||||
	for i, char := range data {
 | 
			
		||||
		newStr[i+len(str)] = NewCell(char)
 | 
			
		||||
	return str.AppendCustom(data, func(r rune) Cell {
 | 
			
		||||
		return NewCell(r)
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
	return newStr
 | 
			
		||||
 | 
			
		||||
func (str TString) TrimSpace() TString {
 | 
			
		||||
	return str.Trim(unicode.IsSpace)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) Trim(fn func(rune) bool) TString {
 | 
			
		||||
	return str.TrimLeft(fn).TrimRight(fn)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) TrimLeft(fn func(rune) bool) TString {
 | 
			
		||||
	for index, cell := range str {
 | 
			
		||||
		if !fn(cell.Char) {
 | 
			
		||||
			return append(NewBlankTString(), str[index:]...)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return NewBlankTString()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) TrimRight(fn func(rune) bool) TString {
 | 
			
		||||
	for i := len(str)-1; i >= 0; i-- {
 | 
			
		||||
		if !fn(str[i].Char) {
 | 
			
		||||
			return append(NewBlankTString(), str[:i+1]...)
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	return NewBlankTString()
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) AppendColor(data string, color tcell.Color) TString {
 | 
			
		||||
@@ -87,10 +137,47 @@ func (str TString) AppendCustom(data string, cellCreator func(rune) Cell) TStrin
 | 
			
		||||
	return newStr
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) Colorize(from, length int, color tcell.Color) {
 | 
			
		||||
	for i := from; i < from+length; i++ {
 | 
			
		||||
		str[i].Style = str[i].Style.Foreground(color)
 | 
			
		||||
func (str TString) Prepend(data string) TString {
 | 
			
		||||
	return str.PrependCustom(data, func(r rune) Cell {
 | 
			
		||||
		return NewCell(r)
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) PrependColor(data string, color tcell.Color) TString {
 | 
			
		||||
	return str.PrependCustom(data, func(r rune) Cell {
 | 
			
		||||
		return NewColorCell(r, color)
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) PrependStyle(data string, style tcell.Style) TString {
 | 
			
		||||
	return str.PrependCustom(data, func(r rune) Cell {
 | 
			
		||||
		return NewStyleCell(r, style)
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) PrependCustom(data string, cellCreator func(rune) Cell) TString {
 | 
			
		||||
	newStr := make(TString, len(str)+len(data))
 | 
			
		||||
	copy(newStr[len(data):], str)
 | 
			
		||||
	for i, char := range data {
 | 
			
		||||
		newStr[i] = cellCreator(char)
 | 
			
		||||
	}
 | 
			
		||||
	return newStr
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) Colorize(from, length int, color tcell.Color) {
 | 
			
		||||
	str.AdjustStyle(from, length, func(style tcell.Style) tcell.Style {
 | 
			
		||||
		return style.Foreground(color)
 | 
			
		||||
	})
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) AdjustStyle(from, length int, fn func(tcell.Style) tcell.Style) {
 | 
			
		||||
	for i := from; i < from+length; i++ {
 | 
			
		||||
		str[i].Style = fn(str[i].Style)
 | 
			
		||||
	}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) AdjustStyleFull(fn func(tcell.Style) tcell.Style) {
 | 
			
		||||
	str.AdjustStyle(0, len(str), fn)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
func (str TString) Draw(screen tcell.Screen, x, y int) {
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user