flattenhtml is a Go package that lets you access specific nodes in an HTML document directly, without having to traverse all of its nodes.
go get github.com/seinshah/flattenhtml

Use built-in or custom flatteners to access HTML document nodes directly
using your desired selectors. For example, you can access all div nodes
(based on the tag name), all elements with a class attribute, all
elements whose class value is container, and so on.
flattenhtml currently supports the following flatteners out of the box:
TagFlattener: flattens all nodes based on their tag name.
You can build a custom in-house flattener by implementing the
flattenhtml.Flattener interface. If your implementation is generic and
can be used by others, please consider contributing it to this package.
package main
import (
"fmt"
"log"
"strings"
"github.com/seinshah/flattenhtml"
)
// main walks through the flattenhtml workflow end to end: it loads an HTML
// document, parses it with a tag flattener, selects the div nodes, keeps the
// ones whose class attribute is "container", and prints each one's id.
func main() {
	// Sample HTML document to be flattened.
	const document = `
<html>
<head>
<title>flattenhtml</title>
</head>
<body>
<div class="container" id="target">
<div class="row">
<div class="col-md-6">
<h1>flattenhtml</h1>
<p>flattens HTML documents</p>
</div>
<div class="col-md-6">
<h1>flattenhtml</h1>
<p>flattens HTML documents</p>
</div>
</div>
</div>
</body>
</html>
`

	// Build a node manager from the raw document.
	manager, err := flattenhtml.NewNodeManagerFromReader(strings.NewReader(document))
	if err != nil {
		log.Fatal(err)
	}

	// Parse the document with the built-in tag flattener.
	parsed, err := manager.Parse(flattenhtml.NewTagFlattener())
	if err != nil {
		log.Fatal(err)
	}

	// Retrieve the tag flattener from the parse result.
	tagFlattener, err := parsed.SelectFlattener(&flattenhtml.TagFlattener{})
	if err != nil {
		log.Fatal(err)
	}

	// printID writes the node's id attribute to standard output. The second
	// value returned by Attribute is intentionally discarded in this example.
	printID := func(node *flattenhtml.Node) {
		id, _ := node.Attribute("id")
		fmt.Println(id)
		// Output:
		// target
	}

	// Select every div, then narrow the selection to class="container".
	containers := tagFlattener.
		SelectNodes("div").
		Filter(flattenhtml.WithAttributeValueAs("class", "container"))

	containers.Each(printID)
}