V
V
vgrabkowot2020-07-17 18:26:24
go
vgrabkowot, 2020-07-17 18:26:24

How to parse a link using net/html?

There is a library here https://godoc.org/golang.org/x/net/html

// Sample markup with two anchors; the walk below prints each anchor's href.
s := `<p>Links:</p><ul><li><a href="foo">Foo</a><li><a href="/bar/baz">BarBaz</a></ul>`
doc, err := html.Parse(strings.NewReader(s))
if err != nil {
    log.Fatal(err)
}

// visit walks the tree depth-first. For every <a> element it prints the value
// of the first href attribute it finds, then descends into the children.
var visit func(*html.Node)
visit = func(node *html.Node) {
    if node.Type == html.ElementNode && node.Data == "a" {
        for i := range node.Attr {
            if node.Attr[i].Key != "href" {
                continue
            }
            fmt.Println(node.Attr[i].Val)
            break // only the first href of an anchor is reported
        }
    }
    child := node.FirstChild
    for child != nil {
        visit(child)
        child = child.NextSibling
    }
}
visit(doc)

It finds each "a" element and prints the value of its "href" attribute:
foo
/bar/baz


I want to get the link text instead:
Foo
BarBaz

Answer the question

In order to leave comments, you need to log in

1 answer(s)
D
Daniil Maslov, 2020-07-17
@vgrabkowot

package main

import (
  "bytes"
  "fmt"
  "log"
  "strings"

  "golang.org/x/net/html"
)

// collectText appends to buf the Data of every text node found in the subtree
// rooted at n. The node itself is handled first and its children are then
// visited from first to last, so the text is written in document order.
func collectText(n *html.Node, buf *bytes.Buffer) {
  isText := n.Type == html.TextNode
  if isText {
    buf.WriteString(n.Data)
  }

  // Recurse into each child in sibling order.
  child := n.FirstChild
  for child != nil {
    collectText(child, buf)
    child = child.NextSibling
  }
}

// main parses a fixed HTML fragment and prints, one per line, the text
// contained in every <a> element it encounters.
func main() {
  s := `<p>Links:</p><ul><li><a href="foo">Foo</a><li><a href="/bar/baz">BarBaz</a></ul>`

  doc, err := html.Parse(strings.NewReader(s))
  if err != nil {
    log.Fatal(err)
  }

  // walk visits the tree depth-first. The closure is declared before it is
  // assigned so that it can call itself recursively.
  var walk func(*html.Node)
  walk = func(node *html.Node) {
    if node.Type == html.ElementNode && node.Data == "a" {
      // A fresh buffer per anchor keeps each link's text separate.
      var text bytes.Buffer
      collectText(node, &text)
      fmt.Println(text.String())
    }
    child := node.FirstChild
    for child != nil {
      walk(child)
      child = child.NextSibling
    }
  }
  walk(doc)
}

https://play.golang.org/p/agnPMns-cjR

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question