Answer the question
In order to leave comments, you need to log in
Why does http.Get() hang in Go?
I have a task that involves downloading a large number of pages. At first I used the goquery library to make it easier to extract the text of the blocks I needed, but I noticed that in some situations the program simply stopped: no error, no exit, it just hung. I assumed the problem was in that library, so I dropped it and started fetching the page source the standard Go way, with http.Get(url), but the problem did not go away. All I managed to find out is that the program stops at the http.Get() call. What could be the problem?
// getPage downloads url with http.Get and returns the response body as a
// string. On any failure it returns an empty string together with the error
// reported by http.Get or by reading the body.
//
// Note: http.Get goes through http.DefaultClient, which has no timeout, so
// this call waits for as long as the remote side keeps the connection open.
func getPage(url string) (HTML string, err error) {
	resp, err := http.Get(url)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	data, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}
	return string(data), nil
}
Answer the question
In order to leave comments, you need to log in
I would set a timeout (and lower it to an acceptable value).
If no response is received within N seconds, return an error.
The timeout is set by giving the http.Client a custom http.Transport (lines 15-20).
package main
import (
"bytes"
"errors"
"fmt"
"io/ioutil"
"net"
"net/http"
"time"
)
// getPage fetches url with an HTTP GET and returns the response body as a
// string.
//
// timeout limits every phase of the exchange: establishing the connection,
// waiting for the response headers, and - through http.Client.Timeout, which
// bounds the request as a whole - reading the response body. A stalled server
// therefore cannot block the caller indefinitely. A zero timeout disables all
// of these limits.
//
// On failure HTML is empty and the error text names the step that failed
// (http.NewRequest, client.Do or ioutil.ReadAll). The HTTP status code is not
// inspected: the body of a non-2xx response is returned like any other.
func getPage(url string, timeout time.Duration) (HTML string, e error) {
	transport := &http.Transport{
		DialContext:           (&net.Dialer{Timeout: timeout}).DialContext,
		ResponseHeaderTimeout: timeout,
	}
	// The transport is private to this call, so nothing can ever reuse its
	// keep-alive connections. Close them on the way out instead of leaking
	// one socket per downloaded page.
	defer transport.CloseIdleConnections()

	// Client.Timeout covers the body read as well; the transport-level
	// timeouts above stop once the response headers have arrived.
	client := &http.Client{
		Transport: transport,
		Timeout:   timeout,
	}

	req, e := http.NewRequest("GET", url, nil)
	if e != nil {
		return "", errors.New(fmt.Sprintf(`http.NewRequest failed: %s`, e.Error()))
	}

	resp, e := client.Do(req)
	if e != nil {
		return "", errors.New(fmt.Sprintf("client.Do failed: %s", e.Error()))
	}
	defer resp.Body.Close()

	bodyAsBytes, e := ioutil.ReadAll(resp.Body)
	if e != nil {
		return "", errors.New(fmt.Sprintf("ioutil.ReadAll failed: %s", e.Error()))
	}

	return bytes.NewBuffer(bodyAsBytes).String(), nil
}
// main fetches http://google.com/ with a one-second timeout and prints either
// the page body (tagged [INFO]) or the failure (tagged [ERROR]) to stdout.
func main() {
	page, err := getPage("http://google.com/", time.Second)
	if err != nil {
		fmt.Printf("[ERROR] %s\n", err.Error())
		return
	}
	fmt.Printf("[INFO] %s\n", page)
}
>>> HTML, e := getPage("http://google.com/", time.Duration(1*time.Nanosecond))
[ERROR] client.Do failed: Get http://google.com/: i/o timeout)
HTML, e := getPage("http://google.com/", time.Duration(5*time.Second))
[INFO] <!doctype html><html itemscope="" itemtype="http://schema.org/WebPage"><head><meta content="........
Didn't find what you were looking for?
Ask your questionAsk a Question
731 491 924 answers to any question