Answer the question
In order to leave comments, you need to log in
Why is my code panicking, can't it just work quietly?
I've been heroically trying all night to figure out how to synchronize with channels rather than mutexes. I have a terrible and incomprehensible code on mutexes, but it parses about 3k url per second. I decided to rewrite it at the same time having dealt with the channels. Now, if you start another routine in one routine, then somewhere after 200 routines, panic begins.
var storageUrl map[string]int
var storageEmail map[string]int
func main() {
storageUrl = make(map[string]int)
storageEmail = make(map[string]int)
queueLoadUrl := make(chan string)
queueParseUrlHtml := make(chan string)
queueParseEmailHtml := make(chan string)
queueStorageUrl := make(chan string)
queueStorageEmail := make(chan string)
//defer func() {
// if r := recover(); r != nil {
// ReplacationFileSystem()
// os.Exit(1)
// }
//}()
go func() {
for {
msg := <-queueLoadUrl
_, ok := storageUrl[msg]
if !ok {
queueStorageUrl <- msg
//если убрать здесь рутину то всё работает на ура но очень медленно
go func() {
queueParseUrlHtml <- parse.LoadUrl(msg)
}()
}
}
}()
//эта рутина занята поиском других url в спарсенных документах
go func() {
for {
msg := <-queueParseUrlHtml
go func() {
//передаём страницу на поиск емейлов.
queueParseEmailHtml <- msg
//парсим все ссылки и отдаём на загрузку.
//что то вроде рекурсии
for _, value := range parse.UrlHtml(msg) {
queueLoadUrl <- value
}
}()
}
}()
//эта рутина ищет email в спарсенных url
go func() {
for {
msg := <-queueParseEmailHtml
go func() {
for _, value := range parse.EmailHtml(msg) {
queueStorageEmail <- value
}
}()
}
}()
//эта рутина обновляет данные в Storage
go func() {
for {
select {
case msg1 := <-queueStorageUrl:
storageUrl[msg1] = 0
case msg2 := <-queueStorageEmail:
storageEmail[msg2] = 0
}
}
}()
//рутина занята репликацией.
go func() {
for {
time.Sleep(5000 * time.Millisecond)
ReplacationFileSystem()
}
}()
//получает команду
for {
reader := bufio.NewReader(os.Stdin)
fmt.Print("Фраза или url: ")
text, _ := reader.ReadString('\n')
fmt.Println("Начал парсинг")
queueLoadUrl <- text
}
}
func ReplacationFileSystem() {
var buf bytes.Buffer
for key, _ := range storageEmail {
buf.Write([]byte(key + "\n"))
}
file, _ := os.Create("email.txt")
defer file.Close()
file.Write(buf.Bytes())
}
var Host string
func LoadUrl(urls string) string {
u, err := url.Parse(strings.TrimSpace(urls))
if err != nil {
return ""
}
rw := &sync.RWMutex{}
if u.Host == "" {
rw.RLock()
u.Host = Host
rw.RUnlock()
} else {
rw.Lock()
Host = u.Host
rw.Unlock()
}
if u.Scheme == "" {
u.Scheme = "http"
}
res, err := http.Get(u.String())
if err != nil {
return ""
}
d, err := ioutil.ReadAll(res.Body)
res.Body.Close()
if err != nil {
return ""
}
return string(d)
}
func UrlHtml(s string) map[int]string {
urls := map[int]string{}
count := 0
doc, err := html.Parse(strings.NewReader(s))
if err != nil {
log.Fatal(err)
}
var f func(*html.Node)
f = func(n *html.Node) {
if n.Type == html.ElementNode && n.Data == "a" {
for _, a := range n.Attr {
if a.Key == "href" {
urls[count] = a.Val
count++
break
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
f(c)
}
}
f(doc)
return urls
}
func EmailHtml(str string) []string {
r := regexp.MustCompile("([a-z0-9_\\.\\-]+)\\@(([a-z0-9\\-])+\\.)+([a-z]{2,6})")
return r.FindAllString(str, -1)
}
goroutine 26385 [GC assist wait]:
net/url.(*URL).String(0x1648e2c0, 0x0, 0x0)
C:/Go/src/net/url/url.go:663 +0x2f
parser/parse.LoadUrl(0x19345270, 0x43, 0x0, 0x0)
C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/parse/href.
go:37 +0x10e
main.main.func1.1(0x10a164c0, 0x19345270, 0x43)
C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:45
+0x29
created by main.main.func1
C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:46
+0xd8
goroutine 26386 [GC assist wait]:
net/url.(*URL).String(0x1648e300, 0x0, 0x0)
C:/Go/src/net/url/url.go:663 +0x2f
parser/parse.LoadUrl(0x19345400, 0x4e, 0x0, 0x0)
C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/parse/href.
go:37 +0x10e
main.main.func1.1(0x10a164c0, 0x19345400, 0x4e)
C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:45
+0x29
created by main.main.func1
C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:46
+0xd8
goroutine 26387 [runnable]:
main.main.func1.1(0x10a164c0, 0x15176540, 0x31)
C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:44
created by main.main.func1
C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:46
+0xd8
goroutine 26388 [runnable]:
main.main.func1.1(0x10a164c0, 0x19345450, 0x50)
C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:44
created by main.main.func1
C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:46
+0xd8
Answer the question
In order to leave comments, you need to log in
I thank FireGM for the earlier answer, which says that you need to manually control the number of routines. Oh I wish I had read this last night...
Didn't find what you were looking for?
Ask your questionAsk a Question
731 491 924 answers to any question