V
V
Vladimir Grabko2016-06-12 03:41:59
go
Vladimir Grabko, 2016-06-12 03:41:59

Why is my code panicking, can't it just work quietly?

I've been heroically trying all night to figure out how to synchronize with channels rather than mutexes. I have a terrible and incomprehensible code on mutexes, but it parses about 3k url per second. I decided to rewrite it at the same time having dealt with the channels. Now, if you start another routine in one routine, then somewhere after 200 routines, panic begins.

var storageUrl map[string]int
var storageEmail map[string]int

func main() {
  storageUrl = make(map[string]int)
  storageEmail = make(map[string]int)
  queueLoadUrl := make(chan string)
  queueParseUrlHtml := make(chan string)
  queueParseEmailHtml := make(chan string)

  queueStorageUrl := make(chan string)
  queueStorageEmail := make(chan string)
  

  //defer func() {
  //	if r := recover(); r != nil {
  //		ReplacationFileSystem()
  //		os.Exit(1)
  //	}
  //}()


  go func() {
    for {
      msg := <-queueLoadUrl
      _, ok := storageUrl[msg]
      if !ok {
        queueStorageUrl <- msg
                               //если убрать здесь рутину то всё работает на ура но очень медленно
        go func() {
          queueParseUrlHtml <- parse.LoadUrl(msg)
        }()

      }
    }
  }()

  //эта рутина занята поиском других url в спарсенных документах
  go func() {
    for {
      msg := <-queueParseUrlHtml
      go func() {
        //передаём страницу на поиск емейлов.
        queueParseEmailHtml <- msg
        //парсим все ссылки и отдаём на загрузку.
        //что то вроде рекурсии
        for _, value := range parse.UrlHtml(msg) {
          queueLoadUrl <- value
        }
      }()
    }
  }()

  //эта рутина ищет email в спарсенных url
  go func() {
    for {
      msg := <-queueParseEmailHtml
      go func() {
        for _, value := range parse.EmailHtml(msg) {
          queueStorageEmail <- value
        }
      }()
    }
  }()

  //эта рутина обновляет данные в Storage
  go func() {
    for {
      select {
      case msg1 := <-queueStorageUrl:
        storageUrl[msg1] = 0
      case msg2 := <-queueStorageEmail:
        storageEmail[msg2] = 0
      }
    }
  }()

  //рутина занята репликацией.

  go func() {
    for {
      time.Sleep(5000 * time.Millisecond)
      ReplacationFileSystem()
    }
  }()

  //получает команду
  for {
    reader := bufio.NewReader(os.Stdin)
    fmt.Print("Фраза или url: ")
    text, _ := reader.ReadString('\n')
    fmt.Println("Начал парсинг")
    queueLoadUrl <- text
  }
}

func ReplacationFileSystem() {
  var buf bytes.Buffer
  for key, _ := range storageEmail {
    buf.Write([]byte(key + "\n"))
  }
  file, _ := os.Create("email.txt")
  defer file.Close()
  file.Write(buf.Bytes())
}

An error with the description in the code above. And I almost forgot the parse package
var Host string

func LoadUrl(urls string) string {
  u, err := url.Parse(strings.TrimSpace(urls))
  if err != nil {
    return ""
  }
  rw := &sync.RWMutex{}
  if u.Host == "" {
    rw.RLock()
    u.Host = Host
    rw.RUnlock()
  } else {
    rw.Lock()
    Host = u.Host
    rw.Unlock()
  }

  if u.Scheme == "" {
    u.Scheme = "http"
  }

  res, err := http.Get(u.String())
  if err != nil {
    return ""
  }
  d, err := ioutil.ReadAll(res.Body)
  res.Body.Close()
  if err != nil {
    return ""
  }
  return string(d)
}

func UrlHtml(s string) map[int]string {
  urls := map[int]string{}
  count := 0

  doc, err := html.Parse(strings.NewReader(s))
  if err != nil {
    log.Fatal(err)
  }
  var f func(*html.Node)
  f = func(n *html.Node) {
    if n.Type == html.ElementNode && n.Data == "a" {
      for _, a := range n.Attr {
        if a.Key == "href" {
          urls[count] = a.Val
          count++
          break
        }
      }
    }
    for c := n.FirstChild; c != nil; c = c.NextSibling {
      f(c)
    }
  }
  f(doc)

  return urls
}

func EmailHtml(str string) []string {
  r := regexp.MustCompile("([a-z0-9_\\.\\-]+)\\@(([a-z0-9\\-])+\\.)+([a-z]{2,6})")
  return r.FindAllString(str, -1)
}

goroutine 26385 [GC assist wait]:
net/url.(*URL).String(0x1648e2c0, 0x0, 0x0)
        C:/Go/src/net/url/url.go:663 +0x2f
parser/parse.LoadUrl(0x19345270, 0x43, 0x0, 0x0)
        C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/parse/href.
go:37 +0x10e
main.main.func1.1(0x10a164c0, 0x19345270, 0x43)
        C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:45
+0x29
created by main.main.func1
        C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:46
+0xd8

goroutine 26386 [GC assist wait]:
net/url.(*URL).String(0x1648e300, 0x0, 0x0)
        C:/Go/src/net/url/url.go:663 +0x2f
parser/parse.LoadUrl(0x19345400, 0x4e, 0x0, 0x0)
        C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/parse/href.
go:37 +0x10e
main.main.func1.1(0x10a164c0, 0x19345400, 0x4e)
        C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:45
+0x29
created by main.main.func1
        C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:46
+0xd8

goroutine 26387 [runnable]:
main.main.func1.1(0x10a164c0, 0x15176540, 0x31)
        C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:44
created by main.main.func1
        C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:46
+0xd8

goroutine 26388 [runnable]:
main.main.func1.1(0x10a164c0, 0x19345450, 0x50)
        C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:44
created by main.main.func1
        C:/Documents and Settings/lucifer/Рабочий стол/go/src/parser/main.go:46
+0xd8

Answer the question

In order to leave comments, you need to log in

1 answer(s)
V
Vladimir Grabko, 2016-06-12
@VGrabko

I thank FireGM for the earlier answer, which says that you need to manually control the number of routines. Oh I wish I had read this last night...

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question