Answer the question
In order to leave comments, you need to log in
How to avoid exit code -1073741819 (0xC0000005) error?
How can I fix this error? The problem is definitely in my code: on Ubuntu the script also fails, with an error that seems to be related to memory and recursion.
The code itself:
import sys
from PyQt5.QtCore import QEventLoop,QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEngineView
from bs4 import BeautifulSoup
class Render(QWebEngineView):
    """Web view that loads ``url`` on construction and captures its HTML.

    The constructor blocks, pumping Qt events, until the page source has
    been stored in ``self.html``; it then asks the application to quit.

    NOTE: every instance constructs its own ``QApplication``, so creating
    several ``Render`` objects creates several applications in one process.
    """

    def __init__(self, url):
        # Filled in by _callable once the page source has been delivered.
        self.html = None
        # The application object has to exist before the widget is built.
        self.app = QApplication(sys.argv)
        QWebEngineView.__init__(self)
        self.loadFinished.connect(self._loadFinished)
        self.load(QUrl(url))

        wait_flags = (
            QEventLoop.ExcludeUserInputEvents
            | QEventLoop.ExcludeSocketNotifiers
            | QEventLoop.WaitForMoreEvents
        )
        # Keep processing events until the toHtml() callback has fired.
        while self.html is None:
            self.app.processEvents(wait_flags)
        self.app.quit()

    def _callable(self, markup):
        """Callback for ``toHtml``: remember the page source."""
        self.html = markup

    def _loadFinished(self, ok):
        """Slot for ``loadFinished``: request the page source via callback."""
        self.page().toHtml(self._callable)
if __name__ == '__main__':
    # Print the link text of every vacancy card on the first three pages.
    print("Collecting data...")
    for p in range(3):
        # A fresh Render is constructed for each page and dropped right
        # after its html attribute has been read.
        page_html = Render(f'https://hh.ru/search/vacancy?area=&fromSearchLine=true&st=searchVacancy&text=&page={p}').html
        # get data from page
        soup = BeautifulSoup(page_html, "lxml")
        for item in soup.find_all("div", "vacancy-serp-item"):
            print(item.find('a', 'bloko-link').text)
Answer the question
In order to leave comments, you need to log in
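PyQt does not expect a QApplication to be created more than once in the same process. That is most likely what causes the memory error: exit code 0xC0000005 is an access violation, so somewhere an invalid pointer is probably being accessed.
It seems to me that Selenium is better suited to your purposes than PyQt. If you want to stay with PyQt, create the application and the view once and reuse them for every page: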
import sys
from PyQt5.QtCore import QEventLoop, QUrl
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWebEngineWidgets import QWebEngineView
from bs4 import BeautifulSoup
class Render(QWebEngineView):
    """Reusable web view that fetches the HTML of one URL at a time.

    Create a single instance and call :meth:`get_data` for each page.
    The ``QApplication`` is kept in ``self.app``.
    """

    def __init__(self):
        # Filled in by _callable once the page source has been delivered.
        self.html = None
        # Only one QApplication may exist per process, so reuse the current
        # one when there is one instead of constructing a second.
        app = QApplication.instance()
        if app is None:
            app = QApplication(sys.argv)
        self.app = app
        super().__init__()
        self.loadFinished.connect(self._loadFinished)

    def _callable(self, data):
        """Callback for ``toHtml``: remember the page source."""
        self.html = data

    def _loadFinished(self, result):
        """Slot for ``loadFinished``: request the page source via callback."""
        self.page().toHtml(self._callable)

    def get_data(self, url):
        """Load ``url`` and return the page HTML.

        Blocks, processing Qt events, until the ``toHtml`` callback has
        stored the page source.
        """
        # Reset first so the loop below waits for this load's result rather
        # than returning the previous page.
        self.html = None
        self.load(QUrl(url))
        wait_flags = (
            QEventLoop.ExcludeUserInputEvents
            | QEventLoop.ExcludeSocketNotifiers
            | QEventLoop.WaitForMoreEvents
        )
        while self.html is None:
            self.app.processEvents(wait_flags)
        return self.html
if __name__ == '__main__':
    # Print the link text of every vacancy card on the first three pages.
    print("Collecting data...")
    # A single Render is created once and reused for every page.
    render = Render()
    for p in range(3):
        html = render.get_data(f'https://hh.ru/search/vacancy?area=&fromSearchLine=true&st=searchVacancy&text=&page={p}')
        # get data from page
        soup = BeautifulSoup(html, "lxml")
        for elem in soup.find_all("div", "vacancy-serp-item"):
            link = elem.find('a', 'bloko-link')
            # find() returns None when a card has no matching anchor; skip
            # such cards instead of failing on ``None.text``.
            if link is not None:
                print(link.text)
Didn't find what you were looking for?
Ask your questionAsk a Question
731 491 924 answers to any question