Python
eellazy, 2021-03-22 16:33:24

Why does a thread in QThreads crash?

Hello!
Maybe someone has run into this problem. I'm running the latest Python and the latest version of the Beautiful Soup library.
I want to run the page parsing in a separate thread. Without Beautiful Soup the thread works like a charm, but as soon as I plug in the parsing functionality, an error pops up:
QThread: Destroyed while thread is still running

The most interesting thing is that when I start it, the loop gets through two iterations and only then crashes with this error.
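
For reference, this message comes from the QThread destructor: Qt prints it whenever a QThread object is deleted while its thread is still running. A minimal, hypothetical sketch (not my actual code) that triggers it:

from PyQt5 import QtCore


def leak_a_thread():
    # The QThread object is only a local variable, so when this function
    # returns it is garbage-collected while the thread is still running,
    # and Qt reports "QThread: Destroyed while thread is still running"
    # (and the process typically aborts).
    thread = QtCore.QThread()
    thread.start()


app = QtCore.QCoreApplication([])
leak_a_thread()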

Thread Creation Class

from PyQt5 import QtCore


class ParserController:
    def __init__(self, model, view):
        self._model = model
        self._view = view

    def query_add(self, data):
        if self._model.insert_data(data):
            print('Record added to the database')
        else:
            print('Record NOT added to the database')

    def start_parse_autovia(self):
        self.thread_1 = QtCore.QThread()
        self.autovia_thread = ParseAutovia()
        self.autovia_thread.moveToThread(self.thread_1)
        self.thread_1.started.connect(self.autovia_thread.run)
        self.autovia_thread.query.connect(self.query_add)
        self.autovia_thread.finished.connect(self.thread_1.quit)
        self.thread_1.start()


The class of the parser itself
import requests
from bs4 import BeautifulSoup
from PyQt5 import QtCore


class ParseAutovia(QtCore.QObject):
    finished = QtCore.pyqtSignal()
    query = QtCore.pyqtSignal(dict)

    START_PAGE = 'https://www.autovia.sk/osobne-auta'
    PAGINATION_PAGE = 'https://www.autovia.sk/osobne-auta/?p[page]='

    def get_data(self, html):
        # Get title
        try:
            title = html.find('h1').text.strip()
        except:
            title = ''

        # Get phone
        try:
            phone = html.find('div', class_='resp-buttons').find_all('a')[0]['href']
        except:
            phone = ''

        # Get name
        try:
            name = html.find('div', class_='resp-contact-top').find('span', class_='resp-subject').text.strip()
        except:
            name = ''

        # Get location
        try:
            location = html.find('div', class_='resp-contact-bottom').find('div', class_='resp-location').find('span').text.strip()
        except:
            location = ''

        # Get date
        try:
            date = html.find('div', class_='resp-meta').find_all('div', class_='col-6')[0].text.strip()
            date = date.replace('Aktualizované: ', '')
        except:
            date = ''

        # Get login
        try:  # Seller's login
            login = html.find('div', class_='resp-contact-top').find('a', class_='resp-subject').text.strip()
        except:
            login = ''

        # Get login link
        try:
            login_link = html.find('div', class_='resp-contact-top').find('a', class_='resp-subject')['href']
        except:
            login_link = ''

        # Get page link
        try:
            page_link = html.find('meta', attrs={'property': 'og:url'})['content']
        except:
            page_link = ''

        data = {
            'title': title,
            'phone': phone,
            'name': name,
            'location': location,
            'date': date,
            'login': login,
            'login_link': login_link,
            'page_link': page_link
        }

        return data

    def get_html(self, url):
        r = requests.get(url)
        soup = BeautifulSoup(r.text, 'lxml')
        return soup

    def get_pagination_count(self):
        soup = self.get_html(self.START_PAGE)
        count = soup.find('div', class_='resp-pager').find('label', class_='resp-after').text.strip()
        count = int(count.replace('z', '').replace(' ', ''))
        return count

    def get_pagination_links(self, pagination):
        url = self.PAGINATION_PAGE + str(pagination)
        soup = self.get_html(url)

        links = []
        items = soup.find('section', class_='resp-search-results').find_all('div', class_='resp-item')

        for item in items:
            link = item.find('h2').find('a').get('href')
            links.append(link)

        return links

    def run(self):
        pagination_count = self.get_pagination_count()
        pagination = 1

        while pagination <= pagination_count:
            pagination_links = self.get_pagination_links(pagination)

            for url in pagination_links:
                html = self.get_html(url)
                data = self.get_data(html)

                self.query.emit(data)
                print(data['phone'])

            pagination += 1

        self.finished.emit()


If I run this in the thread instead, it works without any problems:
def run(self):
    for i in range(100):
        print(i)
        i += 1
        time.sleep(2)


Attached is the entry point to the program
import sys

from PyQt5 import QtWidgets
from model.ParserModel import ParserModel
from controller.ParserController import ParserController


class AppParser(QtWidgets.QApplication):
    def __init__(self, sys_args):
        super(AppParser, self).__init__(sys_args)
        self._model = ParserModel()
        self._controller = ParserController(self._model)

def main():
    app = AppParser(sys.argv)
    sys.exit(app.exec())

if __name__ == '__main__':
    main()

1 answer
HemulGM, 2021-03-22
@HemulGM

You have to keep the program running. Otherwise, after the threads are started, it simply runs to completion, the thread objects get destroyed while the threads are still running, and that is exactly the error you see.
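
For what it's worth, with the worker/moveToThread setup from the question the usual cure is to make sure the QThread object is never destroyed while its thread is still running: keep self.thread_1 and self.autovia_thread on an object that stays alive for the whole run, and quit/wait the thread before the program exits. A minimal sketch, assuming a hypothetical shutdown method added to ParserController:

class ParserController:
    # ... __init__, query_add and start_parse_autovia as in the question ...

    def shutdown(self):
        # Stop the worker thread and block until it has actually finished,
        # so the QThread object is never destroyed while still running.
        # Note: quit() only takes effect after ParseAutovia.run() returns,
        # so a long-running worker also needs its own cancellation flag.
        thread = getattr(self, 'thread_1', None)
        if thread is not None and thread.isRunning():
            thread.quit()
            thread.wait()

In AppParser.__init__ this could then be hooked up with something like self.aboutToQuit.connect(self._controller.shutdown), so the thread is stopped cleanly before the application is torn down.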
