Z
Z
zedooox2017-10-29 21:29:43
Python
zedooox, 2017-10-29 21:29:43

How to fix an error in a multithreaded Python script?

There is a script:

from concurrent.futures import ThreadPoolExecutor, Future, TimeoutError
from urllib.parse import urlparse
from threading import RLock
import threading
import requests
import urllib3
import time

# Requests below are sent with verify=False; silence the warning urllib3
# would otherwise print for every unverified HTTPS request.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Module-level lock shared by the functions below.
lock = threading.Lock()

# Build the work list once. Blank lines are skipped so that an empty line in
# the input file does not turn into the meaningless URL 'http://'.
with open('lists/0.txt') as lines:
    domains = iter(['http://' + line.strip() for line in lines if line.strip()])

# Network locations that have already been written to one of the report files.
db = set()
def _split_domain(url):
    """Return ``(display_url, netloc)`` for *url*.

    ``display_url`` is ``scheme://host`` with a leading ``www.`` removed from
    the host; ``netloc`` is the unmodified network location and is used as the
    de-duplication key.
    """
    uri = urlparse(url)
    host = uri.netloc[4:] if uri.netloc.startswith('www.') else uri.netloc
    return '{}://{}'.format(uri.scheme, host), uri.netloc


def worker(domain):
    """Probe *domain* with a HEAD request and record the outcome once per host.

    The result is appended to exactly one of three files:

    * ``good.txt``   - the request succeeded with a non-error status;
    * ``error.txt``  - connection error, HTTP error status, too many
      redirects, or timeout;
    * ``unknow.txt`` - any other exception.

    When a response is received, the recorded URL is derived from the final
    (post-redirect) URL; otherwise it is derived from *domain* itself. A host
    whose network location is already in ``db`` is not recorded again.

    Args:
        domain: Full URL to probe, e.g. ``'http://example.com'``.
    """
    # Start from the input URL so the failure paths always have a value to
    # record; previously a failed request left these names unbound and the
    # handler itself crashed, silently dropping the domain.
    parsed_domain, tmp_domain = _split_domain(domain)
    try:
        r = requests.head(domain, verify=False, allow_redirects=True, timeout=3)
        # Prefer the URL the server finally redirected us to.
        parsed_domain, tmp_domain = _split_domain(r.url)
        # NOTE(review): servers that reject HEAD (e.g. with 405) end up in
        # error.txt even though a GET might succeed - confirm this is intended.
        r.raise_for_status()
    except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError, requests.exceptions.TooManyRedirects, requests.exceptions.Timeout):
        report = 'error.txt'
    except Exception:
        report = 'unknow.txt'
    else:
        report = 'good.txt'

    # The membership test and the insertion must happen under the same lock,
    # otherwise two threads can both pass the test and write duplicates.
    with lock:
        if tmp_domain in db:
            return
        with open(report, 'a') as f:
            f.write(parsed_domain + '\n')
        db.add(tmp_domain)

def task_queue(task, iterator, concurrency=10):
    """Run ``task(item)`` for every item of *iterator* on a thread pool.

    At most *concurrency* tasks are in flight at a time: the pool is primed
    with up to *concurrency* items and every finished task pulls the next
    item from *iterator*.

    Args:
        task: Callable invoked with one item from *iterator*.
        iterator: Iterator supplying the work items; it is consumed lazily.
        concurrency: Size of the thread pool and of the initial batch.

    Returns:
        A dict ``{'done': int, 'delayed': int}`` that keeps being updated
        after this function returns. ``delayed`` is the number of tasks
        submitted but not yet finished; it reaches 0 once all work is done.
    """
    def submit():
        # All bookkeeping happens under io_lock. It is reentrant, so this is
        # safe both from the priming loop and from upload_done(), which
        # already holds it. The same lock protects next(iterator), which must
        # not be advanced from two threads at once.
        with io_lock:
            try:
                obj = next(iterator)
            except StopIteration:
                return

            stats['delayed'] += 1
            future = executor.submit(task, obj)
        future.add_done_callback(upload_done)

    def upload_done(future):
        with io_lock:
            # Queue the replacement first so 'delayed' cannot touch 0 while
            # items are still waiting in the iterator.
            submit()
            # Count the completion before releasing the slot, so that anyone
            # who observes delayed == 0 also sees the final 'done' value.
            stats['done'] += 1
            stats['delayed'] -= 1

    io_lock = RLock()
    executor = ThreadPoolExecutor(concurrency)
    stats = {'done': 0, 'delayed': 0}

    for _ in range(concurrency):
        submit()

    return stats

# Start the pool, then poll the live counters every 0.2 s until no task is
# still in flight.
stats = task_queue(worker, domains, concurrency=150)
while stats['delayed'] != 0:
    # Optional progress output:
    # print('\rdone {done}, in work: {delayed}  '.format(**stats))
    time.sleep(0.2)

The code works, but it seems to me that it misses some positive responses. Please tell me how to fix it.

Answer the question

In order to leave comments, you need to log in

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question