C
C
Cyril2020-02-23 10:20:41
JavaScript
Cyril, 2020-02-23 10:20:41

Node js, fs - does not save the file after executing all queues during long work, why?

I am making a site parser, there is a list of links to products.
Parsing works through the async/queue queue , if you give the parser 200 pages, then the parsing ends normally, and at the end the data file is saved normally, and if you give 1200 pages (all), then the parsing passes, but the data file is not created at all. What could be the problem, and how can I track it through debugging?

Code from the page parsing file

ps - если это гавнокод и где-то можно было сделать лучше, прошу напишите - мне это будет полезно
const fs = require("fs");
const chalk = require('chalk');
const log = console.log;
const cheerio = require("cheerio");
const queue = require("async/queue");
const getHtml = require('./getHtml.js');
const delay = require('./delay.js');

const proxyList = [
  "host1:port:login:pass",
  "host2:port:login:pass",
  "host3:port:login:pass",
];

let i = 0;
let parseCount = 0;
let proxyListCount = proxyList.length;
let data = [];
let curProdId = '';
let mathTable = {
  "Производитель": "manufacturer",
  "Страна производитель": "countryManufacture",
  "Вид парфюмерной продукции": "typePP",
  "Тип аромата": 'flavorType',
  "Начальная нота": "topNote",
  "Нота сердца": "heartNote",
  "Конечная нота": "finalNote",
  "Классификация": "classification",
  "Год выпуска": "year",
  "Объем": "bulk"
}
let prodAttr = {}

let getImgList = ($, selector, glueChar) => {
  let links = [];
  let id = '';
  $(selector).each((i, e) => {
    id = $(e).attr('data-subscribe').split('"')[1];
    links.push(`https://images.kz.prom.st/${id}_w640_h640.jpg`);
  })
  return links.join(glueChar);
}

let q = queue(function (_data, callback) {

  let extOpt = {
    url: _data.url,
    proxy: _data.proxy,
  }
  getHtml.request(extOpt)
    .then(html => {
      let $ = cheerio.load(html);
      curProdId = $("h1.cs-title .cs-online-edit__link").attr("data-edit-id");
      let attrName = "";
      let attrVal = "";
      prodAttr = {
        "manufacturer": "",
        "countryManufacture": "",
        "typePP": "",
        'flavorType': "",
        "topNote": "",
        "heartNote": "",
        "finalNote": "",
        "classification": "",
        "year": "",
        "bulk": ""
      }

      temp = {
        "id": curProdId,
        "name": $("h1.cs-title .cs-title__text").text().replace(/\s\d+|\(\d.*.\)/gi, ''),
        "tag": $(".cs-product__container .cs-product__label").text(),
        "price": $(".cs-product__container .b-product-cost__price").text().replace(/\D+/g, ""),
        "imgList": getImgList($, '.cs-product__visual img', ', '),
        "desc": $(".cs-tab-list .b-user-content").html()
      };

      $('.b-product-info tbody tr').first().remove(); 
      $('.b-product-info tbody tr').each((i, e) => {
        attrName = $(e).find('td').first().text().trim(); 
        attrVal = $(e).find('td').last().text().trim();
        if (attrName == "Объем") {
          attrVal = attrVal.replace(/\D+\d*/gi, '');
        }
        prodAttr[mathTable[attrName]] = attrVal;
        temp = Object.assign(temp, prodAttr);
      })
      data.push(temp);
      parseCount++;
      log(
        chalk.black.bgBlue(parseCount),
        '[' + chalk.blue(extOpt.proxy.split('@')[1].split(':')[0]) + ']',
        chalk.green('Обработан продукт:', temp.name)
      );
      callback();
    })
    .catch(err => console.warn(chalk.red(err)));
}, 5);

q.drain(function () {
  log(chalk.black.bgGreen("Все очереди отработаны"));
  fs.writeFileSync('data.json', JSON.stringify(data));
});

q.error(function (err, task) {
  console.error("task experienced an error");
});

let startParsePages = links => {
  links.forEach(async link => {
    await delay(500, 1000);
    i = (i > proxyListCount - 1) ? 0 : i;
    let currentProxy = proxyList[i];
    [host, port, login, password] = currentProxy.split(':');
    let data = {
      url: link,
      proxy: `http://${login}:${password}@${host}:${port}`
    };
    q.push(data);
    i++;
  });
}

fs.readFile('allLinksProduct.txt', 'utf8', function (err, contents) {
  let links = contents.split('\n');
  fs.writeFileSync('data.json', '[');
  startParsePages(links);
});

Answer the question

In order to leave comments, you need to log in

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question