N
N
Nurlan2016-12-21 16:52:54
Node.js
Nurlan, 2016-12-21 16:52:54

How to traverse multiple pages in Nightmare.js and save them to PDF?

I need to go through several pages behind a login and save each of them as a PDF. The problem is that nothing gets saved.
I log in, go to the internal page, pull out the links to the pages that need to be saved, visit each of them, get the title to use as the file name, and try to save the page to PDF, but nothing happens.
Here is the code:

// Script from the question: logs in, opens an internal page, collects links
// from it, then visits each link and tries to save the page as a PDF in a
// directory named after the first page's title.
var fs = require('fs');
var Nightmare = require('nightmare');
var ms = Nightmare({
    show: false,
    width: 921,
    height: 900
});

// NOTE(review): this binds the built-in `Object` constructor itself, not a new
// object, so the properties assigned below are attached to that constructor.
var page_params = Object;
// Queue the login steps on the Nightmare instance; `page` is the value
// returned by the chained calls and is reused for every later navigation.
var page=ms.goto('http://domain')
    .type('form input#login', 'login')
    .type('form input#password', 'password')
    .click('form [type=submit]')
    .wait(1000);

page.goto('http://domain/url')
    .wait(1000)
    .title()
    .then(function (title) {
        // With a string pattern, replace() only removes the first "/".
        title = title.replace("/", "");
        page_params.title = title
        console.log(title)
        // The page title doubles as the output directory name.
        if (!fs.existsSync(title)){
            fs.mkdirSync(title);
        }
        // Collect the target links inside the page context.
        // Uses `$`, so presumably the page itself provides jQuery — verify.
        page.evaluate(function () {
            var urls=[];
            $(".passedClass a.head_type-test").map(function (index, element) {
                urls.push('domain' + $(element).attr('href'));
            })
            return urls;
        })
            .then(function (urls) {
                console.log('map urls');
                // forEach does not wait for the promise created in each
                // iteration, so every goto() below is issued on the same
                // instance without waiting for the previous page to finish.
                // NOTE(review): likely part of why no PDF is produced.
                urls.forEach(function(url,index){
                    console.log(url)
                    page.goto(url)
                        .wait(1000)
                        .title()
                        .then(function(per_title){
                            // page_params is shared, so per_title and path are
                            // overwritten by whichever callback runs last.
                            page_params.per_title = per_title
                            page_params.path=page_params.title+'/'+index+' '+page_params.per_title+'.pdf'

                            console.log(page_params)
                            // Nothing gets saved; the console.log above ran, so execution reached this point.
                            // NOTE(review): the result of pdf() is neither returned nor followed by
                            // .then(); Nightmare appears to run queued actions only once the chain
                            // is consumed — confirm against the Nightmare docs.
                            page.pdf(page_params.path)
                        })


                })
            // NOTE(review): the second argument of then() is the rejection-handler
            // slot; page_params is not a function, so it is presumably ignored.
            },page_params)
    },page_params)

I've been struggling with this all day and have tried all kinds of modifications to the code. I would appreciate your help.

Answer the question

In order to leave comments, you need to log in

1 answer(s)
T
trubel, 2016-12-21
@daager

The error is that you forgot that Node.js is asynchronous: Nightmare does not wait for the file to be saved and immediately moves on to the next address. With code like this, it really is easy to go crazy :)
For a similar task, I used async/await:

// Load Nightmare and create the single `browser` instance that the
// functions below operate on.
var Nightmare = require('nightmare');
var browser = Nightmare();

// Placeholder for the login step; the actual actions are omitted in this snippet.
async function login() {
  // log in
}

// Opens a page with the shared `browser`, waits for the chain to finish and
// returns a copy of the data it produced (starts as an empty array).
// The `...` line stands for steps elided from this snippet.
async function getLinks() {
  var result = [];
  // pull out the links
  await browser
    .goto('url')
    ...
    .then(function(data){
      // Copy the resolved data so the caller gets its own array.
      result = data.slice();
    });
  return result
}

// Navigates the shared `browser` to `url` and awaits the pdf() call, so the
// caller resumes only after this chain has settled.
// The arguments of pdf() are elided (`...`) in this snippet.
async function savePDF(url) {
  // save
  await browser
    .goto(url)
    .pdf(...);
}

/**
 * Entry point: logs in, collects the links and saves each one as a PDF.
 *
 * The pages are handled strictly one after another: every savePDF() call is
 * awaited before the next one starts, so the single browser instance never
 * navigates away while a save is still in progress.
 *
 * @returns {Promise<void>} Resolves once every link has been saved; rejects
 *   with the first error raised by login(), getLinks() or savePDF().
 */
async function run() {
  await login();
  const links = await getLinks();
  // for...of with await keeps the saves sequential (forEach would not wait).
  for (const link of links) {
    await savePDF(link);
  }
}

`await` tells the engine to wait for the operation to finish before moving on.
`async` marks a function as asynchronous; `await` can only be used inside functions declared with `async`.

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question