P
P
Pavel Chuev2016-02-08 21:45:29
JavaScript
Pavel Chuev, 2016-02-08 21:45:29

How to download images from the site as they arrive?

There is an image board whose image file names follow this format: 2016-02-08-808706.jpeg (the extension may also be png or jpg). Each newly added image increases the number in the link by 1 relative to the current one, i.e. 2016-02-08-(808706+1).jpeg (or png or jpg). The following code only downloads the images whose numbers are listed in the files variable.

var http = require('http');
var fs = require('fs');
/**
 * Formats a date as `YYYY-MM-DD` using its local-time components.
 *
 * @param {Date|number|string} [date] - Date to format; any value the `Date`
 *   constructor accepts. When omitted (or null), the current date is used.
 * @returns {string} Year, zero-padded month and zero-padded day joined by
 *   dashes, e.g. `2016-02-08`.
 */
function nowDate(date) {
    // Honour the caller's date; fall back to the current time only when none is given.
    var when = (date === undefined || date === null) ? new Date() : new Date(date);

    var dd = when.getDate();
    if (dd < 10) dd = '0' + dd;

    // getMonth() is zero-based, so shift it to the usual 1-12 range.
    var mm = when.getMonth() + 1;
    if (mm < 10) mm = '0' + mm;

    var yy = when.getFullYear();

    return yy + '-' + mm + '-' + dd;
}

// Directory the downloaded images are written to.
var uploadDir = "C:\/Node\/";

// Numeric parts of the image names to fetch (the number that follows the date in the URL).
var files = [
    808707+1
];

// Request every listed image. The target file is created only after the server
// answers 200, so an image that does not exist yet leaves no empty file behind.
files.forEach( function (fileName) {
    var url = "http://example/images/"+nowDate()+'-'+fileName+'.jpeg';

    var request = http.get(url, function (response) {
        if (response.statusCode !== 200) {
            // Discard the body so the underlying socket is released.
            response.resume();
            console.error('Skipping ' + url + ': HTTP ' + response.statusCode);
            return;
        }

        var file = fs.createWriteStream(uploadDir + fileName+'.jpeg');
        file.on('error', function (err) {
            console.error('Cannot write ' + uploadDir + fileName + '.jpeg: ' + err.message);
        });
        response.pipe(file);
    });

    // Without this listener a DNS or connection failure would crash the process.
    request.on('error', function (err) {
        console.error('Request failed for ' + url + ': ' + err.message);
    });
});

The essence of the question: how can the image numbers be generated in a loop so that images are downloaded as they appear on the board (that is, without creating empty image files)? Also, since the images on the board come in jpeg, jpg and png formats, how can all of them be downloaded using the same algorithm?

Answer the question

In order to leave comments, you need to log in

3 answer(s)
O
OVK2015, 2016-02-09
@AllDecay

After some polishing:

var path = require('path');
var request = require('request');
var http = require('http');
var fs = require('fs');

// Upper bound used by the page loop (the loop starts at page 1).
const pageAmount = 2;

// Local directory that receives the downloaded images.
const uploadDir = 'C:\/Node\/';

// Base URL that a captured image name is appended to when downloading.
const urlPrefix = 'http://e-shuushuu.net/images/';

// Captures whatever follows "/images/thumbs/" in an <img src="..."> attribute,
// up to the closing double quote. The `g` flag lets exec() walk through every
// occurrence in a page body.
const regExpImageURLWrapper = new RegExp('(?:<img src="\/images\/thumbs\/)(.*?)"', 'gim');

/**
 * Downloads a single image into `uploadDir`.
 *
 * The response body is held back until the status code and MIME type have
 * been inspected; the file on disk is created only for a 200 response whose
 * Content-Type mentions "image". Any other response is logged and skipped.
 *
 * @param {string} imageUrl - Image name relative to `urlPrefix`, as captured
 *   from a listing page.
 * @returns {void}
 */
function getCurrentImage(imageUrl)
{
  // imageUrl is scraped from remote HTML; keep only its last path segment so a
  // crafted value such as "../x" cannot make us write outside uploadDir.
  const fileName = path.join(uploadDir, path.basename(imageUrl));

  const newRequest = request(urlPrefix + imageUrl);
  // Hold the body until the 'response' handler has decided whether to keep it.
  newRequest.pause();

  // Without an 'error' listener a network failure would be raised as an
  // uncaught exception and stop every other download.
  newRequest.on('error', function(err)
  {
    console.log('Ошибка загрузки файла ' + urlPrefix + imageUrl + ': ' + err.message);
  });

  newRequest.on('response', function(resp)
  {
    // The header may be absent; treat that as "not an image" instead of throwing.
    const contentType = resp.headers['content-type'] || '';

    if((resp.statusCode === 200) && contentType.includes('image'))
    {
      const target = fs.createWriteStream(fileName);
      target.on('error', function(err)
      {
        console.log('Ошибка записи файла ' + fileName + ': ' + err.message);
      });
      newRequest.pipe(target);
      newRequest.resume();
      console.log(`Загружаем: ${imageUrl}`);
    }
    else
    {
      const message =
        '\n********************************************************\n' +
        'Ошибка загрузки файла ' + urlPrefix + imageUrl + '\n' +
        'Код ошибки: ' + resp.statusCode + ' ' + resp.statusMessage + '; ' +
        'Mime-type: ' + resp.headers['content-type'] + '\n' +
        '********************************************************\n';
      console.log(message);
      // NOTE(review): end() is kept from the original; confirm it actually
      // releases the still-paused response (abort() may be the better call).
      newRequest.end();
    }
  });

  newRequest.on('end', function()
  {
    console.log(`Загрузка завершена: ${imageUrl}`);
  });
}

// Fetch each listing page and start a download for every thumbnail found on it.
// NOTE(review): the loop starts at 1 and uses `<`, so pageAmount = 2 visits
// page 1 only — confirm whether `<=` was intended.
for(let pageCounter = 1; pageCounter < pageAmount; pageCounter++)
{
  const pageUrl = 'http://e-shuushuu.net/?page=' + pageCounter;

  request
  (
    pageUrl,
    function (error, response, body)
    {
      if (error)
      {
        console.log('Ошибка загрузки страницы ' + pageUrl + ': ' + error.message);
        return;
      }
      if (response.statusCode !== 200)
      {
        console.log('Ошибка загрузки страницы ' + pageUrl + ': ' + response.statusCode);
        return;
      }

      // The regex is shared and stateful (`g` flag): start from the beginning
      // of this body regardless of how any earlier scan finished.
      regExpImageURLWrapper.lastIndex = 0;

      // Declared locally; assigning to an undeclared `match` would create an
      // implicit global and throws in strict mode.
      let match;
      while((match = regExpImageURLWrapper.exec(body)) !== null)
      {
        getCurrentImage(match[1]);
      }
    }
  );
}

D
Dark Hole, 2016-02-08
@abyrkov

I didn't understand anything. Also, jpeg and jpg are the same format.

P
Pavel Chuev, 2016-02-08
@AllDecay

Still need an answer

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question