T
T
tolikryg2018-04-22 13:19:19
PHP
tolikryg, 2018-04-22 13:19:19

How to remove domain duplication from php parser?

site.com - Simple Domain
Stand I have a problem in the parser, I start parsing, it gives the following: Error: Unable to load page: https://www.site.comhttps://www.site.com/page/dog-...
As I understand it, it substitutes the domain for the link how to remove this error?

<?php

use DiDom\Document;
use DiDom\Query;

class update{
  
  private $history_file = 'history.json';
  
  public $catalog_url = 'https://site.com/new';
  
  public $site = 'https://site.com'; <--- Тут ошибка
  
  public $game_url = '';
  
  private $db = false;
  
  private $ch = false;
  
  public function __construct(){
    
    if(!headers_sent()){
      
      header('Content-Type: text/html; charset=utf-8');
      
    }
    
    if(!is_writable(__DIR__)){
      
      die('Директория скрипта защищена от записи. Поставьте права доступа 777 на нее.');
      
    }
    
    if (version_compare(PHP_VERSION, '5.4.0', '<')){
      
      die("Необходимо обновить PHP как минимум до версии 5.4. Текущая: ".PHP_VERSION);
      
    }
    
  }
  
  public function install(){
    
    if($this->is_installed()){
      
      die('Скрипт уже установлен. Запустите файл update.php для обновления.');
      
    }
    
    $games = $this->get_catalog();
    
    if(!empty($games)){
      
      $keys = [];
      
      foreach($games as $game){
        
        $keys[] = $game['key'];
        
      }
      
      file_put_contents($this->history_file, json_encode($keys));
      
      $this->log('Скрипт установлен.');
      
    }else{
      
      $this->error('Не загружен список игр');
      
    }
    
  }
  
  public function update_games(){
    
    if(!$this->is_installed()){
      
      die('Скрипт еще не установлен. Запустите файл install.php.');
      
    }
    
    if(empty($this->db_config['host']) OR empty($this->db_config['user']) OR empty($this->db_config['password']) OR empty($this->db_config['name'])){
      
      $this->error('Не указаны данные для подключения к БД');
      
    }
    
    try {
      
      $dsn = "mysql:host=".$this->db_config['host'].";dbname=".$this->db_config['name'].";charset=utf8";
      
      $opt = [
          PDO::ATTR_ERRMODE            => PDO::ERRMODE_EXCEPTION,
          PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
          PDO::ATTR_EMULATE_PREPARES   => true,
      ];
      
      $this->db = new PDO($dsn, $this->db_config['user'], $this->db_config['password'], $opt);
      
    } catch (Exception $e) {
      
      $this->db = false;
      
      $this->error('Невозможно подключиться к БД. '.$e->getMessage());
      
    }
    
    $this->db->query('SET @@session.time_zone = '.$this->db->quote(date('P')));
    
    $games = $this->get_catalog();
    
    $games_keys = [];
    
    foreach($games as $g){
      
      $games_keys[] = $g['key'];
      
    }
    
    $history_games = json_decode(file_get_contents($this->history_file), true);
    
    $new_game = $this->db->prepare('INSERT INTO tumd_games (game_id, name, image, description, file, w, h) VALUES (:game_id, :name, :image, :description, :file, 950, 600)');
    
    foreach($games as $game){
      
      if(in_array($game['key'], $history_games)){
        
        continue;
        
      }
      
      $link = $this->site.$game['link']; <--- И тут они связаны
      
      $this->log('Добавление игры: '.$link);
      
      $game_data = $this->get_game($link);
      
      $max_id = $this->db->query('SELECT MAX(game_id) FROM tumd_games')->fetchColumn();
      
      $new_game->execute([
        ':game_id'=>$max_id+1,
        ':name'=>$game_data['name'],
        ':image'=>$game_data['image'],
        ':description'=>$game_data['description'],
        ':file'=>$game_data['file']
        ]);
      
    }
    
    file_put_contents($this->history_file, json_encode($games_keys));
    
  }
  
  private function is_installed(){
    
    if(file_exists($this->history_file) AND filesize($this->history_file) > 1){
      
      return true;
      
    }else{
      
      return false;
      
    }
    
  }
  
  private function get_catalog(){
    
    $page = $this->download($this->catalog_url);
    
    if(!empty($page['ok']) AND !empty($page['content']) AND $document = new Document($page['content'])){
      
      
      
      $games_html = $document->find('.tile');
      
      if(count($games_html) == 0){
        
        $this->error('Не найдены игры в каталоге');
        
      }
      
      $games = [];
      
      foreach($games_html as $gh){
        
        $gh_link = $gh->first('a');
        
        $gh_link = $gh_link->href;
        
        $gh_key = preg_replace('@/([A-z0-9_-]+)(?:|\?.+)@', '$1', $gh_link);
        
        $games[$gh_key] = ['key'=>$gh_key, 'link'=>$gh_link];
        
      }
      
      $games = array_values($games);
      
      return $games;
      
    }else{
      
      $this->error('Невозможно загрузить страницу с каталогом');
      
    }
    
  }
  
  private function get_game($url){
    
    $page = $this->download($url);
    
    if(!empty($page['ok']) AND !empty($page['content']) AND $document = new Document($page['content'])){
      
      $name = $document->first('h1')->text();
      
      $image = $document->first('.subGamePart img')->src;
      
      $description = $document->find('.subGameRest .description p');
      
      $description_par = [];
      
      foreach($description as $d){
        
        $description_par[] = $d->html();
        
      }
      
      $description_par = implode('', $description_par);
      
      $iframe = $document->first('iframe')->src;
      
      $iframe_data = $this->download($this->site.$iframe);
      
      preg_match('@src: "([^"]+)"@', $iframe_data['content'], $iframe_data);
      
      return [
        'name'=>$name,
        'description'=>$description_par,
        'image'=>$image,
        'file'=>$iframe_data[1]
      ];
      
    }else{
      
      $this->error('Невозможно загрузить страницу с игрой: '.$url);
      
    }
    
  }
  
  private function log($data){
    
    file_put_contents('log.txt', date('r')."\t".$data."\n", FILE_APPEND|LOCK_EX);
    
  }
  
  private function error($log=false){
    
    if(!empty($log)){
      
      print('Ошибка: '.$log);
      
      $this->log('Ошибка: '.$log);
      
    }
    
    die();
    
  }
  
  protected function download($link, $options=[]){
    
    try {
      
      if(empty($this->ch)){
        
        $this->ch = curl_init();
        
      }
      
      curl_setopt_array($this->ch, $options+array(
        CURLOPT_URL=>$link,
        CURLOPT_RETURNTRANSFER=>true,
        CURLOPT_FOLLOWLOCATION=>true,
        CURLOPT_MAXREDIRS=>3,
        CURLOPT_SSL_VERIFYPEER=>0,
        CURLOPT_SSL_VERIFYHOST=>0,
        CURLOPT_CONNECTTIMEOUT=>10,
        CURLOPT_TIMEOUT=>15,
        CURLOPT_IPRESOLVE=>CURL_IPRESOLVE_V4,
        CURLOPT_ENCODING=>'',
        CURLOPT_PROTOCOLS=>CURLPROTO_HTTP|CURLPROTO_HTTPS,
        CURLOPT_USERAGENT=>'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
        CURLOPT_HTTPHEADER=>[
          'accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
          'accept-language: ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4',
          'cache-control: no-cache',
          'pragma: no-cache'
        ]
        ));
      
      $curl_result = (string)curl_exec($this->ch);
      
      $curl_info = curl_getinfo($this->ch);
      
      $ok = ($curl_info['http_code'] == 200);
      
      return ['content'=>$curl_result, 'info'=>$curl_info, 'ok'=>$ok];
      
    } catch (Exception $e) {
      return false;
    }
    
  }
  
}

Answer the question

In order to leave comments, you need to log in

1 answer(s)
S
Snewer, 2018-04-22
@Snewer

fix it
on the
$link = $game['link']; <--- И тут они связаны

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question