D
D
Dmitry2016-07-29 13:16:02
PHP
Dmitry, 2016-07-29 13:16:02

What are the errors in the script algorithm? Content Parsing (Simple HTML Dom)?

There is a code that I inherited, I’m just starting to understand php, the problem is the following, it seems to be parsing, creating a SQL database file, making entries in this file of what it parsed, but when I try to import the myAdmin table, it gives an error, and the import fails, maybe the algorithm is not well formed?

<!DOCTYPE html>
<html>
<head>
    <meta http-equiv="content-type" content="text/html; charset=utf-8">
</head>
</html>
<?php

include_once('curl/curl_query.php');
define('_DS_', DIRECTORY_SEPARATOR);
class tdm_parse
{

    private $link = "http://tdme.ru";
    //public $path = __DIR__ . _DS_ . "catalog" . _DS_;
     // private $SHD_path = include('../shd/simple_html_dom.php');
    //private $PHPExcel_path = __DIR__ . 'PHPExcel' . __DIR__ . 'PHPExcel.php' ;
    /**
     * @param $SHD_path - path to library simple html dom
     * @return array|int - array of categories
     */
    private function get_categories(){

        //Получаем список категорий из сайдбара
        $html = new simple_html_dom();
        $html = curl_get('http://tdme.ru/product/');
        $html = str_get_html($html);
        if($categories = $html->find('div.sidebar ul.submenu')[1]->children){
           $parsed_category = array();
           foreach ($categories as $category){
                $parsed_category[] = $category->outertext;
            }
            return $parsed_category;
        }     
    }

    /**
     * @param $categories
     * @return array
     */
    private function get_models($categories){
        $array_of_sub = array();
        $html = new simple_html_dom();
        if (isset($categories)){
        foreach ($categories as $category){
            $parent = new simple_html_dom();
            $parent->load($category);
            $parent = $parent->find('a')[0]->href;
            $html = curl_get($this->link . $parent);
            $html = str_get_html($html);      
            $subcategories = $html->find('ul.category')[0]->children;
            
         foreach ($subcategories as $sub) {
                $array_of_sub[] = $parent . $sub->find('a')[0]->href;
            }}
            return $array_of_sub;
        }}


    private function get_price($subcat){
        $sql_file = fopen('base.sql', 'a+');
        $path_to_images = __DIR__ . _DS_ ."catalog". _DS_ ;
        $manufacturer_id = 25;
        $table_of_goods = new simple_html_dom();     
        $html_sub = file_get_html($this->link . $subcat);
        $html_sub = str_get_html($html_sub);
        $description = new simple_html_dom();
        $description = $html_sub->find('dl.tabs')[0]->find('dd')[0]->find('div.tab-content')[0];
        $description = htmlspecialchars($description->plaintext);    
        $table_of_goods = $html_sub->find('div#modal_table table.mod_t')[0]->children;
        $table_header = $table_of_goods[0]->find('tr')[0];
        $table_body = $table_of_goods[1]->children;
        $numer = array();

        foreach (($table_header->children) as $num => $th_row) {

            switch(trim($th_row->plaintext)){
                case 'Наименование':
                case 'Наименование светильника':
                    $numer["name"] = $num;
                    break;
                case 'Фото':
                case 'Изображение':
                    $numer['image'] = $num;
                    break;
                case 'Артикул':
                    $numer['model'] = $num;
                    break;
            }

        }
        if (!(isset($numer['image']))){
            $numer['image'] = '-1';
            $image_path = $html_sub->find('div.product_info div.image div img')[0]->src;
        }

        $rowspan = 1;


        foreach ($table_body as $table_line){
            $arr_of_tl = $table_line->children;
            if($rowspan == 1){
                $row_bool = 0;
            }
            else {
                $row_bool = 1;
                $rowspan--;
            }
            foreach ($arr_of_tl as $num => $value){
                switch($num + $row_bool){
                    case $numer['image']: {
                        if (isset($value->rowspan)) {
                            $rowspan = (int) $value->rowspan;
                        }
                        if (isset($value->find('img')[0])){
                            $image_path = $value->find('img')[0]->src;}//;
                    }
                        break;
                    case $numer['model']: $article = $value->plaintext;
                        break;
                    case $numer['name']: $name = $value->plaintext;
                        break;
                }
            }
            file_put_contents(__DIR__ . _DS_ . "catalog" . _DS_ . $article, $this->link . $image_path);
            $parsed_string = 'UPDATE tdm_upload SET `name` = \'' . $name . '\' `image` = \'catalog/'.   $article .   '\' `description` = \''.   $description .   '\' WHERE `article` = \'' . $article . "'; \n";
            fwrite($sql_file, $parsed_string);
        }
        $html_sub->clear();

        unset($html_sub);
        unset($table_of_goods);
        unset($description);
        return 1;
    }

    public function insert_into_mysql() {

        $this::Load(__DIR__ . '\shd\simple_html_dom.php');
        $categories = new simple_html_dom();
        $categories = $this->get_categories();
        $subcat = $this->get_models($categories);
        if(count($subcat)){
        foreach ($subcat as $models){
            sleep(1);
            $this->get_price($models);
        }}
        
        var_dump(($subcat));
        return 1;
    }
    public static function Load($pClassName){
        if ((class_exists($pClassName,FALSE))) {
            return FALSE;
        }

        if ((file_exists($pClassName) === FALSE) || (is_readable($pClassName) === FALSE)) {
            return FALSE;
        }

        require_once($pClassName);
    }   

}
$tdm_parse = new tdm_parse();
$tdm_parse->insert_into_mysql();
?>

Answer the question

In order to leave comments, you need to log in

2 answer(s)
T
ThunderCat, 2016-07-29
@demonix26

Why are these sheets here? What is the problem basically? Incorrect sql output? The problem in the text of requests means? What error does it give when importing? Is this error in google? What is the structure of the table? What doesn't match?
Answer (yourself) these questions, I think the problem will be obvious. So far, in addition to the piece of code that you posted for no reason (is it working?), There is not a byte of useful information on the topic in the question.
If the answers to these questions do not help you with the decision - lay them out for us, maybe then it will be clear what to do.
I understand that you just import the sql file through phpmyadmin? Does it result in an error? What mistake?

D
Dmitry, 2016-07-30
@demonix26

In general, I figured it out, reformulated the SQL query, everything works as it should

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question