A
A
Alexey Yarkov2014-09-29 02:15:43
C++ / C#
Alexey Yarkov, 2014-09-29 02:15:43

Where did I go wrong while writing the code?

In general, I am writing a program that will save the page by link with all the files: css, js, img, etc. Got up at the stage of downloading files. Firstly, it does not download all that it found, and secondly, the server response headers are not written to the text field. Throws the "Reference does not point to an instance of an object" error. Error in catch block in Save_File method.
The parser class code is below.

/*
 * Сделано в SharpDevelop.
 * Пользователь: Admin
 * Дата: 22.09.2014
 * Время: 20:42
 * 
 * Для изменения этого шаблона используйте Сервис | Настройка | Кодирование | Правка стандартных заголовков.
 */
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Linq;
using System.Text;
using System.Windows.Forms;
using HtmlAgilityPack;
using System.Web;
using System.IO;
using System.Net;
 
namespace ParserPlus
{
    /// <summary>
    /// Description of Parser.
    /// </summary>
    public class Parser
    {   
        
        
 
        public Parser()
        {
            
        }
 
        
        public string getRequest(string url)
        {
            try
            {
                var httpWebRequest = (HttpWebRequest) WebRequest.Create(url);
                HttpWebResponse myHttpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();
                httpWebRequest.AllowAutoRedirect = false;       //Запрещаем автоматический редирект
                httpWebRequest.Method = "GET";              //Можно не указывать, по умолчанию используется GET.
                httpWebRequest.Referer = url; // Реферер. Тут можно указать любой URL
                httpWebRequest.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36";
                httpWebRequest.Accept = "*/*";
                using (var httpWebResponse = (HttpWebResponse) httpWebRequest.GetResponse())
                {
                    using (var stream = httpWebResponse.GetResponseStream())
                    {
                        using (var reader = new StreamReader(stream, Encoding.GetEncoding(httpWebResponse.CharacterSet)))
                        {
                            return reader.ReadToEnd();
                        }
                    }
                }
            }
            catch
            {
                return String.Empty;
            }
        }
        
        
        public void Save_File(string url, string folderName, string rootfolder, TextBox box)
        {
            string sf = Path.Combine(rootfolder, folderName);
            WebClient webClient = new WebClient();
            webClient.Headers.Add("AllowAutoRedirect", "false");
            webClient.Headers.Add("Referer", url);
            webClient.Headers.Add("UserAgent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.124 Safari/537.36");
            webClient.Headers.Add("Accept", "*/*");
            webClient.Headers.Add("Method", "GET");
            //webClient.Headers.Add();
            try
            {
                webClient.DownloadFile(new Uri(url), sf+"\\"+Path.GetFileName(url));
 
                string[] keys = webClient.ResponseHeaders.AllKeys;
                foreach(var key in keys)
                {
                    box.Text += webClient.ResponseHeaders.GetValues(key).ToString() + "\r\n";
                }
            }
            catch
            {
                box.Text += "Ошибка загрузки " + url + "\r\n";
                box.Text +=  webClient.ResponseHeaders.ToString()+ "\r\n";
            }
        }
 
 
        public void Get_Elements(string Host, HtmlNodeCollection type, string attr, string name, string folder, TextBox box)
        {
            string u;
            foreach (HtmlNode lnk in type) {
                
                if (lnk.Attributes[attr] != null){
                    string href = lnk.GetAttributeValue(attr, "");
                    
                    if (href.IndexOf("http", 0) == -1) {
                        u = "http://"+Host+href;
                    }else{
                        u = href;
                    }
 
                    string n = u.Split("?".ToCharArray())[0];
                    box.Text += n + "\r\n";
                    MainForm f = new MainForm();
                    Save_File(n, name, folder, f.textBoxDebug);
                }
            }
            
        }
        
        public void Get_Page(string PageUrl, TextBox box, string RootFolder)
        {
            Uri uri = new Uri(PageUrl);
            string siteFolder = Path.Combine(RootFolder, uri.Host);
            string u;
 
            // создаем необходимые папки для работы
            if (!File.Exists(RootFolder)){
                Directory.CreateDirectory(RootFolder);
            }
            if (!File.Exists(siteFolder)){
                Directory.CreateDirectory(siteFolder);
            }
            Directory.CreateDirectory(Path.Combine(siteFolder, "images"));
            Directory.CreateDirectory(Path.Combine(siteFolder, "js"));
            Directory.CreateDirectory(Path.Combine(siteFolder, "css"));
            /**************************************/
                        
            var content = getRequest(PageUrl);
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(content);
            HtmlNodeCollection css = doc.DocumentNode.SelectNodes("//link");
            HtmlNodeCollection script = doc.DocumentNode.SelectNodes("//script");
            HtmlNodeCollection img = doc.DocumentNode.SelectNodes("//img");
 
            if (css != null) {
                box.Text += "Загружаем список стилей ("+css.Count+"):\r\n";
                Get_Elements(uri.Host, css, "href", "css", siteFolder, box);
            }else{
                 box.Text += "Не найдено таблиц CSS (((\r\n"+content+"";
            }
            
            if (script != null) {
                box.Text += "Загружаем список подключенных JavaScript ("+script.Count+"):\r\n";
                Get_Elements(uri.Host, script, "src", "js", siteFolder, box);
            }else{
                 box.Text += "Не найдено подключенных JavaScript (((\r\n"+content+"";
            }
            
            
            if (img != null) {
                box.Text += "Загружаем список изображений ("+img.Count+"):\r\n";
                Get_Elements(uri.Host, img, "src", "images", siteFolder, box);
            }else{
                 box.Text += "Не найдено изображений (((\r\n"+content+"";
            }
 
        }
        
 
        public void Replace_Links(string htmlContent)
        {
            
        }
 
    }
}

Answer the question

In order to leave comments, you need to log in

2 answer(s)
V
Vitaly, 2014-09-29
@vipuhoff

It is worth looking in the direction of textBoxDebug, perhaps this variable is not filled correctly at the time of program execution

V
Vitaly Sergeev, 2014-09-30
@enkryptor

There is a version that crashes into box.Text += webClient.ResponseHeaders.ToString()+ "\r\n";
If no response was received, then ResponseHeaders will be null.

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question