Answer the question
In order to leave comments, you need to log in
How to parse a website using QNetworkAccessManager?
I need to find a fragment like the one below on the page and extract the link to the picture from it.
<div class="row-fluid">
<strong>
Скачать оригинал:
<a href="pictures/originals/2014/Nature_Highway_in_the_mountains_082434_.jpg" class="original-link" download="pictures/originals/2014/Nature_Highway_in_the_mountains_082434_.jpg" title="Шоссе в сторону гор">Шоссе в сторону гор - 1920x1080</a>
</strong>
</div>
void DownloadHtml::Download()
{
manager = new QNetworkAccessManager(this);
connect(manager, SIGNAL(finished(QNetworkReply*)),
this, SLOT(replyFinished(QNetworkReply*)));
manager->get(QNetworkRequest(QUrl("http://google.com")));
}
/**
 * Handles a finished network reply.
 *
 * - On a network error, logs the error text and stops.
 * - On an HTTP redirect, re-issues the request to the redirect target on the
 *   same manager, so the final page is saved rather than the "302 Moved"
 *   stub. The number of hops is limited to avoid endless redirect loops.
 * - Otherwise appends the response body to C:/wall/downloaded.txt.
 *
 * @param reply Finished reply; it is scheduled for deletion here, so callers
 *              must not use it after the slot returns.
 */
void DownloadHtml::replyFinished (QNetworkReply *reply)
{
    // deleteLater() only schedules the deletion for when control returns to
    // the event loop, so the reply stays valid for the rest of this function
    // while every early-return path is still covered.
    reply->deleteLater();

    if (reply->error() != QNetworkReply::NoError)
    {
        qDebug() << "Error!";
        qDebug() << reply->errorString();
        return;
    }

    const QUrl redirect =
        reply->attribute(QNetworkRequest::RedirectionTargetAttribute).toUrl();
    if (!redirect.isEmpty())
    {
        // The hop counter travels with the request in a user attribute, so no
        // extra state is needed in the class.
        static const int kMaxRedirects = 10;
        const int hops = reply->request().attribute(QNetworkRequest::User).toInt();
        if (hops >= kMaxRedirects)
        {
            qDebug() << "Error!";
            qDebug() << "Too many redirects, last target:" << redirect.toString();
            return;
        }

        // The Location header may be relative; resolve it against the URL of
        // the reply that produced it.
        QNetworkRequest next(reply->url().resolved(redirect));
        next.setAttribute(QNetworkRequest::User, hops + 1);
        reply->manager()->get(next);
        return;
    }

    // Stack-allocated file: it is closed and released automatically on every path.
    QFile file("C:/wall/downloaded.txt");
    if (!file.open(QFile::Append))
    {
        qDebug() << "Error!";
        qDebug() << "Cannot open" << file.fileName() << ":" << file.errorString();
        return;
    }

    file.write(reply->readAll());
    file.close(); // close() flushes buffered data, so no separate flush() is needed
}
<HTML><HEAD><meta http-equiv="content-type" content="text/html;charset=utf-8">
<TITLE>302 Moved</TITLE></HEAD><BODY>
<H1>302 Moved</H1>
The document has moved
<A HREF="http://www.google.ru/?gfe_rd=cr&ei=3cudVPqyMYTzwAOP5oCQCg">here</A>.
</BODY></HTML>
Answer the question
In order to leave comments, you need to log in
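Your download code itself works correctly. curl returns exactly the same response: the server answers with a 302 redirect, and the HTML you saved is the body of that redirect rather than the page itself.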
@home-tower:~$ curl -i http://google.com
HTTP/1.1 302 Found
Cache-Control: private
Content-Type: text/html; charset=UTF-8
Location: http://www.google.ru/?gfe_rd=cr&ei=EPmeVPyRK6Or8wf5-IDABA
Content-Length: 258
Date: Sat, 27 Dec 2014 18:23:12 GMT
Server: GFE/2.0
Alternate-Protocol: 80:quic,p=0.002
<HTML><HEAD><meta http-equiv="content-type" content="text/html;charset=utf-8">
<TITLE>302 Moved</TITLE></HEAD><BODY>
<H1>302 Moved</H1>
The document has moved
<A HREF="http://www.google.ru/?gfe_rd=cr&ei=EPmeVPyRK6Or8wf5-IDABA">here</A>.
</BODY></HTML>
user@home-tower:~/Projects/htmlparsing$ git init
user@home-tower:~/Projects/htmlparsing$ git submodule add https://github.com/lagner/QGumboParser.git lib
Cloning into 'QGumboParser'...
remote: Counting objects: 96, done.
remote: Total 96 (delta 0), reused 0 (delta 0)
Unpacking objects: 100% (96/96), done.
Checking connectivity... done.
user@home-tower:~/Projects/htmlparsing$ git submodule update --init --recursive
#include <exception>
#include <QCoreApplication>
#include <QDebug>
#include <QNetworkAccessManager>
#include <QNetworkRequest>
#include <QNetworkReply>
#include <qgumbodocument.h>
#include <qgumbonode.h>
void requestFinished(QNetworkReply*);
void parseHtml(QString html);
int main(int argc, char *argv[])
{
QCoreApplication a(argc, argv);
QNetworkAccessManager nm;
QObject::connect(&nm, &QNetworkAccessManager::finished, requestFinished);
nm.get(QNetworkRequest(QStringLiteral("http://toster.ru/q/168437")));
return a.exec();
}
/**
 * Handles the finished reply: parses the body on success, logs the error
 * text on failure, then releases the reply and quits the application.
 *
 * The body is decoded as UTF-8 before being passed to parseHtml().
 *
 * @param rep Finished reply; scheduled for deletion before returning.
 */
void requestFinished(QNetworkReply* rep) {
    const bool succeeded = (rep->error() == QNetworkReply::NoError);

    if (!succeeded) {
        qDebug() << "request failed: " << rep->errorString();
    } else {
        parseHtml(QString::fromUtf8(rep->readAll()));
    }

    // Both outcomes end the same way: free the reply and stop the event loop.
    rep->deleteLater();
    QCoreApplication::quit();
}
/**
 * Parses an HTML document and prints the inner text of every <title> element.
 *
 * Parsing failures are reported through qCritical() instead of propagating,
 * so this function does not throw. When the exception derives from
 * std::exception its message is included in the log.
 *
 * @param html Document text to parse.
 */
void parseHtml(QString html) {
    try {
        QGumboDocument doc = QGumboDocument::parse(html);
        QGumboNode root = doc.rootNode();
        auto nodes = root.getElementsByTagName(HtmlTag::TITLE);
        for (auto& node : nodes) {
            qDebug() << "title: " << node.innerText();
        }
    } catch (const std::exception& e) {
        // Keep the reason: a bare catch-all would discard the only diagnostic.
        qCritical() << "smth wrong:" << e.what();
    } catch (...) {
        // Fallback for exceptions that do not derive from std::exception.
        qCritical() << "smth wrong";
    }
}
Didn't find what you were looking for?
Ask a Question
731 491 924 answers to any question