Answer the question
In order to leave comments, you need to log in
How to implement JavaScript parsing of a web page that requires authorization?
Hello.
I need to implement a module (in Python) that collects data from web pages with the following features:
- the HTML code of the page is generated by JavaScript
- Access to the pages is implemented through an authorization mechanism
In principle, I have found a way to run the JavaScript and obtain the resulting HTML, which will then be passed to the parser.
The code is based on this tip:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import base64
from PyQt4.QtGui import *
from PyQt4.QtCore import *
from PyQt4.QtWebKit import *
from PyQt4 import QtNetwork
class Render(QWebPage):
    """Load a URL in a WebKit page and keep the rendered main frame.

    Constructing an instance blocks: it starts the Qt event loop, issues the
    request with an HTTP Basic ``Authorization`` header, and returns only
    after the page's ``loadFinished`` signal has fired.

    After construction:
        frame: the page's main frame; ``frame.toHtml()`` yields the markup.
        load_ok: the success flag delivered by ``loadFinished``.
    """

    def __init__(self, url, username='user', password='pass'):
        """Request *url* with Basic credentials and wait for the load to end.

        Args:
            url: Address of the page to load.
            username: Login placed in the ``Authorization`` header.
            password: Password placed in the ``Authorization`` header.
        """
        # Qt needs an application object before the page is created.
        self.app = QApplication(sys.argv)

        credentials = '%s:%s' % (username, password)
        if not isinstance(credentials, bytes):
            credentials = credentials.encode('utf-8')
        # b64encode never emits line breaks. encodestring wraps its output
        # every 76 characters, so stripping only the last character left
        # embedded newlines in the header for long credentials.
        auth_header = b'Basic ' + base64.b64encode(credentials)

        request = QtNetwork.QNetworkRequest()
        request.setRawHeader(QByteArray(b'Authorization'),
                             QByteArray(auth_header))
        request.setUrl(QUrl(url))

        QWebPage.__init__(self)
        self.frame = None
        self.load_ok = False
        self.loadFinished.connect(self._loadFinished)
        self.mainFrame().load(request)
        # Blocks here until _loadFinished() quits the event loop.
        self.app.exec_()

    def _loadFinished(self, result):
        """Record the load outcome and the main frame, then stop the loop.

        Args:
            result: Success flag passed by the ``loadFinished`` signal.
        """
        self.load_ok = bool(result)
        self.frame = self.mainFrame()
        self.app.quit()
def main():
url = 'http://omegaqa.corp.emc.com/omega/Apps/AppsContainer.asp?DefaultAppID=6'
r = Render(url)
html = r.frame.toHtml()
print unicode(html, "utf-8", errors="ignore")
if __name__ == '__main__':
main()
# Fetch the page through requests, passing the credentials as an auth tuple.
# NOTE(review): neither the `requests` import nor a module-level `url` is
# visible in this snippet — confirm where they are meant to come from.
res = requests.get(url, auth=('user', 'password'))
Answer the question
In order to leave comments, you need to log in
You need to use mechanize.Browser().
It can both log in first and then fetch the page.
Didn't find what you were looking for?
Ask your questionAsk a Question
731 491 924 answers to any question