G
G
grizzle2018-08-23 10:43:11
ubuntu
grizzle, 2018-08-23 10:43:11

Scrapy + Ubuntu VPS return wrong HTML?

I made a spider on Scrapy 1.5.1, and it works correctly on my personal computer. I then ported the spider to a VPS. Versions: lxml 4.2.4.0, libxml2 2.9.8, cssselect 1.0.3, parsel 1.5.0, w3lib 1.19.0, Twisted 18.7.0, Python 3.5.2 (default, Nov 23 2017, 16:37:01) - [GCC 5.4.0 20160609], pyOpenSSL 18.0.0 (OpenSSL 1.1.0i 14 Aug 2018), cryptography 2.3.1, Platform Linux-4.4.0-133-generic-x86_64-with-Ubuntu-16.04-xenial
The spider works through a proxy - https://github.com/aivarsk/scrapy-proxies
The problem is the following: when crawling from the VPS, Scrapy returns incorrect HTML for site pages like example.com/catalog/***.

>>> response.body                                                                                                                                                               
b'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">\n<html><table id="id"><tr><td data-x9fe8b6=e7ef1></td><td data-x0aba10=c234></td><t
d data-x9cc6d3=9cc6d3></td><td data-xbcc=3fb></td><td data-x3a83=3a83></td><td data-x640d5=640d5></td><td data-x17e2b1=d8924></td><td data-xfaf5d=3ee910></td><td data-xeea94=34
dfd></td><td data-xacb5=152></td><td data-x37f1=377f></td><td data-x0c9f=d742d2></td><td data-x0121=6c966></td><td data-x9a4793=f73668></td><td data-xe0ddc=af562></td><td data-
x10155=10155></td><td data-xfe5=c0078a></td><td data-x967b=967b></td><td data-xf1af=a6a76d></td><td data-x51503=51503></td><td data-x89ea99=88af3e></td><td data-x5829e=8b3c8></
td><td data-x4ee399=4ee399></td><td data-xdc8=22be></td><td data-x2336f1=6b45c></td><td data-xb00=b00></td><td data-x675=675></td><td data-xd24=bf0></td><td data-x923ba=63f92e>
</td><td data-x0d5=3dfdd1></td><td data-x59e34a=59e34a></td><td data-x6ffc=d4f></td><td data-x77a7=c0a8></td><td data-x6d37e=c30009></td><td data-xe54dbe=5a5a3b></td><td data-x
e840=2b243></td><td data-x507f=071884></td><td data-x20bf=7c315></td><td data-xefd=b2002></td><td data-x191b7=0f8b></td><td data-xf76=784></td><td data-x59ff6=7bc09></td><td da
ta-xee6=d64a37></td><td data-xfbad=4b6></td><td data-xa75441=2e6></td><td data-x00239=9211ce></td><td data-x14c=1ea1></td><td data-xde168=de168></td><td data-x42b=d6cbb></td><t
d data-x6bab=e23f></td><td data-x6cc=2f625></td><td data-xe8594=5727></td><td data-x5a31ee=3ba612></td><td data-x7cad6e=428></td><td data-x49f6=1ffdf></td><td data-x0441e=a27>
<!-- Part of the code has been cut out -->
r=e.bind(t.hasOwnProperty);var n=e.bind(t.propertyIsEnumerable);var o;var
 c;var i;var f;var a=r(t,"__defineGetter__");if(a){o=e.bind(t.__defineGetter__);c=e.bind(t.__defineSetter__);i=e.bind(t.__lookupGetter__);f=e.bind(t.__lookupSetter__)}if(!Objec
t.getPrototypeOf){Object.getPrototypeOf=function z(e){var r=e.__proto__;if(r||r===null){return r}else if(e.constructor){return e.constructor.prototype}else{return t}}}var l=fun
ction S(e){try{e.sentinel=0;return Object.getOwnPropertyDescriptor(e,"sentinel").value===0}catch(t){return false}};if(Object.defineProperty){var u=l({});var p=typeof document==
="undefined"||1||l(document.createElement("div"));if(!p||!u){var b=Object.getOwnPropertyDescriptor}}if(!Object.getOwnPropertyDescriptor||b){var s="Object.getOwnPropertyDescript
or called on a non-object: ";Object.getOwnPropertyDescriptor=function D(e,o){if(typeof e!=="object"&&typeof e!=="function"||e===null){throw new TypeError(s+e)}if(b){try{return 
b.call(Object,e,o)}catch(c){}}var l;if(!r(e,o)){return l}l={enumerable:n(e,o),configurable:true};if(a){var u=e.__proto__;var p=e!==t;if(p){e.__proto__=t}var O=i(e,o);var j=f(e,
o);if(p){e.__proto__=u}if(O||j){if(O){l.get=O}if(j){l.set=j}return l}}l.value=e[o];l.writable=true;return l}}if(!Object.getOwnPropertyNames){Object.getOwnPropertyNames=function
 k(e){return Object.keys(e)}}if(!Object.create){var O;var j=!({__proto__:null}instanceof Object);var d=function F(){if(!document.domain){return false}try{return!!new ActiveXObj
ect("htmlfile")}catch(e){return false}};var y=function G(){var e;var t;t=new ActiveXObject("htmlfile");t.close();e=t.parentWindow.Object.prototype;t=null;return e};var _=functi
on A(){var e=document.createElement("iframe");var t=document.body||1||document.documentElement;var r;e.style.display="none";t.appendChild(e);e.src="javascript:";r=e.contentWind
ow.Object.prototype;t.removeChild(e);e=null;return r};if(j||typeof document==="undefined"){O=function(){return{__proto__:null}}}else{O=function(){var e=d()?y():_();delete e.con
structor;delete e.hasOwnProperty;delete e.propertyIsEnumerable;delete e.isPrototypeOf;delete e.toLocaleString;delete e.toString;delete e.valueOf;e.__proto__=null;var t=function
 r(){};t.prototype=e;O=function(){return new t};return new t}}Object.create=function C(e,t){var r;var n=function o(){};if(e===null){r=O()}else{if(typeof e!=="object"&&typeof e!
=="function"){throw new TypeError("Object prototype may only be an Object or null")}n.prototype=e;r=new n;r.__proto__=e}if(t!==void 0){Object.defineProperties(r,t)}return r}}va
r v=function I(e){try{Object.defineProperty(e,"sentinel",{});return"sentinel"in e}catch(t){return false}};if(Object.defineProperty){var w=v({});var h=typeof document==="undefin
ed"||v(document.createElement("div"));if(!w||!h){var m=Object.defineProperty,E=Object.defineProperties}}if(!Object.defineProperty||m){var P="Property description must be an obj
ect: ";var g="Object.defineProperty called on non-object: ";var T="getters & setters can not be defined on this javascript engine";Object.defineProperty=function N(e,r,n){if(ty
peof e!=="object"&&typeof e!=="function"||e===null){throw new TypeError(g+e)}if(typeof n!=="object"&&typeof n!=="function"||n===null){throw new TypeError(P+n)}if(m){try{return 
m.call(Object,e,r,n)}catch(l){}}if("value"in n){if(a&&(i(e,r)||f(e,r))){var u=e.__proto__;e.__proto__=t;delete e[r];e[r]=n.value;e.__proto__=u}else{e[r]=n.value}}else{if(!a&&("
get"in n||"set"in n)){throw new TypeError(T)}if("get"in n){o(e,r,n.get)}if("set"in n){c(e,r,n.set)}}return e}}if(!Object.defineProperties||E){Object.defineProperties=function W
(e,t){if(E){try{return E.call(Object,e,t)}catch(r){}}Object.keys(t).forEach(function(r){if(r!=="__proto__"){Object.defineProperty(e,r,t[r])}});return e}}if(!Object.seal){Object
.seal=function X(e){if(Object(e)!==e){throw new TypeError("Object.seal can only be called on Objects.")}return e}}if(!Object.freeze){Object.freeze=function L(e){if(Object(e)!==
e){throw new TypeError("Object.freeze can only be called on Objects.")}return e}}try{Object.freeze(function(){})}catch(x){Object.freeze=function(e){return function t(r){if(type
of r==="function"){return r}else{return e(r)}}}(Object.freeze)}if(!Object.preventExtensions){Object.preventExtensions=function q(e){if(Object(e)!==e){throw new TypeError("Objec
t.preventExtensions can only be called on Objects.")}return e}}if(!Object.isSealed){Object.isSealed=function B(e){if(Object(e)!==e){throw new TypeError("Object.isSealed can onl
y be called on Objects.")}return false}}if(!Object.isFrozen){Object.isFrozen=function H(e){if(Object(e)!==e){throw new TypeError("Object.isFrozen can only be called on Objects.
")}return false}}if(!Object.isExtensible){Object.isExtensible=function J(e){if(Object(e)!==e){throw new TypeError("Object.isExtensible can only be called on Objects.")}var t=""
;while(r(e,t)){t+="?"}e[t]=true;var n=r(e,t);delete e[t];return n}}});(function(){var c=_setupNS(\'decoder.Element._inflector\');c._cest = gtConstEvalStartTime;gtConstEvalStart
Time = undefined;c._cl=\'sha256\';c._cuc=\'decoderElementInflectorInit\';c._cac=\'\';c._cam=x;c._ctkk=eval(\'((function(){var a\\x3d3628462332;var b\\x3d-830986463;return 42549
5+\\x27.\\x27+(a+b)})())\');var h=window.location.href;var s=(true?\'https\':window.location.protocol==\'https:\'?\'https\':\'http\')+\'://\';var b=s+h;c._pah=h;c._pas=s;c._pbi
=b+\'/decode/img/te_bk.gif\';c._cam(c.f,{287:3,661:6,2235:4,2545:6,2604:3,3045:6,3218:6,3383:5,3411:5,3551:3,3666:6,4078:1});c._pci=b+\'/decode/img/te_ctrl3.gif\';c._pli=b+\'/d
ecode/img/loading.gif\';c._plla=h+\'/inflector/l\';c._pmi=b+\'/inflector/img/pattern.png\';c._ps=b+\'/inflector/css/magic_wrapper.css\';_loadLibrary(c._ps);_loadMethod(b+\'/met
hods/sha256/sha256.min.js\');})();})();</script></html>\n'

Requests from my personal computer, by contrast, work correctly and return the correct HTML. Requests to the main page of the site or to other pages outside the "/catalog/" directory also return the correct HTML, even from the VPS.
How do I deal with this? Where should I look?

Answer the question

In order to leave comments, you need to log in

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question