Python
PC_Like, 2020-12-29 00:17:09

Python - how to work with Arabic, Chinese and Spanish characters in URLs?

There are the Google Sheets API and the Google Search Console API. The task is to build a consolidated table by matching the rows of both tables by page URL.

The following code is written:

1) Loading Google Sheets content into a local file via the API:

# -*- coding: utf8 -*- 
from __future__ import print_function
import pickle
import os.path
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import json

class Sheet:
    """docstring for ClassName"""
    def __init__(self):
        self.SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']
        self.SAMPLE_SPREADSHEET_ID = 'list_id'
        self.SAMPLE_RANGE_NAME = 'B2:I759'

    def start(self):
        creds = None
        if os.path.exists('token.pickle'):
            with open('token.pickle', 'rb') as token:
                creds = pickle.load(token)
        if not creds or not creds.valid:
            if creds and creds.expired and creds.refresh_token:
                creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file(
                    'credentials.json', self.SCOPES)
                creds = flow.run_local_server(port=0)
            with open('token.pickle', 'wb') as token:
                pickle.dump(creds, token)

        service = build('sheets', 'v4', credentials=creds)

        sheet = service.spreadsheets()
        result = sheet.values().get(spreadsheetId=self.SAMPLE_SPREADSHEET_ID,
                                    range=self.SAMPLE_RANGE_NAME).execute()
        values = result.get('values', [])
        
        if not values:
            print('No data found.')
            return

        data = []
        for row in values:
            data.append({"Страница": str(row[0]),
                         # Fix a recurring typo in the sheet's links.
                         "Ссылка": str(row[1]).replace('htpts', 'https'),
                         "Регистрации": int(row[3]),
                         "FTD": int(row[4]),
                         "Deposits": float(row[5].replace(',', '.')),
                         "PNL": float(row[7].replace(',', '.'))})
        print(data)
        # ensure_ascii=False keeps Cyrillic, Chinese and Arabic characters
        # readable in the file instead of \uXXXX escapes.
        dump = json.dumps(data, ensure_ascii=False)
        # 'w' truncates the file; 'r+' leaves stale bytes behind when the
        # new dump is shorter than the old one.
        with open('table.txt', 'w', encoding='utf-8') as f:
            f.write(dump)
        print(dump)


2) Querying GSC and joining the tables:

# -*- coding: utf8 -*- 
from __future__ import print_function
import argparse
import sys
from googleapiclient import sample_tools
import requests as rq
import json
from sheets_test import *

# Declare command-line flags.
argparser = argparse.ArgumentParser(add_help=False)
argparser.add_argument('property_uri', type=str,
                       help=('Site or app URI to query data for (including '
                             'trailing slash).'))
argparser.add_argument('start_date', type=str,
                       help=('Start date of the requested date range in '
                             'YYYY-MM-DD format.'))
argparser.add_argument('end_date', type=str,
                       help=('End date of the requested date range in '
                             'YYYY-MM-DD format.'))
def main(argv):
  service, flags = sample_tools.init(
      argv, 'searchconsole', 'v1', __doc__, __file__, parents=[argparser],
      scope='https://www.googleapis.com/auth/webmasters.readonly')
  request = {
      'startDate': flags.start_date,
      'endDate': flags.end_date,
      'dimensions': ['page'],
  }
  response = execute_request(service, flags.property_uri, request)
  #print(response)
  source = response['rows']
  url = "some_URL"
  data = []
  for item in source:
    data.append({"Сайт": flags.property_uri,
                 "Страница": item['keys'][0],
                 "Клики": item['clicks'],
                 "Показы": item['impressions'],
                 "CTR": item['ctr'],
                 "Позиция": item['position']})
  #print(data)
  #dump = json.dumps(data)
  #output = json.loads(dump)
  sheets = Sheet()
  sheets.start()
  with open('table.txt', 'r', encoding="utf-8") as jsonfile:
    upload = json.load(jsonfile)
  result = []
  for row in upload:
    for item in data:
      if (item["Страница"] == row["Ссылка"]):
        if (item["Клики"] > 0):
          conversions = round(row["Регистрации"] / item["Клики"] * 100, 2)
        else:
          conversions = 0
        if (row["Регистрации"] > 0):
          ftd_percent = round(row["FTD"] / row["Регистрации"] * 100, 2)
          pnl_per_reg = round(row["PNL"] / row["Регистрации"], 2)
        else:
          ftd_percent = 0
          pnl_per_reg = 0
        result.append({"Сайт": item["Сайт"],
          "Страница": item["Страница"],
          "Клики": item["Клики"],
          "Показы": item["Показы"],
          "CTR": round(item["CTR"] * 100, 2),
          "Позиция": round(item["Позиция"], 2),
          "Регистрации": row["Регистрации"],
          "CR, %": conversions,
          "FTD": row["FTD"],
          "% FTD": ftd_percent,
          "Суммы депозитов": row["Deposits"],
          "Прибыль": row["PNL"],
          "Прибыль на регистрацию": pnl_per_reg})
  print(result)
  # requests serializes the list of dicts itself, so no manual
  # json.dumps / json.loads round trip is needed.
  req = rq.post(url, json=result)
  print(req.status_code)
  print(req.content)
  #print_table(response, 'Queries')

def execute_request(service, property_uri, request):
  """Executes a searchAnalytics.query request.
  Args:
    service: The searchconsole service to use when executing the query.
    property_uri: The site or app URI to request data for.
    request: The request to be executed.
  Returns:
    The full query response as a dict; the rows are under the 'rows' key.
  """
  return service.searchanalytics().query(
      siteUrl=property_uri, body=request).execute()

if __name__ == '__main__':
  main(sys.argv)


Everything would be fine, but the code correctly processes only pages whose URLs consist of Latin characters. The table also contains pages whose URLs include apostrophes, Chinese characters and Arabic characters, and those rows never match. Tell me what I am doing wrong. Thanks in advance.

1 answer
Dmitry, 2020-12-29
@dmtrrr

https://docs.python.org/3/library/urllib.parse.htm...
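
In short: Search Console most likely returns page URLs percent-encoded, so Chinese, Arabic and special characters come back as %XX sequences, while the spreadsheet stores the same URLs as raw Unicode, and a plain string comparison never matches. A minimal sketch of the idea, assuming percent-encoding really is the mismatch (the helper name normalize_url and the example URLs are made up for illustration):

# -*- coding: utf8 -*-
from urllib.parse import unquote

def normalize_url(url):
    """Decode %XX escapes so encoded and raw Unicode URLs compare equal.

    unquote() leaves strings without escapes untouched, so it is safe
    to apply to both sides of the comparison.
    """
    return unquote(url.strip())

# The same page as GSC reports it (percent-encoded) and as it is
# stored in the spreadsheet (raw Unicode):
gsc_page = 'https://example.com/%D8%B9%D8%B1%D8%A8%D9%89/'
sheet_link = 'https://example.com/عربى/'

print(normalize_url(gsc_page) == normalize_url(sheet_link))  # True

# In the matching loop this would become:
#   if normalize_url(item["Страница"]) == normalize_url(row["Ссылка"]):

quote() from the same module does the reverse when a fully encoded URL is needed for an HTTP request.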
