Answer the question
In order to leave comments, you need to log in
How to save parsed images in different folders?
There is a task to parse images from one site. These are real estate images. I save them all in one folder. The question is: how can I save them in different folders according to the property?
The code currently looks like this:
from urllib.request import urlopen
from urllib.parse import urljoin
from lxml.html import fromstring
import xlsxwriter
# Listing page that is fetched first; links found on it are resolved against it.
URL = 'http://amberstarluxury.pt/index.php/ru/2013-07-22-01-32-36'
# CSS selector for the listing elements whose first <a> points to a property page.
ITEM_PATH = '.display .propertyaddress'
def _property_dir_name(num, name):
    """Return a filesystem-safe folder name for the num-th property."""
    safe = ''.join(
        ch if ch.isalnum() or ch in ' ._-' else '_' for ch in name
    ).strip(' .')
    # The position prefix keeps folders unique when two properties share a name.
    return '{0}_{1}'.format(num, safe) if safe else str(num)


def parse_amber(images_dir='houses', split_by_property=False):
    """Scrape the property listing at URL and download every gallery image.

    For each element matched by ITEM_PATH on the listing page, the first
    link supplies the property's name and detail-page URL.  The detail page
    is then fetched and every image linked from its gallery is saved to disk.

    Args:
        images_dir: Directory the images are written to; created if it is
            missing.  Defaults to 'houses', the location used so far.
        split_by_property: When true, each property's images are saved in
            their own sub-directory of images_dir, named from the
            property's position in the listing and its name.  When false
            (the default) all images share images_dir.

    Returns:
        A list of dicts, one per property, with the keys 'name' and 'url'.

    Raises:
        urllib.error.URLError: If a page or an image cannot be fetched.
        IndexError: If a listing item contains no link, or a detail page
            lacks the expected second '.jwts_tabbertab' element.
    """
    import os  # function-scope import: the module header does not import os

    # 'with' closes each HTTP response even if reading or parsing fails.
    with urlopen(URL) as response:
        list_doc = fromstring(response.read().decode('utf-8'))

    ambers = []
    for num, elem in enumerate(list_doc.cssselect(ITEM_PATH), start=1):
        a = elem.cssselect('a')[0]
        # a.text is None when the link begins with a child element.
        name = (a.text or '').strip()
        house_url = urljoin(URL, a.get('href'))
        amber = {'name': name, 'url': house_url}

        target_dir = images_dir
        if split_by_property:
            target_dir = os.path.join(
                images_dir, _property_dir_name(num, name)
            )
        os.makedirs(target_dir, exist_ok=True)

        # Go inside the property's detail page.
        with urlopen(house_url) as response:
            detail_doc = fromstring(response.read().decode('utf-8'))
        # The gallery container has no class of its own, so it is reached
        # positionally: the last child of the second '.jwts_tabbertab'.
        gallery = detail_doc.cssselect('.jwts_tabbertab')[1][-1]
        for pic in gallery:
            for link in pic.cssselect('a'):
                href = link.get('href')
                if not href:
                    continue  # anchor without a target: nothing to download
                # Image names are simply the last 10 characters of the href;
                # path separators are replaced so the name cannot point
                # into another directory.
                file_name = href[-10:].replace('/', '_').replace('\\', '_')
                # Resolve against the detail page so relative hrefs work too.
                with urlopen(urljoin(house_url, href)) as response:
                    data = response.read()
                with open(os.path.join(target_dir, file_name), 'wb') as out:
                    out.write(data)
                print(href)

        ambers.append(amber)
        print(num)
    return ambers
def export_excel(filename, ambers):
    """Write the scraped properties to an .xlsx workbook.

    Row 0 holds the bold column headers.  Every following row holds one
    property: its name, description and URL in the first three columns,
    followed by one cell per characteristic.

    Args:
        filename: Path of the workbook to create.
        ambers: Iterable of dicts.  The values under 'name', 'descr' and
            'url' are written as cells; 'har' is an iterable of
            characteristics.  A missing text key is written as an empty
            cell and a missing 'har' as no characteristics, so dicts that
            only carry 'name' and 'url' can be exported without a KeyError.
    """
    workbook = xlsxwriter.Workbook(filename)
    worksheet = workbook.add_worksheet()
    bold = workbook.add_format({'bold': True})

    field_names = ('Название', 'Описание', 'Ссылка', 'Характеристики')
    for col, title in enumerate(field_names):
        worksheet.write(0, col, title, bold)

    fields = ('name', 'descr', 'url')
    for row, amber in enumerate(ambers, start=1):
        for col, field in enumerate(fields):
            worksheet.write(row, col, amber.get(field, ''))
        # Characteristics start in the first column after the fixed fields.
        for col, har in enumerate(amber.get('har', ()), start=len(fields)):
            worksheet.write(row, col, har)
    workbook.close()
def main():
    """Entry point: scrape the listing and download the property images."""
    listings = parse_amber()
    # The Excel export step is switched off for now:
    # export_excel('amber.xlsx', listings)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Answer the question
In order to leave comments, you need to log in
Maybe you need os.mkdir()?
The code is hard to follow, but somewhere in it you move from one property to the next. At that point, create a directory for the current property, save its pictures there, and repeat the same steps for every property in the loop.
I did this:
class SaveImg():
    """Download a batch of images into the folder images/<new_dir_name>.

    All the work is done by the constructor; the instance keeps no state.
    """

    def __init__(self, new_dir_name, images_url):
        """Fetch every URL in images_url and store it as a numbered .jpg.

        Files are named 1.jpg, 2.jpg, ... in download order.  The counter
        only advances after a successful download, so a failed image does
        not leave a gap in the numbering.

        Paths are built explicitly instead of changing the working
        directory, so the process-wide current directory is never touched
        and cannot be left pointing at the wrong place when an error occurs.

        Args:
            new_dir_name: Name of the sub-folder of 'images' to save into;
                converted with str().  The folder (and 'images' itself) is
                created if missing and reused if it already exists.
            images_url: Iterable of image URLs.

        Raises:
            urllib.error.URLError: If an image cannot be fetched.  Downloads
                that end early (ContentTooShortError) are only printed and
                skipped.
        """
        target_dir = os.path.join('images', str(new_dir_name))
        os.makedirs(target_dir, exist_ok=True)
        count = 1
        for img in images_url:
            img_path = os.path.join(target_dir, str(count) + '.jpg')
            try:
                urlretrieve(img, img_path)
            except ContentTooShortError as e:
                print(e)
                continue
            count += 1
Didn't find what you were looking for?
Ask your question: Ask a Question
731 491 924 answers to any question