Answer the question
In order to leave comments, you need to log in
How to convert a string of '\u0434\u0438…' escape sequences into readable text in Python 2.7 (Windows 7)?
The string variable contains escape codes as shown below. I need to convert its contents into readable Russian text so that I can work with the text further. How can I do this?
Example:
string = u'\u0434\u0438\u0435\u0442\u0430, \u0434\u0438\u0435\u0442\u0430 \u0434\u043b\u044f \u043f\u043e\u0445\u0443\u0434\u0435\u043d\u0438\u044f'
print string >>>>> диета, диета для похудения
Answer the question
In order to leave comments, you need to log in
Now you can fix it like this
# A unicode object is already decoded text, so it is encoded straight to the
# target byte encoding. Calling .decode() on a unicode object makes Python 2
# implicitly encode it to ASCII first, which raises UnicodeEncodeError as
# soon as the text contains a non-ASCII character.
a = u'1a'
a = a.encode('cp1251')
print(a)

# Same conversion for text with a Cyrillic letter; the intermediate
# UTF-8 encode/decode round trip is a no-op and has been dropped.
a = u'1\u0430'
a = a.encode('cp1251')
print(a)
#~ Do not confuse Unicode with UTF-8! To Python 2.*, UTF-8, just like cp1251, is an array of bytes.
msg = u'1\u0430'
# Plain truthiness test for "non-empty". The previous len(str(msg)) check
# raised UnicodeEncodeError in Python 2 for unicode text with non-ASCII
# characters, because str() encodes with the ASCII codec.
if msg:
    if isinstance(msg, unicode):
        # Already decoded text: encode it directly to cp1251 bytes.
        print(msg.encode('cp1251'))
    # NOTE: get_codepage() must already be defined when this branch runs.
    elif get_codepage(msg) == 'UTF-8':
        # UTF-8 bytes: decode to unicode first, then re-encode as cp1251.
        print(msg.decode('UTF-8').encode('cp1251'))
    else:
        # Bytes in any other (or undetected) encoding are printed untouched.
        print(msg)
# Maps the detector's result labels to codec names.
encodings = {
    'UTF-8': 'utf-8',
    'CP1251': 'windows-1251',
    'KOI8-R': 'koi8-r',
    'IBM866': 'ibm866',
    'ISO-8859-5': 'iso-8859-5',
    'MAC': 'mac',
}


def get_codepage(str=None):
    """Guess which Cyrillic encoding a byte string is most likely in.

    Every byte value in the range 128..255 is checked against the ranges
    each candidate encoding uses for Russian letters, and the matching
    candidates receive points (lowercase letters weigh more than uppercase
    ones; UTF-8 lead/continuation byte pairs weigh the most).

    Args:
        str: The data to inspect. May be a byte string, a text string whose
            characters stand for byte values, or any iterable yielding
            one-character strings or integers. ``None`` and empty input are
            accepted. (The parameter name shadows the builtin; it is kept
            for backward compatibility with keyword callers.)

    Returns:
        One of the keys of ``encodings`` (the candidate with the highest
        score), or ``''`` when no candidate scored any points. On equal
        scores the first candidate met while iterating the score dict wins.
    """
    uppercase = 1
    lowercase = 3
    utfupper = 5
    utflower = 7

    data = str
    if not data:
        return ''

    codepages = dict.fromkeys(encodings, 0)
    last_simb = 0
    for simb in data:
        # Iterating Python 3 bytes yields ints; ord() only accepts strings.
        simb_ord = simb if isinstance(simb, int) else ord(simb)

        # Values outside 128..255 cannot be a Russian letter in any of the
        # single-byte encodings, nor part of a UTF-8 Cyrillic sequence.
        if simb_ord < 128 or simb_ord > 255:
            continue

        # UTF-8: uppercase letters are D0 90..AF, plus D0 81 for "Ё".
        if last_simb == 208 and (143 < simb_ord < 176 or simb_ord == 129):
            codepages['UTF-8'] += (utfupper * 2)
        # UTF-8: lowercase letters are D0 B0..BF and D1 80..8F, plus
        # D1 91 for "ё" (D0 91 is the uppercase "Б", already counted above).
        if (last_simb == 208 and 175 < simb_ord < 192) \
                or (last_simb == 209
                    and (127 < simb_ord < 144 or simb_ord == 145)):
            codepages['UTF-8'] += (utflower * 2)

        # CP1251
        if 223 < simb_ord < 256 or simb_ord == 184:
            codepages['CP1251'] += lowercase
        if 191 < simb_ord < 224 or simb_ord == 168:
            codepages['CP1251'] += uppercase

        # KOI8-R
        if 191 < simb_ord < 224 or simb_ord == 163:
            codepages['KOI8-R'] += lowercase
        if 222 < simb_ord < 256 or simb_ord == 179:
            codepages['KOI8-R'] += uppercase

        # IBM866
        # NOTE(review): 240 is scored as lowercase and 241 as uppercase;
        # in cp866 these look swapped ("Ё" / "ё") - confirm before changing.
        if 159 < simb_ord < 176 or 223 < simb_ord < 241:
            codepages['IBM866'] += lowercase
        if 127 < simb_ord < 160 or simb_ord == 241:
            codepages['IBM866'] += uppercase

        # ISO-8859-5
        if 207 < simb_ord < 240 or simb_ord == 161:
            codepages['ISO-8859-5'] += lowercase
        if 175 < simb_ord < 208 or simb_ord == 241:
            codepages['ISO-8859-5'] += uppercase

        # MAC
        if 221 < simb_ord < 255:
            codepages['MAC'] += lowercase
        if 127 < simb_ord < 160:
            codepages['MAC'] += uppercase

        last_simb = simb_ord

    # Pick the highest-scoring candidate; a score of 0 never wins, so
    # input without any high bytes yields ''.
    best_name = ''
    best_score = 0
    for name in codepages:
        if codepages[name] > best_score:
            best_score = codepages[name]
            best_name = name
    return best_name
Didn't find what you were looking for?
Ask your question
731 491 924 answers to any question