Answer the question
In order to leave comments, you need to log in
Why does the read_csv function in pandas not properly handle integer and decimal numbers from a CSV file?
After writing this code:
import locale
import sys
from locale import atof
locale.setlocale(locale.LC_NUMERIC, '')
'en_GB.UTF-8'
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.tree import export_graphviz
from six import StringIO
from IPython.display import Image
import pydotplus
col_names = ['Project', 'OrderDate', 'orderid', 'ClientID','IsRepeat','IsBlocked','IsManual','AutoDecision','ManualApprove','IsLoan','ShortTermAmount','ShortTermPeriod','LongTermAmount','LongTermPeriod','RequestedAmount','RequestedPeriod','LoanSum','Period','ShortTermScore','LongTermScore']
#dtypes={"Project": bool, "OrderDate": 'str', "orderid": "str", "ClientID" : "str","IsRepeat" :bool,"IsBlocked":bool,"IsManual":bool,"AutoDecision":bool,"ManualApprove":bool,"IsLoan":bool}# "ShortTermAmount":"Int64","ShortTermPeriod":"Int64","LongTermAmount":"Int64","LongTermPeriod":"Int64","RequestedAmount":"Int64","RequestedPeriod":"Int64","LoanSum":"Int64","Period":"Int64","ShortTermScore":"float64","LongTermScore":"float64"}
dtypes={"orderid": object, "ClientID" : object,"RequestedAmount":object,"RequestedPeriod":object}
parse_dates = ['OrderDate']
test = pd.read_csv("/home/man/Test_task.csv",sep=' , ', thousands=',', header=None, dtype=dtypes ,names=col_names, parse_dates=parse_dates, converters = {'Project': lambda x: bool(str(x)) if x != '-' else np.nan, 'IsRepeat': lambda x: bool(str(x)) if x != '-' else np.nan, 'IsBlocked': lambda x: bool(str(x)) if x != '-' else np.nan,'orderid': lambda x: int(x.replace(',','')) if x != '-' else np.nan, 'IsManual': lambda x: bool(str(x)) if x != '-' else np.nan, 'AutoDecision': lambda x: bool(str(x)) if x != '-' else np.nan, 'ManualApprove': lambda x: bool(str(x)) if x != '-' else np.nan, 'IsLoan': lambda x: bool(str(x)) if x != '-' else np.nan})#.fillna(0)
df = pd.DataFrame(data=test)
test.head()
'orderid': lambda x: int(x.replace(',','')) if x != '-' else np.nan,
Answer the question
In order to leave comments, you need to log in
Didn't find what you were looking for?
Ask your questionAsk a Question
731 491 924 answers to any question