Detect the decimal separator (point or comma) when reading batsmall data

This commit is contained in:
rasmusvt 2022-08-22 17:02:42 +02:00
parent eb2666b85d
commit 9a3efbf505

View file

@ -71,8 +71,19 @@ def read_batsmall(path):
df: pandas DataFrame containing the data as-is, but without additional NaN-columns.''' df: pandas DataFrame containing the data as-is, but without additional NaN-columns.'''
# FIXME Now it is hardcoded that the decimal is a comma. It should do a check, as datasets can vary depending on the system settings of the machine that does the data conversion # Determine if decimal point is . or ,
df = pd.read_csv(path, skiprows=2, sep='\t', decimal=',') with open(path, 'r') as f:
for i, line in enumerate(f):
if i == 10:
values = line.split()
if len(values[1].split('.')) == 2:
decimal_point = '.'
elif len(values[1].split(',')) == 2:
decimal_point = ','
df = pd.read_csv(path, skiprows=2, sep='\t', decimal=decimal_point)
df = df.loc[:, ~df.columns.str.contains('^Unnamed')] df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
return df return df