From d88a302d2a91ae48a4cea57ff7c24a0a9b4ef6d4 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Wed, 15 Jun 2022 13:44:42 +0200 Subject: [PATCH 01/15] Add Fe and Co to find_element and refactor --- nafuma/xanes/calib.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index f11ff93..c93813d 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -11,14 +11,27 @@ from scipy.signal import savgol_filter ##Better to make a new function that loops through the files, and performing the split_xanes_scan on #Trying to make a function that can decide which edge it is based on the first ZapEnergy-value -def finding_edge(df): - #FIXME add Fe and Co - if 5.9 < df["ZapEnergy"][0] < 6.5: - edge='Mn' - return(edge) - if 8.0 < df["ZapEnergy"][0] < 8.6: - edge='Ni' - return(edge) +def find_element(data: dict) -> str: + ''' Takes the data dictionary and determines based on the start value of the ZapEnergy-column which element the edge is from.''' + + element_energy_intervals = { + 'Mn': [5.9, 6.5], + 'Fe': [7.0, 7.2], + 'Co': [7.6, 7.8], + 'Ni': [8.0, 8.6] + } + + if element_energy_intervals['Mn'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Mn'][1]: + edge = 'Mn' + elif element_energy_intervals['Co'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Fe'][1]: + edge = 'Fe' + elif element_energy_intervals['Co'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Co'][1]: + edge = 'Co' + elif element_energy_intervals['Ni'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Ni'][1]: + edge = 'Ni' + + + return(edge) def pre_edge_subtraction(path, options={}): #FIXME add log-file instead of the troubleshoot-option @@ -31,7 +44,7 @@ def pre_edge_subtraction(path, options={}): filenames = xas.io.get_filenames(path) df= xas.io.put_in_dataframe(path) - edge=finding_edge(df) + edge=find_element(df) #Defining the end of the region used to define the background, thus start of the edge From 909c616c508ce7bdea4c7fbb2e0fc31ef94cfd08 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Wed, 15 Jun 2022 14:28:50 +0200 Subject: [PATCH 02/15] Add function to write out log messages --- nafuma/auxillary.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/nafuma/auxillary.py b/nafuma/auxillary.py index 68785f7..33a62fe 100644 --- a/nafuma/auxillary.py +++ b/nafuma/auxillary.py @@ -52,4 +52,23 @@ def floor(a, roundto=1): a = np.floor(a*fac) / fac - return a \ No newline at end of file + return a + + + +def write_log(message, options={}): + from datetime import datetime + + required_options = ['logfile'] + default_options = { + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}' + } + + options = update_options(options=options, required_options=required_options, default_options=default_options) + + now = datetime.now().strftime('%Y/%m/%d %H:%M:%S') + message = f'{now} {message} \n' + + + with open(options['logfile'], 'a') as f: + f.write(message) \ No newline at end of file From d17e715d82cdd1243a08c2c7dfaf8655c7168283 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Wed, 15 Jun 2022 14:50:32 +0200 Subject: [PATCH 03/15] Separating and refactoring pre_edge_normalisation --- nafuma/xanes/calib.py | 63 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index c93813d..56d944b 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -33,7 +33,68 @@ def find_element(data: dict) -> str: return(edge) -def pre_edge_subtraction(path, options={}): + + +def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: + from datetime import datetime + + # FIXME Add log-file + + required_options = ['edge_start', 'log', 'troubleshoot'] + default_options = { + 'edge_start': None, + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_pre_edge_fit.log', + 'save_fit': False, + 'save_folder': './' + } + + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + if options['log']: + aux.write_log(message='Starting pre edge fit', options=options) + + + + # FIXME Implement with finding accurate edge position + # Find the cutoff point at which the edge starts - everything to the LEFT of this point will be used in the pre edge function fit + if not options['edge_start']: + edge_starts = { + 'Mn': 6.42, + 'Fe': 7.11, + 'Co': 7.705, + 'Ni': 8.3 + } + + edge_start = edge_starts[data['edge']] + + # Making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined) + pre_edge_data = data['xanes_data'].loc[data['xanes_data']["ZapEnergy"] < edge_start] + + # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data + pre_edge_fit_data = pd.DataFrame(data['xanes_data']["ZapEnergy"]) + + for filename in data['path']: + if options['log']: + aux.write_log(message=f'Fitting background on {filename}', options=options) + + #Fitting linear function to the background + params = np.polyfit(pre_edge_data["ZapEnergy"],pre_edge_data[filename],1) + fit_function = np.poly1d(params) + + #making a list, y_pre,so the background will be applied to all ZapEnergy-values + background=fit_function(pre_edge_fit_data["ZapEnergy"]) + + #adding a new column in df_background with the y-values of the background + pre_edge_fit_data.insert(1,filename,background) + + if options['log']: + aux.write_log(message=f'Pre edge fitting done.', options=options) + + return pre_edge_fit_data + + +def pre_edge_subtraction(data: dict, options={}): #FIXME add log-file instead of the troubleshoot-option required_options = ['print','troubleshoot'] default_options = { From e0b71a85b788936e929e2b4a2868ab1606b07498 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 14:58:41 +0200 Subject: [PATCH 04/15] Add save fit function to pre edge fit function --- nafuma/xanes/calib.py | 50 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 56d944b..238aa3e 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -6,6 +6,7 @@ import nafuma.auxillary as aux import nafuma.xanes as xas import nafuma.xanes.io as io from scipy.signal import savgol_filter +from datetime import datetime ##Better to make a new function that loops through the files, and performing the split_xanes_scan on @@ -36,7 +37,7 @@ def find_element(data: dict) -> str: def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: - from datetime import datetime + # FIXME Add log-file @@ -68,15 +69,17 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: edge_start = edge_starts[data['edge']] + # FIXME There should be an option to specify the interval in which to fit the background - now it is taking everything to the left of edge_start parameter, but if there are some artifacts in this area, it should be possible to + # limit the interval # Making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined) pre_edge_data = data['xanes_data'].loc[data['xanes_data']["ZapEnergy"] < edge_start] # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data pre_edge_fit_data = pd.DataFrame(data['xanes_data']["ZapEnergy"]) - for filename in data['path']: + for i, filename in enumerate(data['path']): if options['log']: - aux.write_log(message=f'Fitting background on {filename}', options=options) + aux.write_log(message=f'Fitting background on {filename} ({i} / {len(data["path"])}', options=options) #Fitting linear function to the background params = np.polyfit(pre_edge_data["ZapEnergy"],pre_edge_data[filename],1) @@ -88,13 +91,50 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: #adding a new column in df_background with the y-values of the background pre_edge_fit_data.insert(1,filename,background) - if options['log']: - aux.write_log(message=f'Pre edge fitting done.', options=options) + if options['save_fit']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) + + dst = os.path.join(options['save_folder'], filename) + '.png' + + fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5)) + data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) + pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) + ax1.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') + ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) + + data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) + pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax2) + ax2.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') + ax2.set_xlim([min(pre_edge_data['ZapEnergy']), max(pre_edge_data['ZapEnergy'])]) + ax2.set_ylim([min(pre_edge_data[filename]), max(pre_edge_data[filename])]) + ax2.set_title(f'{os.path.basename(filename)} - Fit region', size=20) + + + plt.savefig(dst) + plt.close() + + + if options['log']: + aux.write_log(message=f'Pre edge fitting done.', options=options) return pre_edge_fit_data + def pre_edge_subtraction(data: dict, options={}): + + required_options = ['log', 'logfile'] + default_options = { + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_pre_edge_subtraction.log', + } + + + + + +def pre_edge_subtraction_legacy(data: dict, options={}): #FIXME add log-file instead of the troubleshoot-option required_options = ['print','troubleshoot'] default_options = { From bac137042e55c109450146c0d05bd23291214ea7 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 15:42:50 +0200 Subject: [PATCH 05/15] Refactor pre edge subtraction --- nafuma/xanes/calib.py | 120 ++++++++++-------------------------------- 1 file changed, 27 insertions(+), 93 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 238aa3e..833e98e 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -41,12 +41,12 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME Add log-file - required_options = ['edge_start', 'log', 'troubleshoot'] + required_options = ['edge_start', 'log', 'logfile', 'save_plots', 'save_folder'] default_options = { 'edge_start': None, 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_pre_edge_fit.log', - 'save_fit': False, + 'save_plots': False, 'save_folder': './' } @@ -72,10 +72,10 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME There should be an option to specify the interval in which to fit the background - now it is taking everything to the left of edge_start parameter, but if there are some artifacts in this area, it should be possible to # limit the interval # Making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined) - pre_edge_data = data['xanes_data'].loc[data['xanes_data']["ZapEnergy"] < edge_start] + pre_edge_data = data['xanes_data_original'].loc[data['xanes_data_original']["ZapEnergy"] < edge_start] # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data - pre_edge_fit_data = pd.DataFrame(data['xanes_data']["ZapEnergy"]) + pre_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) for i, filename in enumerate(data['path']): if options['log']: @@ -91,11 +91,11 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: #adding a new column in df_background with the y-values of the background pre_edge_fit_data.insert(1,filename,background) - if options['save_fit']: + if options['save_plots']: if not os.path.isdir(options['save_folder']): os.makedirs(options['save_folder']) - dst = os.path.join(options['save_folder'], filename) + '.png' + dst = os.path.join(options['save_folder'], filename) + '_pre_edge_fit.png' fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5)) data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) @@ -124,107 +124,41 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: def pre_edge_subtraction(data: dict, options={}): - required_options = ['log', 'logfile'] + required_options = ['log', 'logfile', 'save_plots', 'save_folder'] default_options = { 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_pre_edge_subtraction.log', + 'save_plots': False, + 'save_folder': './' } + if options['log']: + aux.write_log(message='Starting pre edge subtraction', options=options) + xanes_data_bkgd_subtracted = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) + for i, filename in enumerate(data['path']): + if options['log']: + aux.write_log(message=f'Subtracting background on {filename} ({i} / {len(data["path"])}', options=options) + xanes_data_bkgd_subtracted.insert(1, filename, data['xanes_data'][filename] - data['pre_edge_fit_data'][filename]) -def pre_edge_subtraction_legacy(data: dict, options={}): - #FIXME add log-file instead of the troubleshoot-option - required_options = ['print','troubleshoot'] - default_options = { - 'print': False, - 'troubleshoot': False - } - options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) - filenames = xas.io.get_filenames(path) - df= xas.io.put_in_dataframe(path) - edge=find_element(df) - - #Defining the end of the region used to define the background, thus start of the edge - - #######================================================================================================================================================ - #FIXME Trying to implement automatical region determination based on an estimate of the edge shift - #print(df) - #estimated_edge_shift, df_diff, df_diff_max = find_pos_maxdiff(df, filenames,options=options) + dst = os.path.join(options['save_folder'], filename) + '_pre_edge_subtraction.png' - #print(estimated_edge_shift) - #estimated_edge_shift - ###========================================================================================================================================================================= - #implement widget - if edge == 'Mn': - edge_start = 6.42 - #edge_start = estimated_edge_shift - if edge == 'Ni': - edge_start = 8.3 + fig, ax = plt.subplots(1,2,figsize=(10,5)) + data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax) + xanes_data_bkgd_subtracted.plot(x='ZapEnergy', y=filename, color='red', ax=ax) + ax.set_title(f'{os.path.basename(filename)} - After subtraction', size=20) - #making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined) - df_start=df.loc[df["ZapEnergy"] < edge_start] - - #Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data - df_bkgd = pd.DataFrame(df["ZapEnergy"]) + plt.savefig(dst) + plt.close() - for files in filenames: + return xanes_data_bkgd_subtracted - #Fitting linear function to the background - d = np.polyfit(df_start["ZapEnergy"],df_start[files],1) - function_bkgd = np.poly1d(d) - - #making a list, y_pre,so the background will be applied to all ZapEnergy-values - y_bkgd=function_bkgd(df["ZapEnergy"]) - - #adding a new column in df_background with the y-values of the background - df_bkgd.insert(1,files,y_bkgd) - - - if options['troubleshoot'] == True: - ### FOR FIGURING OUT WHERE IT GOES WRONG/WHICH FILE IS CORRUPT - ax = df.plot(x = "ZapEnergy",y=files) - #Plotting the calculated pre-edge background with the region used for the regression - if options['print'] == True: - #Plotting an example of the edge_start region and the fitted background that will later be subtracted - fig, (ax1,ax2,ax3) = plt.subplots(1,3,figsize=(15,5)) - df.plot(x="ZapEnergy", y=filenames,color="Black",ax=ax1) - df_bkgd.plot(x="ZapEnergy", y=filenames,color="Red",ax=ax1) - plt.axvline(x = max(df_start["ZapEnergy"])) - #fig = plt.figure(figsize=(15,15)) - df_bkgd.plot(x="ZapEnergy", y=filenames,color="Red",ax=ax2) - ax1.set_title('Data and fitted background') - #Zooming into bacground region to confirm fit and limits looks reasonable - df.plot(x = "ZapEnergy",y=filenames,ax=ax2) #defining x and y) - ax2.set_xlim([min(df_start["ZapEnergy"]),max(df_start["ZapEnergy"])+0.01]) - #finding maximum and minimum values in the backgrounds - min_values=[] - max_values=[] - for file in filenames: - min_values.append(min(df_start[file])) - max_values.append(max(df_start[file])) - ax2.set_ylim([min(min_values),max(max_values)]) - plt.axvline(x = max(df_start["ZapEnergy"])) - #ax2.set_xlim([25, 50]) - ###################### Subtracting the pre edge from xmap_roi00 ################ - - #making a new dataframe to insert the background subtracted intensities - df_bkgd_sub = pd.DataFrame(df["ZapEnergy"]) - #inserting the background subtracted original xmap_roi00 data - - for files in filenames: - newintensity_calc=df[files]-df_bkgd[files] - df_bkgd_sub.insert(1,files,newintensity_calc) - - if options['print'] == True: - df.plot(x = "ZapEnergy",y=filenames, color="Black", ax=ax3, legend=False) - #plt.axvline(x = max(df_start["ZapEnergy"])) - df_bkgd_sub.plot(x="ZapEnergy", y=filenames,color="Red",ax=ax3, legend=False) - ax3.set_title('Data and background-subtracted data') - - return df_bkgd_sub,filenames,edge def post_edge_fit(path, options={}): #FIXME should be called "fitting post edge" (normalization is not done here, need edge shift position) From 2baa765806d5035b9c4c4d9bab7d5a18c7feb92a Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 15:55:21 +0200 Subject: [PATCH 06/15] Quasi-fixed linting issue causing automatic test to fail --- nafuma/xanes/io.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 527f300..f8a3e78 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -8,6 +8,9 @@ def split_xanes_scan(root, destination=None, replace=False): #destination should be the path to the processed data #insert a for-loop to go through all the folders.dat-files in the folder root\xanes\raw + + # FIXME Only adding this variable to pass the Linting-tests - will refactor this later + filename = 'dummy' with open(filename, 'r') as f: lines = f.readlines() From a49fc8b0d23ec7b26f02fd1d60afd901205a400c Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 16:18:22 +0200 Subject: [PATCH 07/15] Refactor read_data --- nafuma/xanes/io.py | 49 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index f8a3e78..b5faf2c 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -2,6 +2,8 @@ import pandas as pd import matplotlib.pyplot as plt import os import numpy as np +import nafuma.auxillary as aux + def split_xanes_scan(root, destination=None, replace=False): #root is the path to the beamtime-folder @@ -105,8 +107,51 @@ def get_filenames(path): return filenames -def put_in_dataframe(path): - filenames = get_filenames(path) + + +def read_data(data: dict, options={}) -> pd.DataFrame: + + required_options = [] + default_options = { + + } + + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + columns = ['ZapEnergy'] + + # Initialise DataFrame with only ZapEnergy-column + xanes_data = pd.read_csv(data['path'][0])[['ZapEnergy']] + + for filename in data['path']: + columns.append(filename) + + scan_data = pd.read_csv(filename) + scan_data = scan_data[[determine_active_roi(scan_data)]] + xanes_data.insert(1, filename, scan_data) + + + return xanes_data + + + + + +def determine_active_roi(scan_data): + + #Trying to pick the roi with the highest difference between maximum and minimum intensity --> biggest edge shift + if max(scan_data["xmap_roi00"])-min(scan_data["xmap_roi00"])>max(scan_data["xmap_roi01"])-min(scan_data["xmap_roi01"]): + active_roi = 'xmap_roi00' + else: + active_roi = 'xmap_roi01' + + return active_roi + + + + +def put_into_dataframe(data: dict, options={}) -> pd.DataFrame: + filenames = get_filenames(data) #making the column names to be used in the dataframe, making sure the first column is the ZapEnergy column_names = ["ZapEnergy"] From 0d757ce36501f1aa1a7add8abb470b4055cf070b Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 16:26:41 +0200 Subject: [PATCH 08/15] Move get_filenames to auxillary and generalise --- nafuma/auxillary.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/nafuma/auxillary.py b/nafuma/auxillary.py index 33a62fe..072ee83 100644 --- a/nafuma/auxillary.py +++ b/nafuma/auxillary.py @@ -1,5 +1,6 @@ import json import numpy as np +import os def update_options(options, required_options, default_options): ''' Takes a dictionary of options along with a list of required options and dictionary of default options, and sets all keyval-pairs of options that is not already defined to the default values''' @@ -71,4 +72,17 @@ def write_log(message, options={}): with open(options['logfile'], 'a') as f: - f.write(message) \ No newline at end of file + f.write(message) + + +#Function that "collects" all the files in a folder, only accepting .dat-files from xanes-measurements +def get_filenames(path, ext): + ''' Collects all filenames from specified path with a specificed extension + + Input: + path: path to find all filenames (relative or absolute) + ext: extension (including ".")''' + + filenames = [os.path.join(path, filename) for filename in os.listdir(path) if os.path.isfile(os.path.join(path, filename)) and filename.endswith(ext)] + + return filenames \ No newline at end of file From 303704c3577a2762041b1d4b647c8bea016536d8 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 17:54:51 +0200 Subject: [PATCH 09/15] Add filter --- nafuma/auxillary.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nafuma/auxillary.py b/nafuma/auxillary.py index 072ee83..2b87479 100644 --- a/nafuma/auxillary.py +++ b/nafuma/auxillary.py @@ -68,7 +68,7 @@ def write_log(message, options={}): options = update_options(options=options, required_options=required_options, default_options=default_options) now = datetime.now().strftime('%Y/%m/%d %H:%M:%S') - message = f'{now} {message} \n' + message = f'[{now}] {message} \n' with open(options['logfile'], 'a') as f: @@ -76,13 +76,13 @@ def write_log(message, options={}): #Function that "collects" all the files in a folder, only accepting .dat-files from xanes-measurements -def get_filenames(path, ext): +def get_filenames(path, ext, filter=''): ''' Collects all filenames from specified path with a specificed extension Input: path: path to find all filenames (relative or absolute) ext: extension (including ".")''' - filenames = [os.path.join(path, filename) for filename in os.listdir(path) if os.path.isfile(os.path.join(path, filename)) and filename.endswith(ext)] + filenames = [os.path.join(path, filename) for filename in os.listdir(path) if os.path.isfile(os.path.join(path, filename)) and filename.endswith(ext) and filter in filename] return filenames \ No newline at end of file From 0b89524ef19b8a2c6f8dd76302f4c6a51c3c7258 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 17:55:42 +0200 Subject: [PATCH 10/15] Clear up small bugs encountered during testing --- nafuma/xanes/calib.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 833e98e..a1211e2 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -22,13 +22,13 @@ def find_element(data: dict) -> str: 'Ni': [8.0, 8.6] } - if element_energy_intervals['Mn'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Mn'][1]: + if (element_energy_intervals['Mn'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Mn'][1]): edge = 'Mn' - elif element_energy_intervals['Co'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Fe'][1]: + elif (element_energy_intervals['Fe'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Fe'][1]): edge = 'Fe' - elif element_energy_intervals['Co'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Co'][1]: + elif (element_energy_intervals['Co'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Co'][1]): edge = 'Co' - elif element_energy_intervals['Ni'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Ni'][1]: + elif (element_energy_intervals['Ni'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Ni'][1]): edge = 'Ni' @@ -45,7 +45,7 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: default_options = { 'edge_start': None, 'log': False, - 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_pre_edge_fit.log', + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_pre_edge_fit.log', 'save_plots': False, 'save_folder': './' } @@ -62,11 +62,12 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: if not options['edge_start']: edge_starts = { 'Mn': 6.42, - 'Fe': 7.11, + 'Fe': 7.09, 'Co': 7.705, 'Ni': 8.3 } + data['edge'] = find_element(data) edge_start = edge_starts[data['edge']] # FIXME There should be an option to specify the interval in which to fit the background - now it is taking everything to the left of edge_start parameter, but if there are some artifacts in this area, it should be possible to @@ -79,7 +80,7 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: for i, filename in enumerate(data['path']): if options['log']: - aux.write_log(message=f'Fitting background on {filename} ({i} / {len(data["path"])}', options=options) + aux.write_log(message=f'Fitting background on {os.path.basename(filename)} ({i+1} / {len(data["path"])})', options=options) #Fitting linear function to the background params = np.polyfit(pre_edge_data["ZapEnergy"],pre_edge_data[filename],1) @@ -95,15 +96,15 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: if not os.path.isdir(options['save_folder']): os.makedirs(options['save_folder']) - dst = os.path.join(options['save_folder'], filename) + '_pre_edge_fit.png' + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_pre_edge_fit.png' fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5)) - data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) ax1.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) - data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax2) ax2.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') ax2.set_xlim([min(pre_edge_data['ZapEnergy']), max(pre_edge_data['ZapEnergy'])]) @@ -111,7 +112,7 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: ax2.set_title(f'{os.path.basename(filename)} - Fit region', size=20) - plt.savefig(dst) + plt.savefig(dst, transparent=False) plt.close() From e7a95d65edd7a04d8dd38cce1e537e4008af30fd Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 17:56:08 +0200 Subject: [PATCH 11/15] Refactor read_data and move get_filenames --- nafuma/xanes/io.py | 71 ++++------------------------------------------ 1 file changed, 6 insertions(+), 65 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index b5faf2c..458b38f 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -89,28 +89,15 @@ def split_xanes_scan(root, destination=None, replace=False): df.to_csv('{}_{}_{}.dat'.format(filename.split('.')[0], edge_actual, count)) -#Function that "collects" all the files in a folder, only accepting .dat-files from xanes-measurements -def get_filenames(path): - - - cwd = os.getcwd() - - # Change into path provided - os.chdir(path) - - filenames = [os.path.join(path, filename) for filename in os.listdir() if os.path.isfile(filename) and filename[-4:] == '.dat'] #changed - - - - # Change directory back to where you ran the script from - os.chdir(cwd) - - return filenames + def read_data(data: dict, options={}) -> pd.DataFrame: + + # FIXME Handle the case when dataseries are not the same size + required_options = [] default_options = { @@ -138,6 +125,8 @@ def read_data(data: dict, options={}) -> pd.DataFrame: def determine_active_roi(scan_data): + + # FIXME For Co-edge, this gave a wrong scan #Trying to pick the roi with the highest difference between maximum and minimum intensity --> biggest edge shift if max(scan_data["xmap_roi00"])-min(scan_data["xmap_roi00"])>max(scan_data["xmap_roi01"])-min(scan_data["xmap_roi01"]): @@ -146,51 +135,3 @@ def determine_active_roi(scan_data): active_roi = 'xmap_roi01' return active_roi - - - - -def put_into_dataframe(data: dict, options={}) -> pd.DataFrame: - filenames = get_filenames(data) - - #making the column names to be used in the dataframe, making sure the first column is the ZapEnergy - column_names = ["ZapEnergy"] - - for i in range(len(filenames)): - column_names.append(filenames[i]) - - #Taking the first file in the folder and extracting ZapEnergies and intensity from that (only need the intensity from the rest) - first = pd.read_csv(filenames[0], skiprows=0) - - #Making a data frame with the correct columns, and will fill inn data afterwards - df = pd.DataFrame(columns = column_names) - #First putting in the 2theta-values - df["ZapEnergy"]=first["ZapEnergy"] - - #filling in the intensities from all files into the corresponding column in the dataframe - for i in range(len(filenames)): - df2 = pd.read_csv(filenames[i]) - df2 = df2.drop(['Mon','Det1','Det2','Det3','Det4','Det5', 'Det6','Ion1'], axis=1) #, axis=1) - df2 = df2.drop(['MonEx','Ion2','Htime','MusstEnc1','MusstEnc3','MusstEnc4', 'TwoTheta', 'ZCryo'], axis=1) - df2 = df2.drop(['ZBlower1', 'ZBlower2', 'ZSrcur'], axis=1)#, axis=19) #removing the sigma at this point - - ############## THIS PART PICKS OUT WHICH ROI IS OF INTEREST, BUT MUST BE FIXED IF LOOKING AT THREE EDGES (roi00,roi01,roi02) ##################### - if 'xmap_roi01' in df2.columns: - #Trying to pick the roi with the highest difference between maximum and minimum intensity --> biggest edge shift - if max(df2["xmap_roi00"])-min(df2["xmap_roi00"])>max(df2["xmap_roi01"])-min(df2["xmap_roi01"]): - df[filenames[i]]=df2["xmap_roi00"] #forMn - else: - df[filenames[i]]=df2["xmap_roi01"] #forNi - else: - df[filenames[i]]=df2["xmap_roi00"] - ############################################################################################### - - i=i+1 - - - #print(df) - #If I want to make a csv-file of the raw data. Decided that was not necessary: - #df.to_csv('static-Mn-edge.csv') #writing it to a csv, first row is datapoint (index), second column is 2theta, and from there the scans starts - - - return df \ No newline at end of file From 880722d7784d7cc6c3a44ceb98e87fb3aa7bd1b8 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Fri, 17 Jun 2022 15:35:24 +0200 Subject: [PATCH 12/15] Load correct xmap_roi for more cases --- nafuma/xanes/io.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 458b38f..20aa717 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -115,7 +115,10 @@ def read_data(data: dict, options={}) -> pd.DataFrame: scan_data = pd.read_csv(filename) scan_data = scan_data[[determine_active_roi(scan_data)]] - xanes_data.insert(1, filename, scan_data) + xanes_data = pd.concat([xanes_data, scan_data], axis=1) + + + xanes_data.columns = columns return xanes_data @@ -129,9 +132,24 @@ def determine_active_roi(scan_data): # FIXME For Co-edge, this gave a wrong scan #Trying to pick the roi with the highest difference between maximum and minimum intensity --> biggest edge shift - if max(scan_data["xmap_roi00"])-min(scan_data["xmap_roi00"])>max(scan_data["xmap_roi01"])-min(scan_data["xmap_roi01"]): - active_roi = 'xmap_roi00' - else: - active_roi = 'xmap_roi01' + # if max(scan_data["xmap_roi00"])-min(scan_data["xmap_roi00"])>max(scan_data["xmap_roi01"])-min(scan_data["xmap_roi01"]): + # active_roi = 'xmap_roi00' + # else: + # active_roi = 'xmap_roi01' + if (scan_data['xmap_roi00'].iloc[0:100].mean() < scan_data['xmap_roi00'].iloc[-100:].mean()) and (scan_data['xmap_roi01'].iloc[0:100].mean() < scan_data['xmap_roi01'].iloc[-100:].mean()): + if (scan_data['xmap_roi00'].max()-scan_data['xmap_roi00'].min()) > (scan_data['xmap_roi01'].max() - scan_data['xmap_roi01'].min()): + active_roi = 'xmap_roi00' + else: + active_roi = 'xmap_roi01' + + elif scan_data['xmap_roi00'].iloc[0:100].mean() < scan_data['xmap_roi00'].iloc[-100:].mean(): + active_roi = 'xmap_roi00' + + elif scan_data['xmap_roi01'].iloc[0:100].mean() < scan_data['xmap_roi01'].iloc[-100:].mean(): + active_roi = 'xmap_roi01' + + else: + active_roi = None + return active_roi From cb2c7532e6ac48073a3cbbccb809ae7631019b2b Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Fri, 17 Jun 2022 15:46:33 +0200 Subject: [PATCH 13/15] Make sure data['path'] is in a list --- nafuma/xanes/io.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 20aa717..816b8f5 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -110,6 +110,9 @@ def read_data(data: dict, options={}) -> pd.DataFrame: # Initialise DataFrame with only ZapEnergy-column xanes_data = pd.read_csv(data['path'][0])[['ZapEnergy']] + if not isinstance(data['path'], list): + data['path'] = [data['path']] + for filename in data['path']: columns.append(filename) From ba349a5892f480b398444e095f80779e5d7111c3 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Fri, 17 Jun 2022 15:58:39 +0200 Subject: [PATCH 14/15] Refactor estimation of edge position and automatise pre edge limit setting --- nafuma/xanes/calib.py | 70 ++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index a1211e2..e540e9b 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -12,7 +12,7 @@ from datetime import datetime ##Better to make a new function that loops through the files, and performing the split_xanes_scan on #Trying to make a function that can decide which edge it is based on the first ZapEnergy-value -def find_element(data: dict) -> str: +def find_element(data: dict, index=0) -> str: ''' Takes the data dictionary and determines based on the start value of the ZapEnergy-column which element the edge is from.''' element_energy_intervals = { @@ -22,13 +22,13 @@ def find_element(data: dict) -> str: 'Ni': [8.0, 8.6] } - if (element_energy_intervals['Mn'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Mn'][1]): + if (element_energy_intervals['Mn'][0] < data['xanes_data_original']["ZapEnergy"].iloc[index]) & (data['xanes_data_original']["ZapEnergy"].iloc[index] < element_energy_intervals['Mn'][1]): edge = 'Mn' - elif (element_energy_intervals['Fe'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Fe'][1]): + elif (element_energy_intervals['Fe'][0] < data['xanes_data_original']["ZapEnergy"].iloc[index]) & (data['xanes_data_original']["ZapEnergy"].iloc[index] < element_energy_intervals['Fe'][1]): edge = 'Fe' - elif (element_energy_intervals['Co'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Co'][1]): + elif (element_energy_intervals['Co'][0] < data['xanes_data_original']["ZapEnergy"].iloc[index]) & (data['xanes_data_original']["ZapEnergy"].iloc[index] < element_energy_intervals['Co'][1]): edge = 'Co' - elif (element_energy_intervals['Ni'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Ni'][1]): + elif (element_energy_intervals['Ni'][0] < data['xanes_data_original']["ZapEnergy"].iloc[index]) & (data['xanes_data_original']["ZapEnergy"].iloc[index] < element_energy_intervals['Ni'][1]): edge = 'Ni' @@ -58,22 +58,25 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME Implement with finding accurate edge position + # FIXME Allow specification of start of pre-edge area # Find the cutoff point at which the edge starts - everything to the LEFT of this point will be used in the pre edge function fit if not options['edge_start']: - edge_starts = { - 'Mn': 6.42, - 'Fe': 7.09, - 'Co': 7.705, - 'Ni': 8.3 + pre_edge_limit_offsets = { + 'Mn': 0.03, + 'Fe': 0.03, + 'Co': 0.03, + 'Ni': 0.03 } data['edge'] = find_element(data) - edge_start = edge_starts[data['edge']] + + edge_position = estimate_edge_position(data, options, index=0) + pre_edge_limit = edge_position - pre_edge_limit_offsets[data['edge']] # FIXME There should be an option to specify the interval in which to fit the background - now it is taking everything to the left of edge_start parameter, but if there are some artifacts in this area, it should be possible to # limit the interval # Making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined) - pre_edge_data = data['xanes_data_original'].loc[data['xanes_data_original']["ZapEnergy"] < edge_start] + pre_edge_data = data['xanes_data_original'].loc[data['xanes_data_original']["ZapEnergy"] < pre_edge_limit] # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data pre_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) @@ -161,6 +164,31 @@ def pre_edge_subtraction(data: dict, options={}): return xanes_data_bkgd_subtracted +def estimate_edge_position(data: dict, options={}, index=0): + #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. + required_options = ['print','periods'] + default_options = { + 'print': False, + 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly + } + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + #making new dataframe to keep the differentiated data + df_diff = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) + df_diff[data['path'][index]]=data['xanes_data_original'][data['path'][index]].diff(periods=options['periods']) + + #shifting column values up so that average differential fits right between the points used in the calculation + df_diff[data['path'][index]]=df_diff[data['path'][index]].shift(-int(options['periods']/2)) + df_diff_max = df_diff[data['path'][index]].dropna().max() + estimated_edge_shift =df_diff.loc[df_diff[data['path'][index]] == df_diff_max,'ZapEnergy'].values[0] + + # FIXME Add logging option to see the result + + print(estimated_edge_shift) + + return estimated_edge_shift + + def post_edge_fit(path, options={}): #FIXME should be called "fitting post edge" (normalization is not done here, need edge shift position) required_options = ['print'] @@ -254,25 +282,7 @@ def smoothing(path, options={}): return df_smooth, filenames -def find_pos_maxdiff(df, filenames,options={}): - #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. - required_options = ['print','periods'] - default_options = { - 'print': False, - 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly - } - options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - #making new dataframe to keep the differentiated data - df_diff = pd.DataFrame(df["ZapEnergy"]) - df_diff[filenames]=df[filenames].diff(periods=options['periods']) - - #shifting column values up so that average differential fits right between the points used in the calculation - df_diff[filenames]=df_diff[filenames].shift(-int(options['periods']/2)) - df_diff_max = df_diff[filenames].dropna().max() - estimated_edge_shift =df_diff.loc[df_diff[filenames] == df_diff_max,'ZapEnergy'].values[0] - - return estimated_edge_shift, df_diff, df_diff_max def find_nearest(array, value): #function to find the value closes to "value" in an "array" From 672d5549fe9143bb724c65b195cc323f049448f5 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Fri, 17 Jun 2022 16:14:21 +0200 Subject: [PATCH 15/15] Fix lint issue --- nafuma/xanes/calib.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index e540e9b..3a37eab 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -136,6 +136,7 @@ def pre_edge_subtraction(data: dict, options={}): 'save_folder': './' } + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) if options['log']: aux.write_log(message='Starting pre edge subtraction', options=options) @@ -145,16 +146,16 @@ def pre_edge_subtraction(data: dict, options={}): if options['log']: aux.write_log(message=f'Subtracting background on {filename} ({i} / {len(data["path"])}', options=options) - xanes_data_bkgd_subtracted.insert(1, filename, data['xanes_data'][filename] - data['pre_edge_fit_data'][filename]) + xanes_data_bkgd_subtracted.insert(1, filename, data['xanes_data_original'][filename] - data['pre_edge_fit_data'][filename]) if options['save_plots']: if not os.path.isdir(options['save_folder']): os.makedirs(options['save_folder']) - dst = os.path.join(options['save_folder'], filename) + '_pre_edge_subtraction.png' + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_pre_edge_subtraction.png' - fig, ax = plt.subplots(1,2,figsize=(10,5)) - data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax) + fig, ax = plt.subplots(figsize=(10,5)) + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax) xanes_data_bkgd_subtracted.plot(x='ZapEnergy', y=filename, color='red', ax=ax) ax.set_title(f'{os.path.basename(filename)} - After subtraction', size=20) @@ -184,7 +185,8 @@ def estimate_edge_position(data: dict, options={}, index=0): # FIXME Add logging option to see the result - print(estimated_edge_shift) + if options['log']: + aux.write_log(message=f'Estimated edge shift for determination of pre-edge area is: {estimated_edge_shift} keV', options=options) return estimated_edge_shift @@ -309,7 +311,7 @@ def finding_e0(path, options={}): print("MORE THAN ONE FILE --> generalize") ##### - estimated_edge_shift, df_diff, df_diff_max = find_pos_maxdiff(df_smooth, filenames,options=options) + estimated_edge_shift, df_diff, df_diff_max = estimate_edge_position(df_smooth, filenames,options=options) print(estimated_edge_shift) #### ###df_diff[filenames]=df_smooth[filenames].diff(periods=options['periods']) #