From c9660109cb32297838e1c3ac100401c4a56a86aa Mon Sep 17 00:00:00 2001 From: halvorhv Date: Wed, 27 Apr 2022 10:37:02 +0200 Subject: [PATCH 01/56] adding a smoothing function --- nafuma/xanes/calib.py | 59 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 11 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index ad3443f..fb436cb 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -2,9 +2,11 @@ import pandas as pd import numpy as np import os import matplotlib.pyplot as plt -import beamtime.auxillary as aux -import beamtime.xanes as xas -import beamtime.xanes.io as io +import nafuma.auxillary as aux +import nafuma.xanes as xas +import nafuma.xanes.io as io +from scipy.signal import savgol_filter + def rbkerbest(): print("ROSENBORG!<3") @@ -24,11 +26,10 @@ def finding_edge(df): edge='Ni' return(edge) -<<<<<<< HEAD:beamtime/xanes/calib.py #def pre_edge_subtraction(df,filenames, options={}): def test(innmat): df_test= xas.io.put_in_dataframe(innmat) - print(df_test) + #print(df_test) def pre_edge_subtraction(path, options={}): required_options = ['print','troubleshoot'] @@ -41,11 +42,6 @@ def pre_edge_subtraction(path, options={}): filenames = xas.io.get_filenames(path) df= xas.io.put_in_dataframe(path) edge=finding_edge(df) -======= -def split_xanes_scan(filename, destination=None, replace=False): - #root is the path to the beamtime-folder - #destination should be the path to the processed data ->>>>>>> master:nafuma/xanes/calib.py #Defining the end of the region used to define the background, thus start of the edge #implement widget @@ -154,4 +150,45 @@ def post_edge_normalization(path, options={}): df_postedge.plot(x="ZapEnergy", y=filenames,color="Green",ax=ax, legend=False) plt.axvline(x = min(df_end["ZapEnergy"])) - return df_bkgd_sub, df_postedge \ No newline at end of file + return df_bkgd_sub, df_postedge, filenames, edge + +def smoothing(path, options={}): + required_options = ['print','window_length','polyorder'] + default_options = { + 'print': False, + 'window_length': 3, + 'polyorder': 2 + } + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + df_bkgd_sub, df_postedge, filenames, edge = post_edge_normalization(path) + #================= SMOOTHING + df_smooth = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) + df_default = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) + #df_smooth[filenames] = df_bkgd_sub.iloc[:,2].rolling(window=rolling_av).mean() + #df_smooth[filenames] = df_smooth[filenames].shift(-int((rolling_av)/2)) + for filename in filenames: + x=savgol_filter(df_bkgd_sub[filename], options['window_length'],options['polyorder']) + df_smooth[filename] = x + x_default=savgol_filter(df_bkgd_sub[filename],default_options['window_length'],default_options['polyorder']) + df_default[filename] = x_default + + if options['print'] == True: + fig, (ax1,ax2) = plt.subplots(1,2,figsize=(15,5)) + x_range_zoom=[6.54,6.55] #make into widget + y_range_zoom=[20000,80000] #make into widget + + df_bkgd_sub.plot(x = "ZapEnergy",y=filenames, ax=ax1, color="Red") + df_smooth.plot(x = "ZapEnergy",y=filenames, ax=ax1, color="Blue") + ax1.set_xlim(x_range_zoom) + ax1.set_ylim(y_range_zoom) + ax1.set_title("Smoothed curve (blue) vs data (red) used for further analysis") + + df_bkgd_sub.plot(x = "ZapEnergy",y=filenames, ax=ax2, color="Red") + df_default.plot(x = "ZapEnergy",y=filenames, ax=ax2, color="Blue") + ax2.set_xlim(x_range_zoom) + ax2.set_ylim(y_range_zoom) + ax2.set_title("Smoothed curve (blue) vs data (red) using default window_length and polyorder") + + + From 8ce15574398d848354f5e4ad2983d32e01eaf7f4 Mon Sep 17 00:00:00 2001 From: halvorhv Date: Wed, 15 Jun 2022 10:00:13 +0200 Subject: [PATCH 02/56] finding e0 --- nafuma/xanes/calib.py | 191 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 183 insertions(+), 8 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index fb436cb..f7b57c6 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -44,9 +44,19 @@ def pre_edge_subtraction(path, options={}): edge=finding_edge(df) #Defining the end of the region used to define the background, thus start of the edge + + #######================================================================================================================================================ + #Trying to implement automatical region determination based on an estimate of the edge shift + #print(df) + #estimated_edge_shift, df_diff, df_diff_max = find_pos_maxdiff(df, filenames,options=options) + + #print(estimated_edge_shift) + #estimated_edge_shift + ###========================================================================================================================================================================= #implement widget if edge == 'Mn': edge_start = 6.42 + #edge_start = estimated_edge_shift if edge == 'Ni': edge_start = 8.3 @@ -120,7 +130,7 @@ def post_edge_normalization(path, options={}): } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - df_bkgd_sub,filenames,edge = pre_edge_subtraction(path) + df_bkgd_sub,filenames,edge = pre_edge_subtraction(path, options=options) #Defining the end of the pre-edge-region for Mn/Ni, thus start of the edge #Implement widget if edge == 'Mn': @@ -161,34 +171,199 @@ def smoothing(path, options={}): } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - df_bkgd_sub, df_postedge, filenames, edge = post_edge_normalization(path) + df_bkgd_sub, df_postedge, filenames, edge = post_edge_normalization(path,options=options) #================= SMOOTHING df_smooth = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) df_default = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) #df_smooth[filenames] = df_bkgd_sub.iloc[:,2].rolling(window=rolling_av).mean() #df_smooth[filenames] = df_smooth[filenames].shift(-int((rolling_av)/2)) for filename in filenames: - x=savgol_filter(df_bkgd_sub[filename], options['window_length'],options['polyorder']) - df_smooth[filename] = x + x_smooth=savgol_filter(df_bkgd_sub[filename], options['window_length'],options['polyorder']) + df_smooth[filename] = x_smooth x_default=savgol_filter(df_bkgd_sub[filename],default_options['window_length'],default_options['polyorder']) df_default[filename] = x_default + + + #printing the smoothed curves vs data if options['print'] == True: + + ## ================================================ + #df_diff = pd.DataFrame(df_smooth["ZapEnergy"]) + #df_diff_estimated_max = df_diff[filenames].dropna().max() + + + #estimated_edge_shift=df_diff.loc[df_diff[filenames] == df_diff_max,'ZapEnergy'].values[0] + # ========================================== + + fig, (ax1,ax2) = plt.subplots(1,2,figsize=(15,5)) x_range_zoom=[6.54,6.55] #make into widget y_range_zoom=[20000,80000] #make into widget - df_bkgd_sub.plot(x = "ZapEnergy",y=filenames, ax=ax1, color="Red") + df_bkgd_sub.plot.scatter(x = "ZapEnergy",y=filenames, ax=ax1, color="Red") df_smooth.plot(x = "ZapEnergy",y=filenames, ax=ax1, color="Blue") ax1.set_xlim(x_range_zoom) ax1.set_ylim(y_range_zoom) ax1.set_title("Smoothed curve (blue) vs data (red) used for further analysis") - df_bkgd_sub.plot(x = "ZapEnergy",y=filenames, ax=ax2, color="Red") - df_default.plot(x = "ZapEnergy",y=filenames, ax=ax2, color="Blue") + df_bkgd_sub.plot.scatter(x = "ZapEnergy",y=filenames, ax=ax2, color="Red") + df_default.plot(x = "ZapEnergy",y=filenames, ax=ax2, color="Green") ax2.set_xlim(x_range_zoom) ax2.set_ylim(y_range_zoom) - ax2.set_title("Smoothed curve (blue) vs data (red) using default window_length and polyorder") + ax2.set_title("Smoothed curve (green) vs data (red) using default window_length and polyorder") + return df_smooth, filenames + +def find_pos_maxdiff(df, filenames,options={}): + #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. + required_options = ['print','periods'] + default_options = { + 'print': False, + 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly + } + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + #making new dataframe to keep the differentiated data + df_diff = pd.DataFrame(df["ZapEnergy"]) + df_diff[filenames]=df[filenames].diff(periods=options['periods']) + + #shifting column values up so that average differential fits right between the points used in the calculation + df_diff[filenames]=df_diff[filenames].shift(-int(options['periods']/2)) + df_diff_max = df_diff[filenames].dropna().max() + estimated_edge_shift =df_diff.loc[df_diff[filenames] == df_diff_max,'ZapEnergy'].values[0] + + return estimated_edge_shift, df_diff, df_diff_max + +def find_nearest(array, value): + #function to find the value closes to "value" in an "array" + array = np.asarray(array) + idx = (np.abs(array - value)).argmin() + return array[idx] + +def finding_e0(path, options={}): + required_options = ['print','periods'] + default_options = { + 'print': False, + 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly + } + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + df_smooth, filenames = smoothing(path, options=options) #This way the smoothing is printed as long as the "finding e0" is printed. + + if options['periods'] % 2 == 1: + print("NB!!!!!!!!!!!!!!!!! Periods needs to be an even number for the shifting of values to work properly") + ###df_diff = pd.DataFrame(df_smooth["ZapEnergy"]) # + if len(filenames) == 1: + filenames=filenames[0] + else: + print("MORE THAN ONE FILE --> generalize") + + ##### + estimated_edge_shift, df_diff, df_diff_max = find_pos_maxdiff(df_smooth, filenames,options=options) + print(estimated_edge_shift) + #### + ###df_diff[filenames]=df_smooth[filenames].diff(periods=options['periods']) # + df_doublediff=pd.DataFrame(df_smooth["ZapEnergy"]) + df_doublediff[filenames]=df_diff[filenames].diff(periods=options['periods']) + + if options['print'] == True: + fig, (ax1,ax2) = plt.subplots(1,2,figsize=(15,5)) + + df_diff.plot(x = "ZapEnergy",y=filenames, ax=ax1) #defining x and y + df_doublediff.plot(x = "ZapEnergy",y=filenames,ax=ax2) #defining x and y + + #shifting column values up so that average differential fits right between the points used in the calculation + #df_diff[filenames]=df_diff[filenames].shift(-int(options['periods']/2)) # + df_doublediff[filenames]=df_doublediff[filenames].shift(-int(options['periods'])) + + #finding maximum value to maneuver to the correct part of the data set + #df_diff_max = df_diff[filenames].dropna().max() + + + estimated_edge_shift=df_diff.loc[df_diff[filenames] == df_diff_max,'ZapEnergy'].values[0] + + fit_region = 0.0004 + df_diff_edge=df_diff.loc[(df_diff["ZapEnergy"] < estimated_edge_shift+fit_region)]# and (df_diff["ZapEnergy"] > estimated_edge_shift-0.05)] + df_diff_edge=df_diff_edge.loc[(df_diff["ZapEnergy"] > estimated_edge_shift-fit_region)] + + + + + df_doublediff_edge=df_doublediff.loc[(df_doublediff["ZapEnergy"] < estimated_edge_shift+fit_region)]# and (df_diff["ZapEnergy"] > estimated_edge_shift-0.05)] + df_doublediff_edge=df_doublediff_edge.loc[(df_doublediff["ZapEnergy"] > estimated_edge_shift-fit_region)] + #df_diff_edge=df_diff.loc[(df_diff["ZapEnergy"] > estimated_edge_shift-0.15) and (df_diff["ZapEnergy"] < estimated_edge_shift+0.15)] + + #df_diff_edge=df_diff.loc[df_diff["ZapEnergy"] > estimated_edge_shift-0.15] + #print(df_diff_edge) + if options['print'] == True: + fig, (ax3,ax4) = plt.subplots(1,2,figsize=(15,5)) + + df_diff_edge.plot(x = "ZapEnergy",y=filenames,ax=ax3) #defining x and y + ax3.set_title("Zoomed into edge region (derivative))") + ax3.axvline(x = estimated_edge_shift) + + df_doublediff_edge.plot(x = "ZapEnergy",y=filenames,ax=ax4,kind="scatter") #defining x and y + ax4.set_title("Zoomed into edge region (double derivative)") + ax4.axvline(x = estimated_edge_shift) + ax4.axhline(0) + + #ax1.set_xlim([estimated_edge_shift-fit_region,estimated_edge_shift+fit_region]) + #ax1.set_title("not sure what this is tbh") + + #ax2.set_xlim([estimated_edge_shift-fit_region,estimated_edge_shift+fit_region]) + #ax2.set_title("not sure what this is either tbh") + + #============== + #df_smooth=df_smooth2 + #================= + + + + + #========================== fitting first differential ========== + df_diff = df_diff[df_diff[filenames].notna()] + + #fitting a function to the chosen interval + d = np.polyfit(df_diff_edge["ZapEnergy"],df_diff_edge[filenames],2) + function_diff = np.poly1d(d) + + x_diff=np.linspace(df_diff_edge["ZapEnergy"].iloc[0],df_diff_edge["ZapEnergy"].iloc[-1],num=1000) + y_diff=function_diff(x_diff) + #print(df_diff_edge["ZapEnergy"].iloc[-1]) + if options['print'] == True: + ax3.plot(x_diff,y_diff,color='Green') + + #y_diff_max=np.amax(y_diff,0) + y_diff_max_index = np.where(y_diff == np.amax(y_diff)) + #print(y_diff_max_index[0]) + edge_shift_diff=float(x_diff[y_diff_max_index]) + print("Edge shift estimated by the differential maximum is "+str(round(edge_shift_diff,5))) + if options['print'] == True: + ax3.axvline(x=edge_shift_diff,color="green") + #print(df_doublediff_edge["ZapEnergy"].iloc[0]) + #ax4.plot(x_doublediff,y_doublediff,color='Green')) + + + #fitting double differentiate + df_doublediff = df_doublediff[df_doublediff[filenames].notna()] + d = np.polyfit(df_doublediff_edge["ZapEnergy"],df_doublediff_edge[filenames],2) + function_doublediff = np.poly1d(d) + + x_doublediff=np.linspace(df_doublediff_edge["ZapEnergy"].iloc[0],df_doublediff_edge["ZapEnergy"].iloc[-1],num=10000) + y_doublediff=function_doublediff(x_doublediff) + + if options['print'] == True: + ax4.plot(x_doublediff,y_doublediff,color='Green') + + y_doublediff_zero=find_nearest(y_doublediff,0) + y_doublediff_zero_index = np.where(y_doublediff == y_doublediff_zero) + + edge_shift_doublediff=float(x_doublediff[y_doublediff_zero_index]) + + print("Edge shift estimated by the double differential zero-point is "+str(round(edge_shift_doublediff,5))) + if options['print'] == True: + ax4.axvline(x=edge_shift_doublediff,color="green") + From 7676bd06af5720bb10cfea269e9d38a20e75b8a2 Mon Sep 17 00:00:00 2001 From: halvorhv Date: Wed, 15 Jun 2022 13:21:26 +0200 Subject: [PATCH 03/56] Adding FIXME's --- nafuma/xanes/calib.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index f7b57c6..f11ff93 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -7,18 +7,12 @@ import nafuma.xanes as xas import nafuma.xanes.io as io from scipy.signal import savgol_filter -def rbkerbest(): - print("ROSENBORG!<3") - -#def split_xanes_scan(filename, destination=None): - - # with open(filename, 'r') as f: - ##Better to make a new function that loops through the files, and performing the split_xanes_scan on -#Tryiung to make a function that can decide which edge it is based on the first ZapEnergy-value +#Trying to make a function that can decide which edge it is based on the first ZapEnergy-value def finding_edge(df): + #FIXME add Fe and Co if 5.9 < df["ZapEnergy"][0] < 6.5: edge='Mn' return(edge) @@ -26,12 +20,8 @@ def finding_edge(df): edge='Ni' return(edge) -#def pre_edge_subtraction(df,filenames, options={}): -def test(innmat): - df_test= xas.io.put_in_dataframe(innmat) - #print(df_test) - def pre_edge_subtraction(path, options={}): + #FIXME add log-file instead of the troubleshoot-option required_options = ['print','troubleshoot'] default_options = { 'print': False, @@ -46,7 +36,7 @@ def pre_edge_subtraction(path, options={}): #Defining the end of the region used to define the background, thus start of the edge #######================================================================================================================================================ - #Trying to implement automatical region determination based on an estimate of the edge shift + #FIXME Trying to implement automatical region determination based on an estimate of the edge shift #print(df) #estimated_edge_shift, df_diff, df_diff_max = find_pos_maxdiff(df, filenames,options=options) @@ -122,8 +112,8 @@ def pre_edge_subtraction(path, options={}): return df_bkgd_sub,filenames,edge -def post_edge_normalization(path, options={}): - +def post_edge_fit(path, options={}): + #FIXME should be called "fitting post edge" (normalization is not done here, need edge shift position) required_options = ['print'] default_options = { 'print': False @@ -132,7 +122,7 @@ def post_edge_normalization(path, options={}): df_bkgd_sub,filenames,edge = pre_edge_subtraction(path, options=options) #Defining the end of the pre-edge-region for Mn/Ni, thus start of the edge - #Implement widget + #FIXME Use rought edge shift estimate, add X eV as first guess, have an option to adjust this value with widget if edge == 'Mn': edge_stop = 6.565 if edge == 'Ni': @@ -171,7 +161,7 @@ def smoothing(path, options={}): } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - df_bkgd_sub, df_postedge, filenames, edge = post_edge_normalization(path,options=options) + df_bkgd_sub, df_postedge, filenames, edge = post_edge_fit(path,options=options) #================= SMOOTHING df_smooth = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) df_default = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) From d88a302d2a91ae48a4cea57ff7c24a0a9b4ef6d4 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Wed, 15 Jun 2022 13:44:42 +0200 Subject: [PATCH 04/56] Add Fe and Co to find_element and refactor --- nafuma/xanes/calib.py | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index f11ff93..c93813d 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -11,14 +11,27 @@ from scipy.signal import savgol_filter ##Better to make a new function that loops through the files, and performing the split_xanes_scan on #Trying to make a function that can decide which edge it is based on the first ZapEnergy-value -def finding_edge(df): - #FIXME add Fe and Co - if 5.9 < df["ZapEnergy"][0] < 6.5: - edge='Mn' - return(edge) - if 8.0 < df["ZapEnergy"][0] < 8.6: - edge='Ni' - return(edge) +def find_element(data: dict) -> str: + ''' Takes the data dictionary and determines based on the start value of the ZapEnergy-column which element the edge is from.''' + + element_energy_intervals = { + 'Mn': [5.9, 6.5], + 'Fe': [7.0, 7.2], + 'Co': [7.6, 7.8], + 'Ni': [8.0, 8.6] + } + + if element_energy_intervals['Mn'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Mn'][1]: + edge = 'Mn' + elif element_energy_intervals['Co'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Fe'][1]: + edge = 'Fe' + elif element_energy_intervals['Co'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Co'][1]: + edge = 'Co' + elif element_energy_intervals['Ni'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Ni'][1]: + edge = 'Ni' + + + return(edge) def pre_edge_subtraction(path, options={}): #FIXME add log-file instead of the troubleshoot-option @@ -31,7 +44,7 @@ def pre_edge_subtraction(path, options={}): filenames = xas.io.get_filenames(path) df= xas.io.put_in_dataframe(path) - edge=finding_edge(df) + edge=find_element(df) #Defining the end of the region used to define the background, thus start of the edge From 909c616c508ce7bdea4c7fbb2e0fc31ef94cfd08 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Wed, 15 Jun 2022 14:28:50 +0200 Subject: [PATCH 05/56] Add function to write out log messages --- nafuma/auxillary.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/nafuma/auxillary.py b/nafuma/auxillary.py index 68785f7..33a62fe 100644 --- a/nafuma/auxillary.py +++ b/nafuma/auxillary.py @@ -52,4 +52,23 @@ def floor(a, roundto=1): a = np.floor(a*fac) / fac - return a \ No newline at end of file + return a + + + +def write_log(message, options={}): + from datetime import datetime + + required_options = ['logfile'] + default_options = { + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}' + } + + options = update_options(options=options, required_options=required_options, default_options=default_options) + + now = datetime.now().strftime('%Y/%m/%d %H:%M:%S') + message = f'{now} {message} \n' + + + with open(options['logfile'], 'a') as f: + f.write(message) \ No newline at end of file From d17e715d82cdd1243a08c2c7dfaf8655c7168283 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Wed, 15 Jun 2022 14:50:32 +0200 Subject: [PATCH 06/56] Separating and refactoring pre_edge_normalisation --- nafuma/xanes/calib.py | 63 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index c93813d..56d944b 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -33,7 +33,68 @@ def find_element(data: dict) -> str: return(edge) -def pre_edge_subtraction(path, options={}): + + +def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: + from datetime import datetime + + # FIXME Add log-file + + required_options = ['edge_start', 'log', 'troubleshoot'] + default_options = { + 'edge_start': None, + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_pre_edge_fit.log', + 'save_fit': False, + 'save_folder': './' + } + + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + if options['log']: + aux.write_log(message='Starting pre edge fit', options=options) + + + + # FIXME Implement with finding accurate edge position + # Find the cutoff point at which the edge starts - everything to the LEFT of this point will be used in the pre edge function fit + if not options['edge_start']: + edge_starts = { + 'Mn': 6.42, + 'Fe': 7.11, + 'Co': 7.705, + 'Ni': 8.3 + } + + edge_start = edge_starts[data['edge']] + + # Making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined) + pre_edge_data = data['xanes_data'].loc[data['xanes_data']["ZapEnergy"] < edge_start] + + # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data + pre_edge_fit_data = pd.DataFrame(data['xanes_data']["ZapEnergy"]) + + for filename in data['path']: + if options['log']: + aux.write_log(message=f'Fitting background on {filename}', options=options) + + #Fitting linear function to the background + params = np.polyfit(pre_edge_data["ZapEnergy"],pre_edge_data[filename],1) + fit_function = np.poly1d(params) + + #making a list, y_pre,so the background will be applied to all ZapEnergy-values + background=fit_function(pre_edge_fit_data["ZapEnergy"]) + + #adding a new column in df_background with the y-values of the background + pre_edge_fit_data.insert(1,filename,background) + + if options['log']: + aux.write_log(message=f'Pre edge fitting done.', options=options) + + return pre_edge_fit_data + + +def pre_edge_subtraction(data: dict, options={}): #FIXME add log-file instead of the troubleshoot-option required_options = ['print','troubleshoot'] default_options = { From 7485adef07e9df407647ea061e78711a0f72956f Mon Sep 17 00:00:00 2001 From: halvorhv Date: Wed, 15 Jun 2022 16:00:47 +0200 Subject: [PATCH 07/56] Adding sketch for normalization and flattening --- nafuma/xanes/calib.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index f11ff93..5579574 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -356,4 +356,30 @@ def finding_e0(path, options={}): print("Edge shift estimated by the double differential zero-point is "+str(round(edge_shift_doublediff,5))) if options['print'] == True: ax4.axvline(x=edge_shift_doublediff,color="green") + + return df_smooth, filenames, edge_shift_diff + +def normalization(data,options={}): + required_options = ['print'] + default_options = { + 'print': False, + } + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + #Finding the normalization constant µ_0(E_0), by subtracting the value of the pre-edge-line from the value of the post-edge line at e0 + normalization_constant=post_edge_fit_function(e0) - pre_edge_fit_function(e0) + #subtracting background (as in pre_edge_subtraction) + + #dividing the background-subtracted data with the normalization constant + + +def flattening(data,options={}): + #only picking out zapenergy-values higher than edge position (edge pos and below remains untouched) + df_e0_and_above=df.loc[df['ZapEnergy'] > edge_shift_diff] + + flattened_data = post_edge_fit_function(df_e0_and_above['ZapEnergy']) - pre_edge_fit_function(df_e0_and_above['ZapEnergy']) + + #make a new dataframe with flattened values + + From e0b71a85b788936e929e2b4a2868ab1606b07498 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 14:58:41 +0200 Subject: [PATCH 08/56] Add save fit function to pre edge fit function --- nafuma/xanes/calib.py | 50 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 45 insertions(+), 5 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 56d944b..238aa3e 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -6,6 +6,7 @@ import nafuma.auxillary as aux import nafuma.xanes as xas import nafuma.xanes.io as io from scipy.signal import savgol_filter +from datetime import datetime ##Better to make a new function that loops through the files, and performing the split_xanes_scan on @@ -36,7 +37,7 @@ def find_element(data: dict) -> str: def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: - from datetime import datetime + # FIXME Add log-file @@ -68,15 +69,17 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: edge_start = edge_starts[data['edge']] + # FIXME There should be an option to specify the interval in which to fit the background - now it is taking everything to the left of edge_start parameter, but if there are some artifacts in this area, it should be possible to + # limit the interval # Making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined) pre_edge_data = data['xanes_data'].loc[data['xanes_data']["ZapEnergy"] < edge_start] # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data pre_edge_fit_data = pd.DataFrame(data['xanes_data']["ZapEnergy"]) - for filename in data['path']: + for i, filename in enumerate(data['path']): if options['log']: - aux.write_log(message=f'Fitting background on {filename}', options=options) + aux.write_log(message=f'Fitting background on {filename} ({i} / {len(data["path"])}', options=options) #Fitting linear function to the background params = np.polyfit(pre_edge_data["ZapEnergy"],pre_edge_data[filename],1) @@ -88,13 +91,50 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: #adding a new column in df_background with the y-values of the background pre_edge_fit_data.insert(1,filename,background) - if options['log']: - aux.write_log(message=f'Pre edge fitting done.', options=options) + if options['save_fit']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) + + dst = os.path.join(options['save_folder'], filename) + '.png' + + fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5)) + data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) + pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) + ax1.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') + ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) + + data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) + pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax2) + ax2.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') + ax2.set_xlim([min(pre_edge_data['ZapEnergy']), max(pre_edge_data['ZapEnergy'])]) + ax2.set_ylim([min(pre_edge_data[filename]), max(pre_edge_data[filename])]) + ax2.set_title(f'{os.path.basename(filename)} - Fit region', size=20) + + + plt.savefig(dst) + plt.close() + + + if options['log']: + aux.write_log(message=f'Pre edge fitting done.', options=options) return pre_edge_fit_data + def pre_edge_subtraction(data: dict, options={}): + + required_options = ['log', 'logfile'] + default_options = { + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_pre_edge_subtraction.log', + } + + + + + +def pre_edge_subtraction_legacy(data: dict, options={}): #FIXME add log-file instead of the troubleshoot-option required_options = ['print','troubleshoot'] default_options = { From bac137042e55c109450146c0d05bd23291214ea7 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 15:42:50 +0200 Subject: [PATCH 09/56] Refactor pre edge subtraction --- nafuma/xanes/calib.py | 120 ++++++++++-------------------------------- 1 file changed, 27 insertions(+), 93 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 238aa3e..833e98e 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -41,12 +41,12 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME Add log-file - required_options = ['edge_start', 'log', 'troubleshoot'] + required_options = ['edge_start', 'log', 'logfile', 'save_plots', 'save_folder'] default_options = { 'edge_start': None, 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_pre_edge_fit.log', - 'save_fit': False, + 'save_plots': False, 'save_folder': './' } @@ -72,10 +72,10 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME There should be an option to specify the interval in which to fit the background - now it is taking everything to the left of edge_start parameter, but if there are some artifacts in this area, it should be possible to # limit the interval # Making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined) - pre_edge_data = data['xanes_data'].loc[data['xanes_data']["ZapEnergy"] < edge_start] + pre_edge_data = data['xanes_data_original'].loc[data['xanes_data_original']["ZapEnergy"] < edge_start] # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data - pre_edge_fit_data = pd.DataFrame(data['xanes_data']["ZapEnergy"]) + pre_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) for i, filename in enumerate(data['path']): if options['log']: @@ -91,11 +91,11 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: #adding a new column in df_background with the y-values of the background pre_edge_fit_data.insert(1,filename,background) - if options['save_fit']: + if options['save_plots']: if not os.path.isdir(options['save_folder']): os.makedirs(options['save_folder']) - dst = os.path.join(options['save_folder'], filename) + '.png' + dst = os.path.join(options['save_folder'], filename) + '_pre_edge_fit.png' fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5)) data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) @@ -124,107 +124,41 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: def pre_edge_subtraction(data: dict, options={}): - required_options = ['log', 'logfile'] + required_options = ['log', 'logfile', 'save_plots', 'save_folder'] default_options = { 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_pre_edge_subtraction.log', + 'save_plots': False, + 'save_folder': './' } + if options['log']: + aux.write_log(message='Starting pre edge subtraction', options=options) + xanes_data_bkgd_subtracted = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) + for i, filename in enumerate(data['path']): + if options['log']: + aux.write_log(message=f'Subtracting background on {filename} ({i} / {len(data["path"])}', options=options) + xanes_data_bkgd_subtracted.insert(1, filename, data['xanes_data'][filename] - data['pre_edge_fit_data'][filename]) -def pre_edge_subtraction_legacy(data: dict, options={}): - #FIXME add log-file instead of the troubleshoot-option - required_options = ['print','troubleshoot'] - default_options = { - 'print': False, - 'troubleshoot': False - } - options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) - filenames = xas.io.get_filenames(path) - df= xas.io.put_in_dataframe(path) - edge=find_element(df) - - #Defining the end of the region used to define the background, thus start of the edge - - #######================================================================================================================================================ - #FIXME Trying to implement automatical region determination based on an estimate of the edge shift - #print(df) - #estimated_edge_shift, df_diff, df_diff_max = find_pos_maxdiff(df, filenames,options=options) + dst = os.path.join(options['save_folder'], filename) + '_pre_edge_subtraction.png' - #print(estimated_edge_shift) - #estimated_edge_shift - ###========================================================================================================================================================================= - #implement widget - if edge == 'Mn': - edge_start = 6.42 - #edge_start = estimated_edge_shift - if edge == 'Ni': - edge_start = 8.3 + fig, ax = plt.subplots(1,2,figsize=(10,5)) + data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax) + xanes_data_bkgd_subtracted.plot(x='ZapEnergy', y=filename, color='red', ax=ax) + ax.set_title(f'{os.path.basename(filename)} - After subtraction', size=20) - #making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined) - df_start=df.loc[df["ZapEnergy"] < edge_start] - - #Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data - df_bkgd = pd.DataFrame(df["ZapEnergy"]) + plt.savefig(dst) + plt.close() - for files in filenames: + return xanes_data_bkgd_subtracted - #Fitting linear function to the background - d = np.polyfit(df_start["ZapEnergy"],df_start[files],1) - function_bkgd = np.poly1d(d) - - #making a list, y_pre,so the background will be applied to all ZapEnergy-values - y_bkgd=function_bkgd(df["ZapEnergy"]) - - #adding a new column in df_background with the y-values of the background - df_bkgd.insert(1,files,y_bkgd) - - - if options['troubleshoot'] == True: - ### FOR FIGURING OUT WHERE IT GOES WRONG/WHICH FILE IS CORRUPT - ax = df.plot(x = "ZapEnergy",y=files) - #Plotting the calculated pre-edge background with the region used for the regression - if options['print'] == True: - #Plotting an example of the edge_start region and the fitted background that will later be subtracted - fig, (ax1,ax2,ax3) = plt.subplots(1,3,figsize=(15,5)) - df.plot(x="ZapEnergy", y=filenames,color="Black",ax=ax1) - df_bkgd.plot(x="ZapEnergy", y=filenames,color="Red",ax=ax1) - plt.axvline(x = max(df_start["ZapEnergy"])) - #fig = plt.figure(figsize=(15,15)) - df_bkgd.plot(x="ZapEnergy", y=filenames,color="Red",ax=ax2) - ax1.set_title('Data and fitted background') - #Zooming into bacground region to confirm fit and limits looks reasonable - df.plot(x = "ZapEnergy",y=filenames,ax=ax2) #defining x and y) - ax2.set_xlim([min(df_start["ZapEnergy"]),max(df_start["ZapEnergy"])+0.01]) - #finding maximum and minimum values in the backgrounds - min_values=[] - max_values=[] - for file in filenames: - min_values.append(min(df_start[file])) - max_values.append(max(df_start[file])) - ax2.set_ylim([min(min_values),max(max_values)]) - plt.axvline(x = max(df_start["ZapEnergy"])) - #ax2.set_xlim([25, 50]) - ###################### Subtracting the pre edge from xmap_roi00 ################ - - #making a new dataframe to insert the background subtracted intensities - df_bkgd_sub = pd.DataFrame(df["ZapEnergy"]) - #inserting the background subtracted original xmap_roi00 data - - for files in filenames: - newintensity_calc=df[files]-df_bkgd[files] - df_bkgd_sub.insert(1,files,newintensity_calc) - - if options['print'] == True: - df.plot(x = "ZapEnergy",y=filenames, color="Black", ax=ax3, legend=False) - #plt.axvline(x = max(df_start["ZapEnergy"])) - df_bkgd_sub.plot(x="ZapEnergy", y=filenames,color="Red",ax=ax3, legend=False) - ax3.set_title('Data and background-subtracted data') - - return df_bkgd_sub,filenames,edge def post_edge_fit(path, options={}): #FIXME should be called "fitting post edge" (normalization is not done here, need edge shift position) From 2baa765806d5035b9c4c4d9bab7d5a18c7feb92a Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 15:55:21 +0200 Subject: [PATCH 10/56] Quasi-fixed linting issue causing automatic test to fail --- nafuma/xanes/io.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 527f300..f8a3e78 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -8,6 +8,9 @@ def split_xanes_scan(root, destination=None, replace=False): #destination should be the path to the processed data #insert a for-loop to go through all the folders.dat-files in the folder root\xanes\raw + + # FIXME Only adding this variable to pass the Linting-tests - will refactor this later + filename = 'dummy' with open(filename, 'r') as f: lines = f.readlines() From a49fc8b0d23ec7b26f02fd1d60afd901205a400c Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 16:18:22 +0200 Subject: [PATCH 11/56] Refactor read_data --- nafuma/xanes/io.py | 49 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index f8a3e78..b5faf2c 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -2,6 +2,8 @@ import pandas as pd import matplotlib.pyplot as plt import os import numpy as np +import nafuma.auxillary as aux + def split_xanes_scan(root, destination=None, replace=False): #root is the path to the beamtime-folder @@ -105,8 +107,51 @@ def get_filenames(path): return filenames -def put_in_dataframe(path): - filenames = get_filenames(path) + + +def read_data(data: dict, options={}) -> pd.DataFrame: + + required_options = [] + default_options = { + + } + + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + columns = ['ZapEnergy'] + + # Initialise DataFrame with only ZapEnergy-column + xanes_data = pd.read_csv(data['path'][0])[['ZapEnergy']] + + for filename in data['path']: + columns.append(filename) + + scan_data = pd.read_csv(filename) + scan_data = scan_data[[determine_active_roi(scan_data)]] + xanes_data.insert(1, filename, scan_data) + + + return xanes_data + + + + + +def determine_active_roi(scan_data): + + #Trying to pick the roi with the highest difference between maximum and minimum intensity --> biggest edge shift + if max(scan_data["xmap_roi00"])-min(scan_data["xmap_roi00"])>max(scan_data["xmap_roi01"])-min(scan_data["xmap_roi01"]): + active_roi = 'xmap_roi00' + else: + active_roi = 'xmap_roi01' + + return active_roi + + + + +def put_into_dataframe(data: dict, options={}) -> pd.DataFrame: + filenames = get_filenames(data) #making the column names to be used in the dataframe, making sure the first column is the ZapEnergy column_names = ["ZapEnergy"] From 0d757ce36501f1aa1a7add8abb470b4055cf070b Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 16:26:41 +0200 Subject: [PATCH 12/56] Move get_filenames to auxillary and generalise --- nafuma/auxillary.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/nafuma/auxillary.py b/nafuma/auxillary.py index 33a62fe..072ee83 100644 --- a/nafuma/auxillary.py +++ b/nafuma/auxillary.py @@ -1,5 +1,6 @@ import json import numpy as np +import os def update_options(options, required_options, default_options): ''' Takes a dictionary of options along with a list of required options and dictionary of default options, and sets all keyval-pairs of options that is not already defined to the default values''' @@ -71,4 +72,17 @@ def write_log(message, options={}): with open(options['logfile'], 'a') as f: - f.write(message) \ No newline at end of file + f.write(message) + + +#Function that "collects" all the files in a folder, only accepting .dat-files from xanes-measurements +def get_filenames(path, ext): + ''' Collects all filenames from specified path with a specificed extension + + Input: + path: path to find all filenames (relative or absolute) + ext: extension (including ".")''' + + filenames = [os.path.join(path, filename) for filename in os.listdir(path) if os.path.isfile(os.path.join(path, filename)) and filename.endswith(ext)] + + return filenames \ No newline at end of file From 303704c3577a2762041b1d4b647c8bea016536d8 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 17:54:51 +0200 Subject: [PATCH 13/56] Add filter --- nafuma/auxillary.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nafuma/auxillary.py b/nafuma/auxillary.py index 072ee83..2b87479 100644 --- a/nafuma/auxillary.py +++ b/nafuma/auxillary.py @@ -68,7 +68,7 @@ def write_log(message, options={}): options = update_options(options=options, required_options=required_options, default_options=default_options) now = datetime.now().strftime('%Y/%m/%d %H:%M:%S') - message = f'{now} {message} \n' + message = f'[{now}] {message} \n' with open(options['logfile'], 'a') as f: @@ -76,13 +76,13 @@ def write_log(message, options={}): #Function that "collects" all the files in a folder, only accepting .dat-files from xanes-measurements -def get_filenames(path, ext): +def get_filenames(path, ext, filter=''): ''' Collects all filenames from specified path with a specificed extension Input: path: path to find all filenames (relative or absolute) ext: extension (including ".")''' - filenames = [os.path.join(path, filename) for filename in os.listdir(path) if os.path.isfile(os.path.join(path, filename)) and filename.endswith(ext)] + filenames = [os.path.join(path, filename) for filename in os.listdir(path) if os.path.isfile(os.path.join(path, filename)) and filename.endswith(ext) and filter in filename] return filenames \ No newline at end of file From 0b89524ef19b8a2c6f8dd76302f4c6a51c3c7258 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 17:55:42 +0200 Subject: [PATCH 14/56] Clear up small bugs encountered during testing --- nafuma/xanes/calib.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 833e98e..a1211e2 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -22,13 +22,13 @@ def find_element(data: dict) -> str: 'Ni': [8.0, 8.6] } - if element_energy_intervals['Mn'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Mn'][1]: + if (element_energy_intervals['Mn'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Mn'][1]): edge = 'Mn' - elif element_energy_intervals['Co'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Fe'][1]: + elif (element_energy_intervals['Fe'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Fe'][1]): edge = 'Fe' - elif element_energy_intervals['Co'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Co'][1]: + elif (element_energy_intervals['Co'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Co'][1]): edge = 'Co' - elif element_energy_intervals['Ni'][0] < data['xanes_data']["ZapEnergy"][0] < element_energy_intervals['Ni'][1]: + elif (element_energy_intervals['Ni'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Ni'][1]): edge = 'Ni' @@ -45,7 +45,7 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: default_options = { 'edge_start': None, 'log': False, - 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_pre_edge_fit.log', + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_pre_edge_fit.log', 'save_plots': False, 'save_folder': './' } @@ -62,11 +62,12 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: if not options['edge_start']: edge_starts = { 'Mn': 6.42, - 'Fe': 7.11, + 'Fe': 7.09, 'Co': 7.705, 'Ni': 8.3 } + data['edge'] = find_element(data) edge_start = edge_starts[data['edge']] # FIXME There should be an option to specify the interval in which to fit the background - now it is taking everything to the left of edge_start parameter, but if there are some artifacts in this area, it should be possible to @@ -79,7 +80,7 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: for i, filename in enumerate(data['path']): if options['log']: - aux.write_log(message=f'Fitting background on {filename} ({i} / {len(data["path"])}', options=options) + aux.write_log(message=f'Fitting background on {os.path.basename(filename)} ({i+1} / {len(data["path"])})', options=options) #Fitting linear function to the background params = np.polyfit(pre_edge_data["ZapEnergy"],pre_edge_data[filename],1) @@ -95,15 +96,15 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: if not os.path.isdir(options['save_folder']): os.makedirs(options['save_folder']) - dst = os.path.join(options['save_folder'], filename) + '_pre_edge_fit.png' + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_pre_edge_fit.png' fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5)) - data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) ax1.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) - data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax2) ax2.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') ax2.set_xlim([min(pre_edge_data['ZapEnergy']), max(pre_edge_data['ZapEnergy'])]) @@ -111,7 +112,7 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: ax2.set_title(f'{os.path.basename(filename)} - Fit region', size=20) - plt.savefig(dst) + plt.savefig(dst, transparent=False) plt.close() From e7a95d65edd7a04d8dd38cce1e537e4008af30fd Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 16 Jun 2022 17:56:08 +0200 Subject: [PATCH 15/56] Refactor read_data and move get_filenames --- nafuma/xanes/io.py | 71 ++++------------------------------------------ 1 file changed, 6 insertions(+), 65 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index b5faf2c..458b38f 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -89,28 +89,15 @@ def split_xanes_scan(root, destination=None, replace=False): df.to_csv('{}_{}_{}.dat'.format(filename.split('.')[0], edge_actual, count)) -#Function that "collects" all the files in a folder, only accepting .dat-files from xanes-measurements -def get_filenames(path): - - - cwd = os.getcwd() - - # Change into path provided - os.chdir(path) - - filenames = [os.path.join(path, filename) for filename in os.listdir() if os.path.isfile(filename) and filename[-4:] == '.dat'] #changed - - - - # Change directory back to where you ran the script from - os.chdir(cwd) - - return filenames + def read_data(data: dict, options={}) -> pd.DataFrame: + + # FIXME Handle the case when dataseries are not the same size + required_options = [] default_options = { @@ -138,6 +125,8 @@ def read_data(data: dict, options={}) -> pd.DataFrame: def determine_active_roi(scan_data): + + # FIXME For Co-edge, this gave a wrong scan #Trying to pick the roi with the highest difference between maximum and minimum intensity --> biggest edge shift if max(scan_data["xmap_roi00"])-min(scan_data["xmap_roi00"])>max(scan_data["xmap_roi01"])-min(scan_data["xmap_roi01"]): @@ -146,51 +135,3 @@ def determine_active_roi(scan_data): active_roi = 'xmap_roi01' return active_roi - - - - -def put_into_dataframe(data: dict, options={}) -> pd.DataFrame: - filenames = get_filenames(data) - - #making the column names to be used in the dataframe, making sure the first column is the ZapEnergy - column_names = ["ZapEnergy"] - - for i in range(len(filenames)): - column_names.append(filenames[i]) - - #Taking the first file in the folder and extracting ZapEnergies and intensity from that (only need the intensity from the rest) - first = pd.read_csv(filenames[0], skiprows=0) - - #Making a data frame with the correct columns, and will fill inn data afterwards - df = pd.DataFrame(columns = column_names) - #First putting in the 2theta-values - df["ZapEnergy"]=first["ZapEnergy"] - - #filling in the intensities from all files into the corresponding column in the dataframe - for i in range(len(filenames)): - df2 = pd.read_csv(filenames[i]) - df2 = df2.drop(['Mon','Det1','Det2','Det3','Det4','Det5', 'Det6','Ion1'], axis=1) #, axis=1) - df2 = df2.drop(['MonEx','Ion2','Htime','MusstEnc1','MusstEnc3','MusstEnc4', 'TwoTheta', 'ZCryo'], axis=1) - df2 = df2.drop(['ZBlower1', 'ZBlower2', 'ZSrcur'], axis=1)#, axis=19) #removing the sigma at this point - - ############## THIS PART PICKS OUT WHICH ROI IS OF INTEREST, BUT MUST BE FIXED IF LOOKING AT THREE EDGES (roi00,roi01,roi02) ##################### - if 'xmap_roi01' in df2.columns: - #Trying to pick the roi with the highest difference between maximum and minimum intensity --> biggest edge shift - if max(df2["xmap_roi00"])-min(df2["xmap_roi00"])>max(df2["xmap_roi01"])-min(df2["xmap_roi01"]): - df[filenames[i]]=df2["xmap_roi00"] #forMn - else: - df[filenames[i]]=df2["xmap_roi01"] #forNi - else: - df[filenames[i]]=df2["xmap_roi00"] - ############################################################################################### - - i=i+1 - - - #print(df) - #If I want to make a csv-file of the raw data. Decided that was not necessary: - #df.to_csv('static-Mn-edge.csv') #writing it to a csv, first row is datapoint (index), second column is 2theta, and from there the scans starts - - - return df \ No newline at end of file From 880722d7784d7cc6c3a44ceb98e87fb3aa7bd1b8 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Fri, 17 Jun 2022 15:35:24 +0200 Subject: [PATCH 16/56] Load correct xmap_roi for more cases --- nafuma/xanes/io.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 458b38f..20aa717 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -115,7 +115,10 @@ def read_data(data: dict, options={}) -> pd.DataFrame: scan_data = pd.read_csv(filename) scan_data = scan_data[[determine_active_roi(scan_data)]] - xanes_data.insert(1, filename, scan_data) + xanes_data = pd.concat([xanes_data, scan_data], axis=1) + + + xanes_data.columns = columns return xanes_data @@ -129,9 +132,24 @@ def determine_active_roi(scan_data): # FIXME For Co-edge, this gave a wrong scan #Trying to pick the roi with the highest difference between maximum and minimum intensity --> biggest edge shift - if max(scan_data["xmap_roi00"])-min(scan_data["xmap_roi00"])>max(scan_data["xmap_roi01"])-min(scan_data["xmap_roi01"]): - active_roi = 'xmap_roi00' - else: - active_roi = 'xmap_roi01' + # if max(scan_data["xmap_roi00"])-min(scan_data["xmap_roi00"])>max(scan_data["xmap_roi01"])-min(scan_data["xmap_roi01"]): + # active_roi = 'xmap_roi00' + # else: + # active_roi = 'xmap_roi01' + if (scan_data['xmap_roi00'].iloc[0:100].mean() < scan_data['xmap_roi00'].iloc[-100:].mean()) and (scan_data['xmap_roi01'].iloc[0:100].mean() < scan_data['xmap_roi01'].iloc[-100:].mean()): + if (scan_data['xmap_roi00'].max()-scan_data['xmap_roi00'].min()) > (scan_data['xmap_roi01'].max() - scan_data['xmap_roi01'].min()): + active_roi = 'xmap_roi00' + else: + active_roi = 'xmap_roi01' + + elif scan_data['xmap_roi00'].iloc[0:100].mean() < scan_data['xmap_roi00'].iloc[-100:].mean(): + active_roi = 'xmap_roi00' + + elif scan_data['xmap_roi01'].iloc[0:100].mean() < scan_data['xmap_roi01'].iloc[-100:].mean(): + active_roi = 'xmap_roi01' + + else: + active_roi = None + return active_roi From cb2c7532e6ac48073a3cbbccb809ae7631019b2b Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Fri, 17 Jun 2022 15:46:33 +0200 Subject: [PATCH 17/56] Make sure data['path'] is in a list --- nafuma/xanes/io.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 20aa717..816b8f5 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -110,6 +110,9 @@ def read_data(data: dict, options={}) -> pd.DataFrame: # Initialise DataFrame with only ZapEnergy-column xanes_data = pd.read_csv(data['path'][0])[['ZapEnergy']] + if not isinstance(data['path'], list): + data['path'] = [data['path']] + for filename in data['path']: columns.append(filename) From ba349a5892f480b398444e095f80779e5d7111c3 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Fri, 17 Jun 2022 15:58:39 +0200 Subject: [PATCH 18/56] Refactor estimation of edge position and automatise pre edge limit setting --- nafuma/xanes/calib.py | 70 ++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 30 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index a1211e2..e540e9b 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -12,7 +12,7 @@ from datetime import datetime ##Better to make a new function that loops through the files, and performing the split_xanes_scan on #Trying to make a function that can decide which edge it is based on the first ZapEnergy-value -def find_element(data: dict) -> str: +def find_element(data: dict, index=0) -> str: ''' Takes the data dictionary and determines based on the start value of the ZapEnergy-column which element the edge is from.''' element_energy_intervals = { @@ -22,13 +22,13 @@ def find_element(data: dict) -> str: 'Ni': [8.0, 8.6] } - if (element_energy_intervals['Mn'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Mn'][1]): + if (element_energy_intervals['Mn'][0] < data['xanes_data_original']["ZapEnergy"].iloc[index]) & (data['xanes_data_original']["ZapEnergy"].iloc[index] < element_energy_intervals['Mn'][1]): edge = 'Mn' - elif (element_energy_intervals['Fe'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Fe'][1]): + elif (element_energy_intervals['Fe'][0] < data['xanes_data_original']["ZapEnergy"].iloc[index]) & (data['xanes_data_original']["ZapEnergy"].iloc[index] < element_energy_intervals['Fe'][1]): edge = 'Fe' - elif (element_energy_intervals['Co'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Co'][1]): + elif (element_energy_intervals['Co'][0] < data['xanes_data_original']["ZapEnergy"].iloc[index]) & (data['xanes_data_original']["ZapEnergy"].iloc[index] < element_energy_intervals['Co'][1]): edge = 'Co' - elif (element_energy_intervals['Ni'][0] < data['xanes_data_original']["ZapEnergy"].iloc[0]) & (data['xanes_data_original']["ZapEnergy"].iloc[0] < element_energy_intervals['Ni'][1]): + elif (element_energy_intervals['Ni'][0] < data['xanes_data_original']["ZapEnergy"].iloc[index]) & (data['xanes_data_original']["ZapEnergy"].iloc[index] < element_energy_intervals['Ni'][1]): edge = 'Ni' @@ -58,22 +58,25 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME Implement with finding accurate edge position + # FIXME Allow specification of start of pre-edge area # Find the cutoff point at which the edge starts - everything to the LEFT of this point will be used in the pre edge function fit if not options['edge_start']: - edge_starts = { - 'Mn': 6.42, - 'Fe': 7.09, - 'Co': 7.705, - 'Ni': 8.3 + pre_edge_limit_offsets = { + 'Mn': 0.03, + 'Fe': 0.03, + 'Co': 0.03, + 'Ni': 0.03 } data['edge'] = find_element(data) - edge_start = edge_starts[data['edge']] + + edge_position = estimate_edge_position(data, options, index=0) + pre_edge_limit = edge_position - pre_edge_limit_offsets[data['edge']] # FIXME There should be an option to specify the interval in which to fit the background - now it is taking everything to the left of edge_start parameter, but if there are some artifacts in this area, it should be possible to # limit the interval # Making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined) - pre_edge_data = data['xanes_data_original'].loc[data['xanes_data_original']["ZapEnergy"] < edge_start] + pre_edge_data = data['xanes_data_original'].loc[data['xanes_data_original']["ZapEnergy"] < pre_edge_limit] # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data pre_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) @@ -161,6 +164,31 @@ def pre_edge_subtraction(data: dict, options={}): return xanes_data_bkgd_subtracted +def estimate_edge_position(data: dict, options={}, index=0): + #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. + required_options = ['print','periods'] + default_options = { + 'print': False, + 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly + } + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + #making new dataframe to keep the differentiated data + df_diff = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) + df_diff[data['path'][index]]=data['xanes_data_original'][data['path'][index]].diff(periods=options['periods']) + + #shifting column values up so that average differential fits right between the points used in the calculation + df_diff[data['path'][index]]=df_diff[data['path'][index]].shift(-int(options['periods']/2)) + df_diff_max = df_diff[data['path'][index]].dropna().max() + estimated_edge_shift =df_diff.loc[df_diff[data['path'][index]] == df_diff_max,'ZapEnergy'].values[0] + + # FIXME Add logging option to see the result + + print(estimated_edge_shift) + + return estimated_edge_shift + + def post_edge_fit(path, options={}): #FIXME should be called "fitting post edge" (normalization is not done here, need edge shift position) required_options = ['print'] @@ -254,25 +282,7 @@ def smoothing(path, options={}): return df_smooth, filenames -def find_pos_maxdiff(df, filenames,options={}): - #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. - required_options = ['print','periods'] - default_options = { - 'print': False, - 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly - } - options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - #making new dataframe to keep the differentiated data - df_diff = pd.DataFrame(df["ZapEnergy"]) - df_diff[filenames]=df[filenames].diff(periods=options['periods']) - - #shifting column values up so that average differential fits right between the points used in the calculation - df_diff[filenames]=df_diff[filenames].shift(-int(options['periods']/2)) - df_diff_max = df_diff[filenames].dropna().max() - estimated_edge_shift =df_diff.loc[df_diff[filenames] == df_diff_max,'ZapEnergy'].values[0] - - return estimated_edge_shift, df_diff, df_diff_max def find_nearest(array, value): #function to find the value closes to "value" in an "array" From 672d5549fe9143bb724c65b195cc323f049448f5 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Fri, 17 Jun 2022 16:14:21 +0200 Subject: [PATCH 19/56] Fix lint issue --- nafuma/xanes/calib.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index e540e9b..3a37eab 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -136,6 +136,7 @@ def pre_edge_subtraction(data: dict, options={}): 'save_folder': './' } + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) if options['log']: aux.write_log(message='Starting pre edge subtraction', options=options) @@ -145,16 +146,16 @@ def pre_edge_subtraction(data: dict, options={}): if options['log']: aux.write_log(message=f'Subtracting background on {filename} ({i} / {len(data["path"])}', options=options) - xanes_data_bkgd_subtracted.insert(1, filename, data['xanes_data'][filename] - data['pre_edge_fit_data'][filename]) + xanes_data_bkgd_subtracted.insert(1, filename, data['xanes_data_original'][filename] - data['pre_edge_fit_data'][filename]) if options['save_plots']: if not os.path.isdir(options['save_folder']): os.makedirs(options['save_folder']) - dst = os.path.join(options['save_folder'], filename) + '_pre_edge_subtraction.png' + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_pre_edge_subtraction.png' - fig, ax = plt.subplots(1,2,figsize=(10,5)) - data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax) + fig, ax = plt.subplots(figsize=(10,5)) + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax) xanes_data_bkgd_subtracted.plot(x='ZapEnergy', y=filename, color='red', ax=ax) ax.set_title(f'{os.path.basename(filename)} - After subtraction', size=20) @@ -184,7 +185,8 @@ def estimate_edge_position(data: dict, options={}, index=0): # FIXME Add logging option to see the result - print(estimated_edge_shift) + if options['log']: + aux.write_log(message=f'Estimated edge shift for determination of pre-edge area is: {estimated_edge_shift} keV', options=options) return estimated_edge_shift @@ -309,7 +311,7 @@ def finding_e0(path, options={}): print("MORE THAN ONE FILE --> generalize") ##### - estimated_edge_shift, df_diff, df_diff_max = find_pos_maxdiff(df_smooth, filenames,options=options) + estimated_edge_shift, df_diff, df_diff_max = estimate_edge_position(df_smooth, filenames,options=options) print(estimated_edge_shift) #### ###df_diff[filenames]=df_smooth[filenames].diff(periods=options['periods']) # From 9c6a7d5991af452759fdf2f11afb76623e7d6aae Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Fri, 17 Jun 2022 16:59:37 +0200 Subject: [PATCH 20/56] Refactor post_edge_fit --- nafuma/xanes/calib.py | 100 ++++++++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 39 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index a45457b..10afda9 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -41,9 +41,9 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME Add log-file - required_options = ['edge_start', 'log', 'logfile', 'save_plots', 'save_folder'] + required_options = ['pre_edge_start', 'log', 'logfile', 'save_plots', 'save_folder'] default_options = { - 'edge_start': None, + 'pre_edge_start': None, 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_pre_edge_fit.log', 'save_plots': False, @@ -60,18 +60,13 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME Implement with finding accurate edge position # FIXME Allow specification of start of pre-edge area # Find the cutoff point at which the edge starts - everything to the LEFT of this point will be used in the pre edge function fit - if not options['edge_start']: - pre_edge_limit_offsets = { - 'Mn': 0.03, - 'Fe': 0.03, - 'Co': 0.03, - 'Ni': 0.03 - } + if not options['pre_edge_start']: + pre_edge_limit_offset = 0.03 data['edge'] = find_element(data) edge_position = estimate_edge_position(data, options, index=0) - pre_edge_limit = edge_position - pre_edge_limit_offsets[data['edge']] + pre_edge_limit = edge_position - pre_edge_limit_offset # FIXME There should be an option to specify the interval in which to fit the background - now it is taking everything to the left of edge_start parameter, but if there are some artifacts in this area, it should be possible to # limit the interval @@ -169,6 +164,7 @@ def estimate_edge_position(data: dict, options={}, index=0): #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. required_options = ['print','periods'] default_options = { + 'print': False, 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly } @@ -191,45 +187,71 @@ def estimate_edge_position(data: dict, options={}, index=0): return estimated_edge_shift -def post_edge_fit(path, options={}): +def post_edge_fit(data: dict, options={}): #FIXME should be called "fitting post edge" (normalization is not done here, need edge shift position) - required_options = ['print'] + required_options = ['post_edge_start', 'print'] default_options = { + 'post_edge_start': None, 'print': False } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + #FIXME Allow min and max limits + + if not options['post_edge_start']: + post_edge_limit_offset = 0.03 + + data['edge'] = find_element(data) + + edge_position = estimate_edge_position(data, options, index=0) + post_edge_limit = edge_position + post_edge_limit_offset + + + post_edge_data = data['xanes_data_original'].loc[data['xanes_data_original']["ZapEnergy"] > post_edge_limit] + post_edge_data.dropna(inplace=True) #Removing all indexes without any value, as some of the data sets misses the few last data points and fucks up the fit + + # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data + post_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) - df_bkgd_sub,filenames,edge = pre_edge_subtraction(path, options=options) - #Defining the end of the pre-edge-region for Mn/Ni, thus start of the edge - #FIXME Use rought edge shift estimate, add X eV as first guess, have an option to adjust this value with widget - if edge == 'Mn': - edge_stop = 6.565 - if edge == 'Ni': - edge_stop = 8.361 + for i, filename in enumerate(data['path']): + if options['log']: + aux.write_log(message=f'Fitting post edge on {os.path.basename(filename)} ({i+1} / {len(data["path"])})', options=options) - df_end= df_bkgd_sub.loc[df_bkgd_sub["ZapEnergy"] > edge_stop] # new dataframe only containing the post edge, where a regression line will be calculated in the for-loop below - df_end.dropna(inplace=True) #Removing all indexes without any value, as some of the data sets misses the few last data points and fucks up the fit - df_postedge = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) #making a new dataframe + #Fitting linear function to the background + params = np.polyfit(post_edge_data["ZapEnergy"], post_edge_data[filename], 2) + fit_function = np.poly1d(params) + + #making a list, y_pre,so the background will be applied to all ZapEnergy-values + background=fit_function(post_edge_fit_data["ZapEnergy"]) + + #adding a new column in df_background with the y-values of the background + post_edge_fit_data.insert(1,filename,background) + + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) - function_post_list=[] - for files in filenames: - d = np.polyfit(df_end["ZapEnergy"],df_end[files],1) - function_post = np.poly1d(d) - y_post=function_post(df_bkgd_sub["ZapEnergy"]) - function_post_list.append(function_post) - df_postedge.insert(1,files,y_post) #adding a new column with the y-values of the fitted post edge + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_post_edge_fit.png' - #Plotting the background subtracted signal with the post-edge regression line and the start point for the linear regression line - if options['print'] == True: - ax = df_bkgd_sub.plot(x = "ZapEnergy",y=filenames) #defining x and y - plt.axvline(x = min(df_end["ZapEnergy"])) - fig = plt.figure(figsize=(15,15)) - df_postedge.plot(x="ZapEnergy", y=filenames,color="Green",ax=ax, legend=False) - ax = df_bkgd_sub.plot(x = "ZapEnergy",y=filenames, legend=False) #defining x and y - df_postedge.plot(x="ZapEnergy", y=filenames,color="Green",ax=ax, legend=False) - plt.axvline(x = min(df_end["ZapEnergy"])) + fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5)) + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) + post_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) + ax1.axvline(x = max(post_edge_data['ZapEnergy']), ls='--') + ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) - return df_bkgd_sub, df_postedge, filenames, edge + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) + post_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax2) + ax2.axvline(x = max(post_edge_data['ZapEnergy']), ls='--') + ax2.set_xlim([min(post_edge_data['ZapEnergy']), max(post_edge_data['ZapEnergy'])]) + ax2.set_ylim([min(post_edge_data[filename]), max(post_edge_data[filename])]) + ax2.set_title(f'{os.path.basename(filename)} - Fit region', size=20) + + + plt.savefig(dst, transparent=False) + plt.close() + + + return post_edge_fit_data def smoothing(path, options={}): required_options = ['print','window_length','polyorder'] From 7214746af18475257e041e54483f456424aca8dc Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Mon, 20 Jun 2022 16:08:36 +0200 Subject: [PATCH 21/56] Refactor split_scans --- nafuma/xanes/io.py | 159 ++++++++++++++++++++++++++------------------- 1 file changed, 93 insertions(+), 66 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 816b8f5..53ad1ca 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -2,94 +2,120 @@ import pandas as pd import matplotlib.pyplot as plt import os import numpy as np -import nafuma.auxillary as aux +import nafuma.auxillary as aux +from nafuma.xanes.calib import find_element -def split_xanes_scan(root, destination=None, replace=False): +def split_scan_data(data: dict, options={}): + + + required_options = ['save', 'save_folder', 'replace', 'add_rois'] + + default_options = { + 'save': False, + 'save_folder': '.', + 'replace': False, + 'add_rois': False + } + + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) #root is the path to the beamtime-folder #destination should be the path to the processed data #insert a for-loop to go through all the folders.dat-files in the folder root\xanes\raw # FIXME Only adding this variable to pass the Linting-tests - will refactor this later - filename = 'dummy' + + if not isinstance(data['path'], list): + data['path'] = [data['path']] + + all_scans = [] - with open(filename, 'r') as f: - lines = f.readlines() + for filename in data['path']: + + with open(filename, 'r') as f: + lines = f.readlines() + + scan_datas, scan_data = [], [] + headers, header = [], '' + read_data = False - datas = [] - data = [] - headers = [] - header = '' - start = False - - for line in lines: - if line[0:2] == "#L": - start = True - header = line[2:].split() - continue - - elif line[0:2] == "#C": - start = False - - if data: - datas.append(data) - data = [] + for line in lines: + # Header line starts with #L - reads headers, and toggles data read-in on + if line[0:2] == "#L": + header, read_data = line[2:].split(), True + continue + + # First line after data started with #C - stops data read-in + elif line[0:2] == "#C": + read_data = False - if header: - headers.append(header) - header = '' + if scan_data: + scan_datas.append(scan_data); scan_data = [] + + if header: + headers.append(header); header = '' + + # Ignore line if read-in not toggled + if read_data == False: + continue + + # Read in data if it is + else: + scan_data.append(line.split()) - - if start == False: - continue - - else: - data.append(line.split()) - - - - - edges = {'Mn': [6.0, 6.1, 6.2, 6.3, 6.4, 6.5], 'Fe': [6.8, 6.9, 7.0, 7.1, 7.2], 'Co': [7.6, 7.7, 7.8, 7.9], 'Ni': [8.1, 8.2, 8.3, 8.4, 8.5]} - edge_count = {'Mn': 0, 'Fe': 0, 'Co': 0, 'Ni': 0} - - - for ind, data in enumerate(datas): - df = pd.DataFrame(data) - df.columns = headers[ind] - - edge_start = np.round((float(df["ZapEnergy"].min())), 1) - - for edge, energies in edges.items(): - if edge_start in energies: - edge_actual = edge - edge_count[edge] += 1 - + edges = {'Mn': [], 'Fe': [], 'Co': [], 'Ni': []} + + for i, scan_data in enumerate(scan_datas): + xanes_df = pd.DataFrame(scan_data).apply(pd.to_numeric) + xanes_df.columns = headers[i] + + if not ('xmap_roi00' in headers[i]) and (not 'xmap_roi01' in headers[i]): + continue + + + edge = find_element({'xanes_data_original': xanes_df}) + edges[edge].append(xanes_df) + - filename = filename.split('/')[-1] - count = str(edge_count[edge_actual]).zfill(4) + if options['add']: + + added_edges = {'Mn': [], 'Fe': [], 'Co': [], 'Ni': []} + for edge, scans in edges.items(): + if scans: + xanes_df = scans[0] - - # Save - if destination: - cwd = os.getcwd() + for i, scan in enumerate(scans): + if i > 0: - if not os.path.isdir(destination): - os.mkdir(destination) - - os.chdir(destination) + if 'xmap_roi00' in xanes_df.columns: + xanes_df['xmap_roi00'] += scan['xmap_roi00'] + if 'xmap_roi01' in xanes_df.columns: + xanes_df['xmap_roi01'] += scan['xmap_roi01'] - df.to_csv('{}_{}_{}.dat'.format(filename.split('.')[0], edge_actual, count)) + added_edges[edge].append(xanes_df) - os.chdir(cwd) - - else: - df.to_csv('{}_{}_{}.dat'.format(filename.split('.')[0], edge_actual, count)) + edges = added_edges + + if options['save']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) + filename = os.path.basename(filename).split('.')[0] + for edge, scans in edges.items(): + for i, scan in enumerate(scans): + count = '' if options['add'] else '_'+str(i).zfill(4) + path = os.path.join(options['save_folder'], f'{filename}_{edge}{count}.dat') + scan.to_csv(path) + + all_scans.append(edges) + + + return all_scans @@ -117,6 +143,7 @@ def read_data(data: dict, options={}) -> pd.DataFrame: columns.append(filename) scan_data = pd.read_csv(filename) + scan_data = scan_data[[determine_active_roi(scan_data)]] xanes_data = pd.concat([xanes_data, scan_data], axis=1) From cc80a48259dfae683a46752e76001488d035d605 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Mon, 20 Jun 2022 19:16:20 +0200 Subject: [PATCH 22/56] Add logging --- nafuma/xanes/io.py | 78 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 10 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 53ad1ca..40ea0c2 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -4,18 +4,23 @@ import os import numpy as np import nafuma.auxillary as aux from nafuma.xanes.calib import find_element +from datetime import datetime -def split_scan_data(data: dict, options={}): +def split_scan_data(data: dict, options={}) -> list: + ''' Splits a XANES-file from BM31 into different files depending on the edge. Has the option to add intensities of all scans of same edge into the same file. + As of now only picks out xmap_rois (fluoresence mode) and for Mn, Fe, Co and Ni K-edges.''' - required_options = ['save', 'save_folder', 'replace', 'add_rois'] + required_options = ['log', 'logfile', 'save', 'save_folder', 'replace', 'add_rois'] default_options = { - 'save': False, - 'save_folder': '.', - 'replace': False, - 'add_rois': False + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_split_edges.log', + 'save': False, # whether to save the files or not + 'save_folder': '.', # root folder of where to save the files + 'replace': False, # whether to replace the files if they already exist + 'add_rois': False # Whether to add the rois of individual scans of the same edge together } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -30,9 +35,15 @@ def split_scan_data(data: dict, options={}): data['path'] = [data['path']] all_scans = [] + + if options['log']: + aux.write_log(message='Starting file splitting...', options=options) for filename in data['path']: + if options['log']: + aux.write_log(message=f'Reading {filename}...', options=options) + with open(filename, 'r') as f: lines = f.readlines() @@ -44,6 +55,9 @@ def split_scan_data(data: dict, options={}): # Header line starts with #L - reads headers, and toggles data read-in on if line[0:2] == "#L": header, read_data = line[2:].split(), True + + if options['log']: + aux.write_log(message='... Found scan data. Starting read-in...', options=options) continue # First line after data started with #C - stops data read-in @@ -69,27 +83,46 @@ def split_scan_data(data: dict, options={}): for i, scan_data in enumerate(scan_datas): + xanes_df = pd.DataFrame(scan_data).apply(pd.to_numeric) xanes_df.columns = headers[i] + edge = find_element({'xanes_data_original': xanes_df}) + + if options['log']: + aux.write_log(message=f'Starting data clean-up ({edge}-edge)... ({i+1}/{len(scan_datas)})', options=options) + if not ('xmap_roi00' in headers[i]) and (not 'xmap_roi01' in headers[i]): + if options['log']: + aux.write_log(message='... Did not find fluoresence data. Skipping...', options=options) + continue - edge = find_element({'xanes_data_original': xanes_df}) + edges[edge].append(xanes_df) if options['add']: + + if options['log']: + aux.write_log(message=f'Addition of rois enabled. Starting addition...', options=options) added_edges = {'Mn': [], 'Fe': [], 'Co': [], 'Ni': []} for edge, scans in edges.items(): + + if options['log']: + aux.write_log(message=f'... Adding rois of the {edge}-edge...', options=options) + if scans: xanes_df = scans[0] for i, scan in enumerate(scans): if i > 0: + if options['log']: + aux.write_log(message=f'... ... Adding {i}/{len(scans)}', options=options) + if 'xmap_roi00' in xanes_df.columns: xanes_df['xmap_roi00'] += scan['xmap_roi00'] if 'xmap_roi01' in xanes_df.columns: @@ -100,7 +133,14 @@ def split_scan_data(data: dict, options={}): edges = added_edges if options['save']: + + if options['log']: + aux.write_log(message=f'Saving data to {options["save_folder"]}', options=options) + if not os.path.isdir(options['save_folder']): + if options['log']: + aux.write_log(message=f'... {options["save_folder"]} does not exist. Creating folder.', options=options) + os.makedirs(options['save_folder']) @@ -110,10 +150,26 @@ def split_scan_data(data: dict, options={}): for i, scan in enumerate(scans): count = '' if options['add'] else '_'+str(i).zfill(4) path = os.path.join(options['save_folder'], f'{filename}_{edge}{count}.dat') - scan.to_csv(path) + + if not os.path.isfile(path): + scan.to_csv(path) + if options['log']: + aux.write_log(message=f'... Scan saved to {path}', options=options) + + elif options['replace'] and os.path.isfile(path): + scan.to_csv(path) + if options['log']: + aux.write_log(message=f'... File already exists. Overwriting to {path}', options=options) + + elif not options['replace'] and os.path.isfile(path): + if options['log']: + aux.write_log(message=f'... File already exists. Skipping...', options=options) all_scans.append(edges) + if options['log']: + aux.write_log(message=f'All done!', options=options) + return all_scans @@ -124,9 +180,9 @@ def read_data(data: dict, options={}) -> pd.DataFrame: # FIXME Handle the case when dataseries are not the same size - required_options = [] + required_options = ['adjust'] default_options = { - + 'adjust': 0 } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -135,6 +191,7 @@ def read_data(data: dict, options={}) -> pd.DataFrame: # Initialise DataFrame with only ZapEnergy-column xanes_data = pd.read_csv(data['path'][0])[['ZapEnergy']] + xanes_data['ZapEnergy'] += options['adjust'] if not isinstance(data['path'], list): data['path'] = [data['path']] @@ -157,6 +214,7 @@ def read_data(data: dict, options={}) -> pd.DataFrame: + def determine_active_roi(scan_data): # FIXME For Co-edge, this gave a wrong scan From 054311ca102893b2aca555a9360e13f847dd01f2 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Tue, 21 Jun 2022 18:01:53 +0200 Subject: [PATCH 23/56] Small adjustments to logging --- nafuma/xanes/io.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 40ea0c2..f623a38 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -16,7 +16,7 @@ def split_scan_data(data: dict, options={}) -> list: default_options = { 'log': False, - 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_split_edges.log', + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_split_edges.log', 'save': False, # whether to save the files or not 'save_folder': '.', # root folder of where to save the files 'replace': False, # whether to replace the files if they already exist @@ -24,12 +24,6 @@ def split_scan_data(data: dict, options={}) -> list: } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - #root is the path to the beamtime-folder - #destination should be the path to the processed data - - #insert a for-loop to go through all the folders.dat-files in the folder root\xanes\raw - - # FIXME Only adding this variable to pass the Linting-tests - will refactor this later if not isinstance(data['path'], list): data['path'] = [data['path']] @@ -89,12 +83,12 @@ def split_scan_data(data: dict, options={}) -> list: edge = find_element({'xanes_data_original': xanes_df}) if options['log']: - aux.write_log(message=f'Starting data clean-up ({edge}-edge)... ({i+1}/{len(scan_datas)})', options=options) + aux.write_log(message=f'... Starting data clean-up ({edge}-edge)... ({i+1}/{len(scan_datas)})', options=options) if not ('xmap_roi00' in headers[i]) and (not 'xmap_roi01' in headers[i]): if options['log']: - aux.write_log(message='... Did not find fluoresence data. Skipping...', options=options) + aux.write_log(message='... ... Did not find fluoresence data. Skipping...', options=options) continue @@ -106,13 +100,13 @@ def split_scan_data(data: dict, options={}) -> list: if options['add']: if options['log']: - aux.write_log(message=f'Addition of rois enabled. Starting addition...', options=options) + aux.write_log(message=f'... Addition of rois enabled. Starting addition...', options=options) added_edges = {'Mn': [], 'Fe': [], 'Co': [], 'Ni': []} for edge, scans in edges.items(): if options['log']: - aux.write_log(message=f'... Adding rois of the {edge}-edge...', options=options) + aux.write_log(message=f'... ... Adding rois of the {edge}-edge...', options=options) if scans: xanes_df = scans[0] @@ -121,7 +115,7 @@ def split_scan_data(data: dict, options={}) -> list: if i > 0: if options['log']: - aux.write_log(message=f'... ... Adding {i}/{len(scans)}', options=options) + aux.write_log(message=f'... ... ... Adding {i+1}/{len(scans)}', options=options) if 'xmap_roi00' in xanes_df.columns: xanes_df['xmap_roi00'] += scan['xmap_roi00'] @@ -135,11 +129,11 @@ def split_scan_data(data: dict, options={}) -> list: if options['save']: if options['log']: - aux.write_log(message=f'Saving data to {options["save_folder"]}', options=options) + aux.write_log(message=f'... Saving data to {options["save_folder"]}', options=options) if not os.path.isdir(options['save_folder']): if options['log']: - aux.write_log(message=f'... {options["save_folder"]} does not exist. Creating folder.', options=options) + aux.write_log(message=f'... ... {options["save_folder"]} does not exist. Creating folder.', options=options) os.makedirs(options['save_folder']) @@ -154,16 +148,16 @@ def split_scan_data(data: dict, options={}) -> list: if not os.path.isfile(path): scan.to_csv(path) if options['log']: - aux.write_log(message=f'... Scan saved to {path}', options=options) + aux.write_log(message=f'... ... Scan saved to {path}', options=options) elif options['replace'] and os.path.isfile(path): scan.to_csv(path) if options['log']: - aux.write_log(message=f'... File already exists. Overwriting to {path}', options=options) + aux.write_log(message=f'... ... File already exists. Overwriting to {path}', options=options) elif not options['replace'] and os.path.isfile(path): if options['log']: - aux.write_log(message=f'... File already exists. Skipping...', options=options) + aux.write_log(message=f'... ... File already exists. Skipping...', options=options) all_scans.append(edges) From 1cf949e36bbd1c9ef8e377535c0782f7507ccefa Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Tue, 21 Jun 2022 18:02:08 +0200 Subject: [PATCH 24/56] Start clean-up of smoothing --- nafuma/xanes/calib.py | 97 ++++++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 38 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 10afda9..4692446 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -126,7 +126,7 @@ def pre_edge_subtraction(data: dict, options={}): required_options = ['log', 'logfile', 'save_plots', 'save_folder'] default_options = { 'log': False, - 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_pre_edge_subtraction.log', + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_pre_edge_subtraction.log', 'save_plots': False, 'save_folder': './' } @@ -162,10 +162,10 @@ def pre_edge_subtraction(data: dict, options={}): def estimate_edge_position(data: dict, options={}, index=0): #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. - required_options = ['print','periods'] + required_options = ['log','logfile', 'periods'] default_options = { - - 'print': False, + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_edge_position_estimation.log', 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -189,25 +189,29 @@ def estimate_edge_position(data: dict, options={}, index=0): def post_edge_fit(data: dict, options={}): #FIXME should be called "fitting post edge" (normalization is not done here, need edge shift position) - required_options = ['post_edge_start', 'print'] + required_options = ['log', 'logfile', 'post_edge_interval'] default_options = { - 'post_edge_start': None, - 'print': False + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_post_edge_fit.log', + 'post_edge_interval': [None, None], } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - #FIXME Allow min and max limits - if not options['post_edge_start']: + if not options['post_edge_interval'][0]: post_edge_limit_offset = 0.03 data['edge'] = find_element(data) edge_position = estimate_edge_position(data, options, index=0) - post_edge_limit = edge_position + post_edge_limit_offset + options['post_edge_interval'][0] = edge_position + post_edge_limit_offset - post_edge_data = data['xanes_data_original'].loc[data['xanes_data_original']["ZapEnergy"] > post_edge_limit] + if not options['post_edge_interval'][1]: + options['post_edge_interval'][1] = data['xanes_data_original']['ZapEnergy'].max() + + + post_edge_data = data['xanes_data_original'].loc[(data['xanes_data_original']["ZapEnergy"] > options['post_edge_interval'][0]) & (data['xanes_data_original']["ZapEnergy"] < options['post_edge_interval'][1])] post_edge_data.dropna(inplace=True) #Removing all indexes without any value, as some of the data sets misses the few last data points and fucks up the fit # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data @@ -253,40 +257,31 @@ def post_edge_fit(data: dict, options={}): return post_edge_fit_data -def smoothing(path, options={}): - required_options = ['print','window_length','polyorder'] +def smoothing(data: dict, options={}): + + # FIXME Add logging + # FIXME Add saving of files + + required_options = ['log', 'logfile', 'window_length','polyorder'] default_options = { - 'print': False, + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_smoothing.log', + 'save_plots': False, + 'save_folder': './', 'window_length': 3, 'polyorder': 2 } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - df_bkgd_sub, df_postedge, filenames, edge = post_edge_fit(path,options=options) - #================= SMOOTHING - df_smooth = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) - df_default = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) - #df_smooth[filenames] = df_bkgd_sub.iloc[:,2].rolling(window=rolling_av).mean() - #df_smooth[filenames] = df_smooth[filenames].shift(-int((rolling_av)/2)) - for filename in filenames: - x_smooth=savgol_filter(df_bkgd_sub[filename], options['window_length'],options['polyorder']) - df_smooth[filename] = x_smooth - x_default=savgol_filter(df_bkgd_sub[filename],default_options['window_length'],default_options['polyorder']) - df_default[filename] = x_default - - + + # FIXME Add other types of filters + for filename in data['path']: + xanes_smooth = savgol_filter(data['xanes_data'][filename], options['window_length'], options['polyorder']) + default_smooth = savgol_filter(data['xanes_data'][filename], default_options['window_length'], default_options['polyorder']) + #printing the smoothed curves vs data - if options['print'] == True: - - ## ================================================ - #df_diff = pd.DataFrame(df_smooth["ZapEnergy"]) - #df_diff_estimated_max = df_diff[filenames].dropna().max() - - - #estimated_edge_shift=df_diff.loc[df_diff[filenames] == df_diff_max,'ZapEnergy'].values[0] - # ========================================== - + if options['save_folder'] == True: fig, (ax1,ax2) = plt.subplots(1,2,figsize=(15,5)) x_range_zoom=[6.54,6.55] #make into widget @@ -303,8 +298,34 @@ def smoothing(path, options={}): ax2.set_xlim(x_range_zoom) ax2.set_ylim(y_range_zoom) ax2.set_title("Smoothed curve (green) vs data (red) using default window_length and polyorder") + + + # FIXME Clear up these two plotting functions + + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) + + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_pre_edge_fit.png' + + fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5)) + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) + pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) + ax1.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') + ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) + + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) + pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax2) + ax2.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') + ax2.set_xlim([min(pre_edge_data['ZapEnergy']), max(pre_edge_data['ZapEnergy'])]) + ax2.set_ylim([min(pre_edge_data[filename]), max(pre_edge_data[filename])]) + ax2.set_title(f'{os.path.basename(filename)} - Fit region', size=20) + + + plt.savefig(dst, transparent=False) + plt.close() - return df_smooth, filenames + return xanes_smooth, default_smooth From 9e39135f0022903502fd81d6ba879da6e4bd8de0 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Tue, 21 Jun 2022 19:04:04 +0200 Subject: [PATCH 25/56] Update smoothing function --- nafuma/xanes/calib.py | 89 +++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 42 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 4692446..d81ac0f 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -262,70 +262,75 @@ def smoothing(data: dict, options={}): # FIXME Add logging # FIXME Add saving of files - required_options = ['log', 'logfile', 'window_length','polyorder'] + required_options = ['log', 'logfile', 'window_length','polyorder', 'save_default'] default_options = { 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_smoothing.log', 'save_plots': False, 'save_folder': './', 'window_length': 3, - 'polyorder': 2 + 'polyorder': 2, + 'save_default': False } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + if options['save_default']: + data['xanes_data_smooth_default'] = data['xanes_data']['ZapEnergy'] # FIXME Add other types of filters + # FIXME Instead of assigning values directly to the data dictionary, these should be made into an own DataFrame that you can decide later what to do with - these variables should + # then be returned for filename in data['path']: xanes_smooth = savgol_filter(data['xanes_data'][filename], options['window_length'], options['polyorder']) - default_smooth = savgol_filter(data['xanes_data'][filename], default_options['window_length'], default_options['polyorder']) + if options['save_default']: + default_smooth = savgol_filter(data['xanes_data'][filename], default_options['window_length'], default_options['polyorder']) + data['xanes_data'][filename] = xanes_smooth - #printing the smoothed curves vs data - if options['save_folder'] == True: - - fig, (ax1,ax2) = plt.subplots(1,2,figsize=(15,5)) - x_range_zoom=[6.54,6.55] #make into widget - y_range_zoom=[20000,80000] #make into widget - - df_bkgd_sub.plot.scatter(x = "ZapEnergy",y=filenames, ax=ax1, color="Red") - df_smooth.plot(x = "ZapEnergy",y=filenames, ax=ax1, color="Blue") - ax1.set_xlim(x_range_zoom) - ax1.set_ylim(y_range_zoom) - ax1.set_title("Smoothed curve (blue) vs data (red) used for further analysis") - - df_bkgd_sub.plot.scatter(x = "ZapEnergy",y=filenames, ax=ax2, color="Red") - df_default.plot(x = "ZapEnergy",y=filenames, ax=ax2, color="Green") - ax2.set_xlim(x_range_zoom) - ax2.set_ylim(y_range_zoom) - ax2.set_title("Smoothed curve (green) vs data (red) using default window_length and polyorder") + if options['save_default']: + data['xanes_data_smooth_default'][filename] = default_smooth - # FIXME Clear up these two plotting functions + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) - if options['save_plots']: - if not os.path.isdir(options['save_folder']): - os.makedirs(options['save_folder']) + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_smooth.png' - dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_pre_edge_fit.png' - - fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5)) - data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) - pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) - ax1.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') - ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) - - data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) - pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax2) - ax2.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') - ax2.set_xlim([min(pre_edge_data['ZapEnergy']), max(pre_edge_data['ZapEnergy'])]) - ax2.set_ylim([min(pre_edge_data[filename]), max(pre_edge_data[filename])]) - ax2.set_title(f'{os.path.basename(filename)} - Fit region', size=20) + edge_pos = estimate_edge_position(data=data, options=options) + intensity_midpoint = data['xanes_data'][filename].max() - data['xanes_data'][filename].min() - plt.savefig(dst, transparent=False) - plt.close() + if options['save_default']: + fig, (ax1, ax2) = plt.subplots(1,2,figsize=(20,5)) + data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) + xanes_smooth.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) + ax1.set_xlim([edge_pos-0.5, edge_pos+0.5]) + ax1.set_ylim([intensity_midpoint*0.98, intensity_midpoint*1.02]) + + ax1.set_title(f'{os.path.basename(filename)} - Smooth', size=20) + + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) + data['xanes_data_smooth_default'].plot(x='ZapEnergy', y=filename, color='green', ax=ax2) + ax2.set_xlim([edge_pos-0.5, edge_pos+0.5]) + ax2.set_ylim([intensity_midpoint*0.98, intensity_midpoint*1.02]) + ax2.set_title(f'{os.path.basename(filename)} - Smooth (default values)', size=20) + + elif not options['save_default']: + fig, ax = plt.subplots(figsize=(10,5)) + data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) + xanes_smooth.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) + ax1.set_xlim([edge_pos-0.5, edge_pos+0.5]) + ax1.set_ylim([intensity_midpoint*0.98, intensity_midpoint*1.02]) + + ax1.set_title(f'{os.path.basename(filename)} - Smooth', size=20) + + + plt.savefig(dst, transparent=False) + plt.close() - return xanes_smooth, default_smooth + # FIXME See comment above about return values + return None From 4d501adb729b4d48f0a3f5417e418a33462e2547 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Wed, 22 Jun 2022 15:56:34 +0200 Subject: [PATCH 26/56] Complete smooth and get determine_edge_position going --- nafuma/xanes/calib.py | 351 ++++++++++++++++++++++-------------------- 1 file changed, 187 insertions(+), 164 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index d81ac0f..dfe5b07 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -1,3 +1,5 @@ +from logging import raiseExceptions +from jinja2 import TemplateRuntimeError import pandas as pd import numpy as np import os @@ -160,31 +162,7 @@ def pre_edge_subtraction(data: dict, options={}): return xanes_data_bkgd_subtracted -def estimate_edge_position(data: dict, options={}, index=0): - #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. - required_options = ['log','logfile', 'periods'] - default_options = { - 'log': False, - 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_edge_position_estimation.log', - 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly - } - options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - #making new dataframe to keep the differentiated data - df_diff = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) - df_diff[data['path'][index]]=data['xanes_data_original'][data['path'][index]].diff(periods=options['periods']) - - #shifting column values up so that average differential fits right between the points used in the calculation - df_diff[data['path'][index]]=df_diff[data['path'][index]].shift(-int(options['periods']/2)) - df_diff_max = df_diff[data['path'][index]].dropna().max() - estimated_edge_shift =df_diff.loc[df_diff[data['path'][index]] == df_diff_max,'ZapEnergy'].values[0] - - # FIXME Add logging option to see the result - - if options['log']: - aux.write_log(message=f'Estimated edge shift for determination of pre-edge area is: {estimated_edge_shift} keV', options=options) - - return estimated_edge_shift def post_edge_fit(data: dict, options={}): @@ -274,22 +252,20 @@ def smoothing(data: dict, options={}): } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + df_smooth = pd.DataFrame(data['xanes_data']['ZapEnergy']) + if options['save_default']: - data['xanes_data_smooth_default'] = data['xanes_data']['ZapEnergy'] + df_smooth_default = pd.DataFrame(data['xanes_data']['ZapEnergy']) # FIXME Add other types of filters # FIXME Instead of assigning values directly to the data dictionary, these should be made into an own DataFrame that you can decide later what to do with - these variables should # then be returned for filename in data['path']: - xanes_smooth = savgol_filter(data['xanes_data'][filename], options['window_length'], options['polyorder']) - if options['save_default']: - default_smooth = savgol_filter(data['xanes_data'][filename], default_options['window_length'], default_options['polyorder']) - - data['xanes_data'][filename] = xanes_smooth + df_smooth.insert(1, filename, savgol_filter(data['xanes_data'][filename], options['window_length'], options['polyorder'])) if options['save_default']: - data['xanes_data_smooth_default'][filename] = default_smooth - + df_smooth_default.insert(1, filename, savgol_filter(data['xanes_data'][filename], default_options['window_length'], default_options['polyorder'])) + if options['save_plots']: if not os.path.isdir(options['save_folder']): @@ -298,39 +274,35 @@ def smoothing(data: dict, options={}): dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_smooth.png' edge_pos = estimate_edge_position(data=data, options=options) - intensity_midpoint = data['xanes_data'][filename].max() - data['xanes_data'][filename].min() - - + intensity_midpoint = df_smooth[filename].iloc[np.where(df_smooth['ZapEnergy'] == find_nearest(df_smooth['ZapEnergy'], edge_pos))].values[0] + if options['save_default']: fig, (ax1, ax2) = plt.subplots(1,2,figsize=(20,5)) - data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) - xanes_smooth.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) - ax1.set_xlim([edge_pos-0.5, edge_pos+0.5]) - ax1.set_ylim([intensity_midpoint*0.98, intensity_midpoint*1.02]) - + data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-0.0015) & (data['xanes_data']['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='black', ax=ax1, kind='scatter') + df_smooth.loc[(df_smooth['ZapEnergy'] > edge_pos-0.0015) & (df_smooth['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='red', ax=ax1) ax1.set_title(f'{os.path.basename(filename)} - Smooth', size=20) - data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) - data['xanes_data_smooth_default'].plot(x='ZapEnergy', y=filename, color='green', ax=ax2) - ax2.set_xlim([edge_pos-0.5, edge_pos+0.5]) - ax2.set_ylim([intensity_midpoint*0.98, intensity_midpoint*1.02]) + data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-0.0015) & (data['xanes_data']['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='black', ax=ax2, kind='scatter') + df_smooth_default.loc[(df_smooth_default['ZapEnergy'] > edge_pos-0.0015) & (df_smooth_default['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='red', ax=ax2) ax2.set_title(f'{os.path.basename(filename)} - Smooth (default values)', size=20) elif not options['save_default']: fig, ax = plt.subplots(figsize=(10,5)) - data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) - xanes_smooth.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) - ax1.set_xlim([edge_pos-0.5, edge_pos+0.5]) - ax1.set_ylim([intensity_midpoint*0.98, intensity_midpoint*1.02]) + data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-0.0015) & (data['xanes_data']['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='black', ax=ax, kind='scatter') + df_smooth.loc[(df_smooth['ZapEnergy'] > edge_pos-0.0015) & (df_smooth['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='red', ax=ax) + ax.set_xlim([edge_pos-0.0015, edge_pos+0.0015]) + ax.set_ylim([intensity_midpoint*0.9, intensity_midpoint*1.1]) - ax1.set_title(f'{os.path.basename(filename)} - Smooth', size=20) + ax.set_title(f'{os.path.basename(filename)} - Smooth', size=20) plt.savefig(dst, transparent=False) plt.close() - # FIXME See comment above about return values - return None + if not options['save_default']: + df_smooth_default = None + + return df_smooth, df_smooth_default @@ -340,133 +312,184 @@ def find_nearest(array, value): idx = (np.abs(array - value)).argmin() return array[idx] -def finding_e0(path, options={}): - required_options = ['print','periods'] + +def estimate_edge_position(data: dict, options={}, index=0): + #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. + required_options = ['log','logfile', 'periods'] default_options = { - 'print': False, + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_edge_position_estimation.log', 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - df_smooth, filenames = smoothing(path, options=options) #This way the smoothing is printed as long as the "finding e0" is printed. - - if options['periods'] % 2 == 1: - print("NB!!!!!!!!!!!!!!!!! Periods needs to be an even number for the shifting of values to work properly") - ###df_diff = pd.DataFrame(df_smooth["ZapEnergy"]) # - if len(filenames) == 1: - filenames=filenames[0] - else: - print("MORE THAN ONE FILE --> generalize") - - ##### - estimated_edge_shift, df_diff, df_diff_max = estimate_edge_position(df_smooth, filenames,options=options) - print(estimated_edge_shift) - #### - ###df_diff[filenames]=df_smooth[filenames].diff(periods=options['periods']) # - df_doublediff=pd.DataFrame(df_smooth["ZapEnergy"]) - df_doublediff[filenames]=df_diff[filenames].diff(periods=options['periods']) - - if options['print'] == True: - fig, (ax1,ax2) = plt.subplots(1,2,figsize=(15,5)) - - df_diff.plot(x = "ZapEnergy",y=filenames, ax=ax1) #defining x and y - df_doublediff.plot(x = "ZapEnergy",y=filenames,ax=ax2) #defining x and y + #making new dataframe to keep the differentiated data + df_diff = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) + df_diff[data['path'][index]]=data['xanes_data_original'][data['path'][index]].diff(periods=options['periods']) #shifting column values up so that average differential fits right between the points used in the calculation - #df_diff[filenames]=df_diff[filenames].shift(-int(options['periods']/2)) # - df_doublediff[filenames]=df_doublediff[filenames].shift(-int(options['periods'])) + df_diff[data['path'][index]]=df_diff[data['path'][index]].shift(-int(options['periods']/2)) + df_diff_max = df_diff[data['path'][index]].dropna().max() + estimated_edge_shift =df_diff.loc[df_diff[data['path'][index]] == df_diff_max,'ZapEnergy'].values[0] + + # FIXME Add logging option to see the result + + if options['log']: + aux.write_log(message=f'Estimated edge shift for determination of pre-edge area is: {estimated_edge_shift} keV', options=options) + + return estimated_edge_shift + +def determine_edge_position(data: dict, options={}): - #finding maximum value to maneuver to the correct part of the data set - #df_diff_max = df_diff[filenames].dropna().max() - + required_options = ['log', 'logfile', 'save_plots', 'save_folder', 'periods', 'diff', 'double_diff', 'fit_region'] + default_options = { + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_determine_edge_position.log', + 'save_plots': False, + 'save_folder': './', + 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly, + 'diff': True, + 'double_diff': False, + 'fit_region': 0.0005 - estimated_edge_shift=df_diff.loc[df_diff[filenames] == df_diff_max,'ZapEnergy'].values[0] + } - fit_region = 0.0004 - df_diff_edge=df_diff.loc[(df_diff["ZapEnergy"] < estimated_edge_shift+fit_region)]# and (df_diff["ZapEnergy"] > estimated_edge_shift-0.05)] - df_diff_edge=df_diff_edge.loc[(df_diff["ZapEnergy"] > estimated_edge_shift-fit_region)] - - - - - df_doublediff_edge=df_doublediff.loc[(df_doublediff["ZapEnergy"] < estimated_edge_shift+fit_region)]# and (df_diff["ZapEnergy"] > estimated_edge_shift-0.05)] - df_doublediff_edge=df_doublediff_edge.loc[(df_doublediff["ZapEnergy"] > estimated_edge_shift-fit_region)] - #df_diff_edge=df_diff.loc[(df_diff["ZapEnergy"] > estimated_edge_shift-0.15) and (df_diff["ZapEnergy"] < estimated_edge_shift+0.15)] + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - #df_diff_edge=df_diff.loc[df_diff["ZapEnergy"] > estimated_edge_shift-0.15] - #print(df_diff_edge) - if options['print'] == True: - fig, (ax3,ax4) = plt.subplots(1,2,figsize=(15,5)) + if options['periods'] % 2 == 1: + raise Exception("NB! Periods needs to be an even number for the shifting of values to work properly") - df_diff_edge.plot(x = "ZapEnergy",y=filenames,ax=ax3) #defining x and y - ax3.set_title("Zoomed into edge region (derivative))") - ax3.axvline(x = estimated_edge_shift) - - df_doublediff_edge.plot(x = "ZapEnergy",y=filenames,ax=ax4,kind="scatter") #defining x and y - ax4.set_title("Zoomed into edge region (double derivative)") - ax4.axvline(x = estimated_edge_shift) - ax4.axhline(0) - - - - #ax1.set_xlim([estimated_edge_shift-fit_region,estimated_edge_shift+fit_region]) - #ax1.set_title("not sure what this is tbh") - - #ax2.set_xlim([estimated_edge_shift-fit_region,estimated_edge_shift+fit_region]) - #ax2.set_title("not sure what this is either tbh") - - #============== - #df_smooth=df_smooth2 - #================= - - - - - #========================== fitting first differential ========== - df_diff = df_diff[df_diff[filenames].notna()] - - #fitting a function to the chosen interval - d = np.polyfit(df_diff_edge["ZapEnergy"],df_diff_edge[filenames],2) - function_diff = np.poly1d(d) - - x_diff=np.linspace(df_diff_edge["ZapEnergy"].iloc[0],df_diff_edge["ZapEnergy"].iloc[-1],num=1000) - y_diff=function_diff(x_diff) - #print(df_diff_edge["ZapEnergy"].iloc[-1]) - if options['print'] == True: - ax3.plot(x_diff,y_diff,color='Green') - - #y_diff_max=np.amax(y_diff,0) - y_diff_max_index = np.where(y_diff == np.amax(y_diff)) - #print(y_diff_max_index[0]) - edge_shift_diff=float(x_diff[y_diff_max_index]) - print("Edge shift estimated by the differential maximum is "+str(round(edge_shift_diff,5))) - if options['print'] == True: - ax3.axvline(x=edge_shift_diff,color="green") - #print(df_doublediff_edge["ZapEnergy"].iloc[0]) - #ax4.plot(x_doublediff,y_doublediff,color='Green')) - - - #fitting double differentiate - df_doublediff = df_doublediff[df_doublediff[filenames].notna()] - d = np.polyfit(df_doublediff_edge["ZapEnergy"],df_doublediff_edge[filenames],2) - function_doublediff = np.poly1d(d) - - x_doublediff=np.linspace(df_doublediff_edge["ZapEnergy"].iloc[0],df_doublediff_edge["ZapEnergy"].iloc[-1],num=10000) - y_doublediff=function_doublediff(x_doublediff) - - if options['print'] == True: - ax4.plot(x_doublediff,y_doublediff,color='Green') - - y_doublediff_zero=find_nearest(y_doublediff,0) - y_doublediff_zero_index = np.where(y_doublediff == y_doublediff_zero) - - edge_shift_doublediff=float(x_doublediff[y_doublediff_zero_index]) - print("Edge shift estimated by the double differential zero-point is "+str(round(edge_shift_doublediff,5))) - if options['print'] == True: - ax4.axvline(x=edge_shift_doublediff,color="green") + ##### - return df_smooth, filenames, edge_shift_diff + if options['diff']: + df_diff = pd.DataFrame(data['xanes_data']['ZapEnergy']) + if options['double_diff']: + df_double_diff = pd.DataFrame(data['xanes_data']['ZapEnergy']) + + for i, filename in enumerate(data['path']): + estimated_edge_pos = estimate_edge_position(data, options=options, index=i) + + + #========================== fitting first differential ========== + + if options['diff']: + df_diff[filename] = data['xanes_data'][filename].diff(periods=options['periods']) + df_diff[filename]=df_diff[filename].shift(-int(options['periods']/2)) + + df_diff_edge = df_diff.loc[(df_diff["ZapEnergy"] < estimated_edge_pos+options['fit_region']) & ((df_diff["ZapEnergy"] > estimated_edge_pos-options['fit_region']))] + + + # Fitting a function to the chosen interval + params = np.polyfit(df_diff_edge["ZapEnergy"], df_diff_edge[filename], 2) + diff_function = np.poly1d(params) + + x_diff=np.linspace(df_diff_edge["ZapEnergy"].iloc[0],df_diff_edge["ZapEnergy"].iloc[-1],num=10000) + y_diff=diff_function(x_diff) + + df_diff_fit_function = pd.DataFrame(x_diff) + df_diff_fit_function['y_diff'] = y_diff + df_diff_fit_function.columns = ['x_diff', 'y_diff'] + + # Picks out the x-value where the y-value is at a maximum + edge_pos_diff=x_diff[np.where(y_diff == np.amax(y_diff))][0] + + if options['log']: + aux.write_log(message=f"Edge shift estimated by the differential maximum is: {str(round(edge_pos_diff,5))}", options=options) + + + if options['double_diff']: + df_double_diff[filename] = data['xanes_data'][filename].diff(periods=options['periods']).diff(periods=options['periods']) + df_double_diff[filename]=df_double_diff[filename].shift(-int(options['periods'])) + + # Pick out region of interest + df_double_diff_edge = df_double_diff.loc[(df_double_diff["ZapEnergy"] < estimated_edge_pos+options['fit_region']) & ((df_double_diff["ZapEnergy"] > estimated_edge_pos-options['fit_region']))] + + # Fitting a function to the chosen interval + params = np.polyfit(df_double_diff_edge["ZapEnergy"], df_double_diff_edge[filename], 2) + double_diff_function = np.poly1d(params) + + x_double_diff=np.linspace(df_double_diff_edge["ZapEnergy"].iloc[0], df_double_diff_edge["ZapEnergy"].iloc[-1],num=10000) + y_double_diff=double_diff_function(x_double_diff) + + df_double_diff_fit_function = pd.DataFrame(x_double_diff) + df_double_diff_fit_function['y_diff'] = y_double_diff + df_double_diff_fit_function.columns = ['x_diff', 'y_diff'] + + + # Picks out the x-value where the y-value is closest to 0 + edge_pos_double_diff=x_double_diff[np.where(y_double_diff == find_nearest(y_double_diff,0))][0] + + if options['log']: + aux.write_log(message=f"Edge shift estimated by the double differential zero-point is {str(round(edge_pos_double_diff,5))}", options=options) + + if options['save_plots']: + + if options['diff'] and options['double_diff']: + + fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2, nrows=2, figsize=(20,20)) + df_diff.plot(x='ZapEnergy', y=filename, ax=ax1, kind='scatter') + df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax1) + ax1.set_xlim([edge_pos_diff-0.0015, edge_pos_diff+0.0015]) + ax1.axvline(x=edge_pos_diff-options['fit_region'], ls='--', c='black') + ax1.axvline(x=edge_pos_diff, ls='--', c='green') + ax1.axvline(x=edge_pos_diff+options['fit_region'], ls='--', c='black') + ax1.set_title('Fit region of differentiated data') + + df_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax2, kind='scatter') + df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax2) + ax2.axvline(x=edge_pos_diff, ls='--', c='green') + ax2.axvline(x=estimated_edge_pos, ls='--', c='red') + ax2.set_title('Fit of differentiated data') + + + df_double_diff.plot(x='ZapEnergy', y=filename, ax=ax3, kind='scatter') + df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax3) + ax3.set_xlim([edge_pos_double_diff-0.0015, edge_pos_double_diff+0.0015]) + ax3.axvline(x=edge_pos_double_diff-options['fit_region'], ls='--', c='black') + ax3.axvline(x=edge_pos_double_diff, ls='--', c='green') + ax3.axvline(x=edge_pos_double_diff+options['fit_region'], ls='--', c='black') + + df_double_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax4, kind='scatter') + df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax4) + ax4.axvline(x=edge_pos_double_diff, ls='--', c='green') + ax4.axvline(x=estimated_edge_pos, ls='--', c='red') + + + + + elif options['diff']: + fig, (ax1, ax2) = plt.subplots(ncols=2,nrows=1, figsize=(20, 10)) + df_diff.plot(x='ZapEnergy', y=filename, ax=ax1, kind='scatter') + ax1.set_xlim([edge_pos_diff-0.5, edge_pos_diff+0.5]) + ax1.axvline(x=edge_pos_diff-options['fit_region'], ls='--', c='black') + ax1.axvline(x=edge_pos_diff, ls='--', c='green') + ax1.axvline(x=edge_pos_diff+options['fit_region'], ls='--', c='black') + + df_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax2) + ax2.axvline(x=edge_pos_diff, ls='--', c='green') + ax2.axvline(x=estimated_edge_pos, ls='--', c='red') + + + elif options['double_diff']: + fig, (ax1, ax2) = plt.subplots(ncols=2,nrows=1, figsize=(20, 10)) + df_double_diff.plot(x='ZapEnergy', y=filename, ax=ax1, kind='scatter') + ax1.set_xlim([edge_pos_double_diff-0.5, edge_pos_double_diff+0.5]) + ax1.axvline(x=edge_pos_double_diff-options['fit_region'], ls='--', c='black') + ax1.axvline(x=edge_pos_double_diff, ls='--', c='green') + ax1.axvline(x=edge_pos_double_diff+options['fit_region'], ls='--', c='black') + + df_double_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax2) + ax2.axvline(x=edge_pos_double_diff, ls='--', c='green') + ax2.axvline(x=estimated_edge_pos, ls='--', c='red') + + + if not options['diff']: + edge_pos_diff = None + if not options['double_diff']: + edge_pos_double_diff = None + + return edge_pos_diff, edge_pos_double_diff def normalization(data,options={}): required_options = ['print'] From ec1fba1c829a478d4e3ac3364617660dd9c37009 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 23 Jun 2022 11:46:06 +0200 Subject: [PATCH 27/56] Refactor normalisation and flattening functions --- nafuma/xanes/calib.py | 45 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index dfe5b07..5c111f5 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -491,25 +491,52 @@ def determine_edge_position(data: dict, options={}): return edge_pos_diff, edge_pos_double_diff -def normalization(data,options={}): - required_options = ['print'] +def normalise(data: dict, options={}): + required_options = ['log', 'logfile', 'save_values'] default_options = { - 'print': False, + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_normalisation.log', + 'save_values': True } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - #Finding the normalization constant µ_0(E_0), by subtracting the value of the pre-edge-line from the value of the post-edge line at e0 - normalization_constant=post_edge_fit_function(e0) - pre_edge_fit_function(e0) - - #subtracting background (as in pre_edge_subtraction) + normalised_df = pd.DataFrame(data['xanes_data']['ZapEnergy']) - #dividing the background-subtracted data with the normalization constant + #Finding the normalisation constant µ_0(E_0), by subtracting the value of the pre-edge-line from the value of the post-edge line at e0 + for filename in data['path']: + normalisation_constant = data['post_edge_fit_function'][filename].loc[data['post_edge_fit_function']['ZapEnergy'] == data['e0'][filename]] - data['pre_edge_fit_function'].loc[data['pre_edge_fit_function']['ZapEnergy'] == data['e0'][filename]] + + normalised_df.insert(1, filename, data['xanes_data'] / normalisation_constant) + + if options['save_values']: + data['xanes_data'] = normalised_df + + + return normalised_df -def flattening(data,options={}): +def flatten(data:dict, options={}): #only picking out zapenergy-values higher than edge position (edge pos and below remains untouched) + + required_options = ['log', 'logfile', 'save_values'] + default_options = { + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_flattening.log', + 'save_values': True + } + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + df_e0_and_above=df.loc[df['ZapEnergy'] > edge_shift_diff] + flattened_df = pd.DataFrame(data['xanes_data']['ZapEnergy']) + + for filename in data['path']: + above_e0 = data['xanes_data'][filename].loc(data['xanes_data']['ZapEnergy'] > data['e0'][filename]) + flattened_data = data['post_edge_fit_function'][filename] - + + + flattened_data = post_edge_fit_function(df_e0_and_above['ZapEnergy']) - pre_edge_fit_function(df_e0_and_above['ZapEnergy']) #make a new dataframe with flattened values From 2b14a64c4bbab1590e6028a0f75ed5d699372ff6 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 23 Jun 2022 15:32:29 +0200 Subject: [PATCH 28/56] Attempt to get flattening and normalisation to behave properly --- nafuma/xanes/calib.py | 45 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 5c111f5..2b12c78 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -78,6 +78,8 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data pre_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) + data['pre_edge_params'] = {} + for i, filename in enumerate(data['path']): if options['log']: aux.write_log(message=f'Fitting background on {os.path.basename(filename)} ({i+1} / {len(data["path"])})', options=options) @@ -85,6 +87,8 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: #Fitting linear function to the background params = np.polyfit(pre_edge_data["ZapEnergy"],pre_edge_data[filename],1) fit_function = np.poly1d(params) + + data['pre_edge_params'][filename] = params #making a list, y_pre,so the background will be applied to all ZapEnergy-values background=fit_function(pre_edge_fit_data["ZapEnergy"]) @@ -194,6 +198,8 @@ def post_edge_fit(data: dict, options={}): # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data post_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) + + data['post_edge_params'] = {} for i, filename in enumerate(data['path']): if options['log']: @@ -202,6 +208,8 @@ def post_edge_fit(data: dict, options={}): #Fitting linear function to the background params = np.polyfit(post_edge_data["ZapEnergy"], post_edge_data[filename], 2) fit_function = np.poly1d(params) + + data['post_edge_params'][filename] = params #making a list, y_pre,so the background will be applied to all ZapEnergy-values background=fit_function(post_edge_fit_data["ZapEnergy"]) @@ -341,8 +349,9 @@ def estimate_edge_position(data: dict, options={}, index=0): def determine_edge_position(data: dict, options={}): - required_options = ['log', 'logfile', 'save_plots', 'save_folder', 'periods', 'diff', 'double_diff', 'fit_region'] + required_options = ['save_values', 'log', 'logfile', 'save_plots', 'save_folder', 'periods', 'diff', 'double_diff', 'fit_region'] default_options = { + 'save_values': True, 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_determine_edge_position.log', 'save_plots': False, @@ -366,6 +375,9 @@ def determine_edge_position(data: dict, options={}): df_diff = pd.DataFrame(data['xanes_data']['ZapEnergy']) if options['double_diff']: df_double_diff = pd.DataFrame(data['xanes_data']['ZapEnergy']) + if options['save_values']: + data['e0'] = {} + for i, filename in enumerate(data['path']): estimated_edge_pos = estimate_edge_position(data, options=options, index=i) @@ -395,7 +407,10 @@ def determine_edge_position(data: dict, options={}): edge_pos_diff=x_diff[np.where(y_diff == np.amax(y_diff))][0] if options['log']: - aux.write_log(message=f"Edge shift estimated by the differential maximum is: {str(round(edge_pos_diff,5))}", options=options) + aux.write_log(message=f"Edge position estimated by the differential maximum is: {str(round(edge_pos_diff,5))}", options=options) + + if options['save_values']: + data['e0'][filename] = edge_pos_diff if options['double_diff']: @@ -501,12 +516,21 @@ def normalise(data: dict, options={}): options = aux.update_options(options=options, required_options=required_options, default_options=default_options) normalised_df = pd.DataFrame(data['xanes_data']['ZapEnergy']) + data['normalisation_constants'] = {} #Finding the normalisation constant µ_0(E_0), by subtracting the value of the pre-edge-line from the value of the post-edge line at e0 for filename in data['path']: - normalisation_constant = data['post_edge_fit_function'][filename].loc[data['post_edge_fit_function']['ZapEnergy'] == data['e0'][filename]] - data['pre_edge_fit_function'].loc[data['pre_edge_fit_function']['ZapEnergy'] == data['e0'][filename]] + e0_ind = data['post_edge_fit_data'].loc[data['post_edge_fit_data']['ZapEnergy'] == find_nearest(data['post_edge_fit_data']['ZapEnergy'], data['e0'][filename])].index.values[0] + #norm = data['post_edge_fit_data'][filename].iloc[find_nearest(data['post_edge_fit_data'][filename], data['e0'][filename])] + normalisation_constant = data['post_edge_fit_data'][filename].iloc[e0_ind] - data['pre_edge_fit_data'][filename].iloc[e0_ind] + normalised_df.insert(1, filename, data['xanes_data'][filename] / normalisation_constant) - normalised_df.insert(1, filename, data['xanes_data'] / normalisation_constant) + + # Normalise the pre-edge and post-edge fit function data + data['pre_edge_fit_data'][filename] = data['pre_edge_fit_data'][filename] / normalisation_constant + data['post_edge_fit_data'][filename] = data['post_edge_fit_data'][filename] / normalisation_constant + + data['normalisation_constants'][filename] = normalisation_constant if options['save_values']: data['xanes_data'] = normalised_df @@ -527,17 +551,20 @@ def flatten(data:dict, options={}): options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - df_e0_and_above=df.loc[df['ZapEnergy'] > edge_shift_diff] - flattened_df = pd.DataFrame(data['xanes_data']['ZapEnergy']) for filename in data['path']: - above_e0 = data['xanes_data'][filename].loc(data['xanes_data']['ZapEnergy'] > data['e0'][filename]) - flattened_data = data['post_edge_fit_function'][filename] - + fit_function_diff = -data['post_edge_fit_data'][filename] + data['pre_edge_params'][filename][0] + fit_function_diff.loc[flattened_df['ZapEnergy'] <= data['e0'][filename]] = 0 + + flattened_df[filename] = data['xanes_data'][filename] - fit_function_diff + if options['save_values']: + data['xanes_data'] = flattened_df + - flattened_data = post_edge_fit_function(df_e0_and_above['ZapEnergy']) - pre_edge_fit_function(df_e0_and_above['ZapEnergy']) + return flattened_df, fit_function_diff #make a new dataframe with flattened values From 726535c66fab28b9863a923be16b55e6a4955476 Mon Sep 17 00:00:00 2001 From: halvorhv Date: Fri, 24 Jun 2022 19:28:55 +0200 Subject: [PATCH 29/56] fixing the "add_rois"-option --- nafuma/xanes/io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index f623a38..87e5ac3 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -97,7 +97,7 @@ def split_scan_data(data: dict, options={}) -> list: edges[edge].append(xanes_df) - if options['add']: + if options['add_rois']: if options['log']: aux.write_log(message=f'... Addition of rois enabled. Starting addition...', options=options) @@ -142,7 +142,7 @@ def split_scan_data(data: dict, options={}) -> list: for edge, scans in edges.items(): for i, scan in enumerate(scans): - count = '' if options['add'] else '_'+str(i).zfill(4) + count = '' if options['add_rois'] else '_'+str(i).zfill(4) path = os.path.join(options['save_folder'], f'{filename}_{edge}{count}.dat') if not os.path.isfile(path): From 931b3e42ae2aff55dbaa65b2cc6ab326b9d7104b Mon Sep 17 00:00:00 2001 From: halvorhv Date: Fri, 24 Jun 2022 19:39:11 +0200 Subject: [PATCH 30/56] adding a fixme for split_scan_data-function --- nafuma/xanes/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 87e5ac3..9676608 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -127,7 +127,7 @@ def split_scan_data(data: dict, options={}) -> list: edges = added_edges if options['save']: - + #FIXME If there is something wrong with the input file, the file will not be saved but log-file still sais it is saved. Goes from "Saving data to ..." to "All done!" no matter if it fals or not. if options['log']: aux.write_log(message=f'... Saving data to {options["save_folder"]}', options=options) From 8e0d8f486155cab13011287398009ecaf4db578a Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Mon, 27 Jun 2022 12:20:49 +0200 Subject: [PATCH 31/56] ADd interactive mode for pre and post edge fitting --- nafuma/xanes/calib.py | 129 +++++++++++++++++++++++++++++++----------- 1 file changed, 96 insertions(+), 33 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 2b12c78..911af8d 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -5,10 +5,13 @@ import numpy as np import os import matplotlib.pyplot as plt import nafuma.auxillary as aux +import nafuma.plotting as btp import nafuma.xanes as xas import nafuma.xanes.io as io from scipy.signal import savgol_filter from datetime import datetime +import ipywidgets as widgets +from IPython.display import display ##Better to make a new function that loops through the files, and performing the split_xanes_scan on @@ -43,13 +46,15 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME Add log-file - required_options = ['pre_edge_start', 'log', 'logfile', 'save_plots', 'save_folder'] + required_options = ['pre_edge_limit', 'log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'interactive'] default_options = { - 'pre_edge_start': None, + 'pre_edge_limit': [None, None], 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_pre_edge_fit.log', + 'show_plots': False, 'save_plots': False, - 'save_folder': './' + 'save_folder': './', + 'interactive': False } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -57,23 +62,34 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: if options['log']: aux.write_log(message='Starting pre edge fit', options=options) - - # FIXME Implement with finding accurate edge position # FIXME Allow specification of start of pre-edge area # Find the cutoff point at which the edge starts - everything to the LEFT of this point will be used in the pre edge function fit - if not options['pre_edge_start']: + if not options['pre_edge_limit'][0]: + options['pre_edge_limit'][0] = data['xanes_data_original']['ZapEnergy'].min() + + if not options['pre_edge_limit'][1]: pre_edge_limit_offset = 0.03 data['edge'] = find_element(data) edge_position = estimate_edge_position(data, options, index=0) - pre_edge_limit = edge_position - pre_edge_limit_offset + options['pre_edge_limit'][1] = edge_position - pre_edge_limit_offset + + # Start inteactive session with ipywidgets. Disables options['interactive'] in order for the interactive loop to not start another interactive session + if options['interactive']: + options['interactive'] = False + options['interactive_session_active'] = True + options['show_plots'] = True + pre_edge_fit_interactive(data=data, options=options) + return + + # FIXME There should be an option to specify the interval in which to fit the background - now it is taking everything to the left of edge_start parameter, but if there are some artifacts in this area, it should be possible to # limit the interval # Making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined) - pre_edge_data = data['xanes_data_original'].loc[data['xanes_data_original']["ZapEnergy"] < pre_edge_limit] + pre_edge_data = data['xanes_data_original'].loc[(data['xanes_data_original']["ZapEnergy"] > options['pre_edge_limit'][0]) & (data['xanes_data_original']["ZapEnergy"] < options['pre_edge_limit'][1])] # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data pre_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) @@ -96,16 +112,12 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: #adding a new column in df_background with the y-values of the background pre_edge_fit_data.insert(1,filename,background) - if options['save_plots']: - if not os.path.isdir(options['save_folder']): - os.makedirs(options['save_folder']) - - dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_pre_edge_fit.png' - - fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5)) + if options['show_plots'] or options['save_plots']: + fig, (ax1, ax2) = plt.subplots(1,2,figsize=(20,10)) data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) ax1.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') + ax1.axvline(x = min(pre_edge_data['ZapEnergy']), ls='--') ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) @@ -115,9 +127,15 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: ax2.set_ylim([min(pre_edge_data[filename]), max(pre_edge_data[filename])]) ax2.set_title(f'{os.path.basename(filename)} - Fit region', size=20) + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) - plt.savefig(dst, transparent=False) - plt.close() + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_pre_edge_fit.png' + plt.savefig(dst, transparent=False) + + if not options['show_plots']: + plt.close() if options['log']: @@ -127,6 +145,21 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: +def pre_edge_fit_interactive(data: dict, options: dict) -> None: + + + w = widgets.interactive( + btp.ipywidgets_update, func=widgets.fixed(pre_edge_fit), data=widgets.fixed(data), options=widgets.fixed(options), + pre_edge_limit=widgets.FloatRangeSlider(value=[options['pre_edge_limit'][0], options['pre_edge_limit'][1]], min=data['xanes_data_original']['ZapEnergy'].min(), max=data['xanes_data_original']['ZapEnergy'].max(), step=0.001) + ) + + options['widget'] = w + + display(w) + + + + def pre_edge_subtraction(data: dict, options={}): required_options = ['log', 'logfile', 'save_plots', 'save_folder'] @@ -171,30 +204,43 @@ def pre_edge_subtraction(data: dict, options={}): def post_edge_fit(data: dict, options={}): #FIXME should be called "fitting post edge" (normalization is not done here, need edge shift position) - required_options = ['log', 'logfile', 'post_edge_interval'] + required_options = ['log', 'logfile', 'post_edge_limit', 'interactive'] default_options = { 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_post_edge_fit.log', - 'post_edge_interval': [None, None], + 'post_edge_limit': [None, None], + 'interactive': False, + 'show_plots': False, + 'save_plots': False, + 'save_folder': './', } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - if not options['post_edge_interval'][0]: + if not options['post_edge_limit'][0]: post_edge_limit_offset = 0.03 data['edge'] = find_element(data) edge_position = estimate_edge_position(data, options, index=0) - options['post_edge_interval'][0] = edge_position + post_edge_limit_offset + options['post_edge_limit'][0] = edge_position + post_edge_limit_offset - if not options['post_edge_interval'][1]: - options['post_edge_interval'][1] = data['xanes_data_original']['ZapEnergy'].max() + if not options['post_edge_limit'][1]: + options['post_edge_limit'][1] = data['xanes_data_original']['ZapEnergy'].max() + + # Start inteactive session with ipywidgets. Disables options['interactive'] in order for the interactive loop to not start another interactive session + if options['interactive']: + options['interactive'] = False + options['interactive_session_active'] = True + options['show_plots'] = True + post_edge_fit_interactive(data=data, options=options) + return - post_edge_data = data['xanes_data_original'].loc[(data['xanes_data_original']["ZapEnergy"] > options['post_edge_interval'][0]) & (data['xanes_data_original']["ZapEnergy"] < options['post_edge_interval'][1])] - post_edge_data.dropna(inplace=True) #Removing all indexes without any value, as some of the data sets misses the few last data points and fucks up the fit + + post_edge_data = data['xanes_data_original'].loc[(data['xanes_data_original']["ZapEnergy"] > options['post_edge_limit'][0]) & (data['xanes_data_original']["ZapEnergy"] < options['post_edge_limit'][1])] + post_edge_data = post_edge_data.dropna() #Removing all indexes without any value, as some of the data sets misses the few last data points and fucks up the fit # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data post_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) @@ -217,16 +263,14 @@ def post_edge_fit(data: dict, options={}): #adding a new column in df_background with the y-values of the background post_edge_fit_data.insert(1,filename,background) - if options['save_plots']: - if not os.path.isdir(options['save_folder']): - os.makedirs(options['save_folder']) + if options['save_plots'] or options['show_plots']: - dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_post_edge_fit.png' - fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5)) + fig, (ax1, ax2) = plt.subplots(1,2,figsize=(20,10)) data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) post_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) ax1.axvline(x = max(post_edge_data['ZapEnergy']), ls='--') + ax1.axvline(x = min(post_edge_data['ZapEnergy']), ls='--') ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) @@ -236,13 +280,32 @@ def post_edge_fit(data: dict, options={}): ax2.set_ylim([min(post_edge_data[filename]), max(post_edge_data[filename])]) ax2.set_title(f'{os.path.basename(filename)} - Fit region', size=20) + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) - plt.savefig(dst, transparent=False) - plt.close() + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_post_edge_fit.png' + + plt.savefig(dst, transparent=False) + + if not options['show_plots']: + plt.close() return post_edge_fit_data + +def post_edge_fit_interactive(data: dict, options: dict) -> None: + + w = widgets.interactive( + btp.ipywidgets_update, func=widgets.fixed(post_edge_fit), data=widgets.fixed(data), options=widgets.fixed(options), + post_edge_limit=widgets.FloatRangeSlider(value=[options['post_edge_limit'][0], options['post_edge_limit'][1]], min=data['xanes_data_original']['ZapEnergy'].min(), max=data['xanes_data_original']['ZapEnergy'].max(), step=0.001) + ) + + options['widget'] = w + + display(w) + def smoothing(data: dict, options={}): # FIXME Add logging @@ -295,7 +358,7 @@ def smoothing(data: dict, options={}): ax2.set_title(f'{os.path.basename(filename)} - Smooth (default values)', size=20) elif not options['save_default']: - fig, ax = plt.subplots(figsize=(10,5)) + fig, ax = plt.subplots(figsize=(20,10)) data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-0.0015) & (data['xanes_data']['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='black', ax=ax, kind='scatter') df_smooth.loc[(df_smooth['ZapEnergy'] > edge_pos-0.0015) & (df_smooth['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='red', ax=ax) ax.set_xlim([edge_pos-0.0015, edge_pos+0.0015]) From 537c7b3c5ad4d8eaa8c36f33f8b7752d6029d075 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Mon, 27 Jun 2022 13:34:18 +0200 Subject: [PATCH 32/56] Add masks to pre and post edge fitting --- nafuma/xanes/calib.py | 47 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 911af8d..2e03086 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -46,14 +46,16 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME Add log-file - required_options = ['pre_edge_limit', 'log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'interactive'] + required_options = ['pre_edge_limit', 'masks', 'log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'ylim', 'interactive'] default_options = { 'pre_edge_limit': [None, None], + 'masks': [], 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_pre_edge_fit.log', 'show_plots': False, 'save_plots': False, 'save_folder': './', + 'ylim': [None, None], 'interactive': False } @@ -67,6 +69,7 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # Find the cutoff point at which the edge starts - everything to the LEFT of this point will be used in the pre edge function fit if not options['pre_edge_limit'][0]: options['pre_edge_limit'][0] = data['xanes_data_original']['ZapEnergy'].min() + if not options['pre_edge_limit'][1]: pre_edge_limit_offset = 0.03 @@ -89,8 +92,13 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME There should be an option to specify the interval in which to fit the background - now it is taking everything to the left of edge_start parameter, but if there are some artifacts in this area, it should be possible to # limit the interval # Making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined) - pre_edge_data = data['xanes_data_original'].loc[(data['xanes_data_original']["ZapEnergy"] > options['pre_edge_limit'][0]) & (data['xanes_data_original']["ZapEnergy"] < options['pre_edge_limit'][1])] - + pre_edge_data = data['xanes_data_original'].loc[(data['xanes_data_original']["ZapEnergy"] > options['pre_edge_limit'][0]) & (data['xanes_data_original']["ZapEnergy"] < options['pre_edge_limit'][1])].copy() + + for mask in options['masks']: + pre_edge_data.loc[(pre_edge_data['ZapEnergy'] > mask[0]) & (pre_edge_data['ZapEnergy'] < mask[1])] = np.nan + + pre_edge_data = pre_edge_data.dropna() + # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data pre_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) @@ -119,6 +127,14 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: ax1.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') ax1.axvline(x = min(pre_edge_data['ZapEnergy']), ls='--') ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) + + if options['ylim'][0] != None: + ax1.set_ylim(bottom=options['ylim'][0]) + if options['ylim'][1]: + ax1.set_ylim(top=options['ylim'][1]) + + for mask in options['masks']: + ax1.fill_between(x=mask, y1=0, y2=data['xanes_data_original'][filename].max()*2, alpha=0.2, color='black') data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax2) @@ -204,11 +220,13 @@ def pre_edge_subtraction(data: dict, options={}): def post_edge_fit(data: dict, options={}): #FIXME should be called "fitting post edge" (normalization is not done here, need edge shift position) - required_options = ['log', 'logfile', 'post_edge_limit', 'interactive'] + required_options = ['log', 'logfile', 'masks', 'post_edge_limit', 'interactive', 'show_plots', 'save_plots', 'save_folder'] default_options = { 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_post_edge_fit.log', 'post_edge_limit': [None, None], + 'masks': [], + 'polyorder': 2, 'interactive': False, 'show_plots': False, 'save_plots': False, @@ -239,7 +257,11 @@ def post_edge_fit(data: dict, options={}): - post_edge_data = data['xanes_data_original'].loc[(data['xanes_data_original']["ZapEnergy"] > options['post_edge_limit'][0]) & (data['xanes_data_original']["ZapEnergy"] < options['post_edge_limit'][1])] + post_edge_data = data['xanes_data_original'].loc[(data['xanes_data_original']["ZapEnergy"] > options['post_edge_limit'][0]) & (data['xanes_data_original']["ZapEnergy"] < options['post_edge_limit'][1])].copy() + + for mask in options['masks']: + post_edge_data.loc[(post_edge_data['ZapEnergy'] > mask[0]) & (post_edge_data['ZapEnergy'] < mask[1])] = np.nan + post_edge_data = post_edge_data.dropna() #Removing all indexes without any value, as some of the data sets misses the few last data points and fucks up the fit # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data @@ -249,12 +271,15 @@ def post_edge_fit(data: dict, options={}): for i, filename in enumerate(data['path']): if options['log']: - aux.write_log(message=f'Fitting post edge on {os.path.basename(filename)} ({i+1} / {len(data["path"])})', options=options) + aux.write_log(message=f'Fitting post edge on {os.path.basename(filename)} ({i+1} / {len(data["path"])}) with polynomial order {options["polyorder"]}', options=options) #Fitting linear function to the background - params = np.polyfit(post_edge_data["ZapEnergy"], post_edge_data[filename], 2) + params = np.polyfit(post_edge_data["ZapEnergy"], post_edge_data[filename], options['polyorder']) fit_function = np.poly1d(params) + if options['log']: + aux.write_log(message=f'Post edge fitted with parameters: {params}') + data['post_edge_params'][filename] = params #making a list, y_pre,so the background will be applied to all ZapEnergy-values @@ -273,6 +298,14 @@ def post_edge_fit(data: dict, options={}): ax1.axvline(x = min(post_edge_data['ZapEnergy']), ls='--') ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) + for mask in options['masks']: + ax1.fill_between(x=mask, y1=0, y2=data['xanes_data_original'][filename].max()*2, alpha=0.2, color='black') + + if options['ylim'][0] != None: + ax1.set_ylim(bottom=options['ylim'][0]) + if options['ylim'][1] != None: + ax1.set_ylim(top=options['ylim'][1]) + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) post_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax2) ax2.axvline(x = max(post_edge_data['ZapEnergy']), ls='--') From 8c2723ee552bdc642aa984d0fca8df8a21f93b6f Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Mon, 27 Jun 2022 16:16:46 +0200 Subject: [PATCH 33/56] Plot full scan with computed edge position --- nafuma/xanes/calib.py | 139 +++++++++++++++++++++++++++--------------- 1 file changed, 90 insertions(+), 49 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 2e03086..66c4229 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -220,7 +220,7 @@ def pre_edge_subtraction(data: dict, options={}): def post_edge_fit(data: dict, options={}): #FIXME should be called "fitting post edge" (normalization is not done here, need edge shift position) - required_options = ['log', 'logfile', 'masks', 'post_edge_limit', 'interactive', 'show_plots', 'save_plots', 'save_folder'] + required_options = ['log', 'logfile', 'masks', 'post_edge_limit', 'polyorder', 'interactive', 'show_plots', 'save_plots', 'save_folder'] default_options = { 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_post_edge_fit.log', @@ -444,24 +444,31 @@ def estimate_edge_position(data: dict, options={}, index=0): return estimated_edge_shift def determine_edge_position(data: dict, options={}): + ''' Determines the edge position by 1) first differential maximum and/or 2) second differential zero-point. Calculates differential and/or double differential by periods''' - required_options = ['save_values', 'log', 'logfile', 'save_plots', 'save_folder', 'periods', 'diff', 'double_diff', 'fit_region'] + required_options = ['save_values', 'log', 'logfile', 'save_plots', 'save_folder', 'diff', 'diff.polyorder', 'diff.periods', 'double_diff', 'double_diff.polyorder', 'double_diff.periods', 'fit_region'] default_options = { 'save_values': True, 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_determine_edge_position.log', + 'show_plots': False, 'save_plots': False, 'save_folder': './', - 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly, 'diff': True, + 'diff.polyorder': 2, + 'diff.periods': 2, #Periods needs to be an even number for the shifting of values to work properly, 'double_diff': False, - 'fit_region': 0.0005 + 'double_diff.polyorder': 2, + 'double_diff.periods': 2, #Periods needs to be an even number for the shifting of values to work properly, + 'fit_region': None # The length of the region to find points to fit to a function } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - if options['periods'] % 2 == 1: + if options['diff'] and options['diff.periods'] % 2 != 0: + raise Exception("NB! Periods needs to be an even number for the shifting of values to work properly") + if options['double_diff'] and options['double_diff.periods'] % 2 != 0: raise Exception("NB! Periods needs to be an even number for the shifting of values to work properly") @@ -478,18 +485,21 @@ def determine_edge_position(data: dict, options={}): for i, filename in enumerate(data['path']): estimated_edge_pos = estimate_edge_position(data, options=options, index=i) - - #========================== fitting first differential ========== + if not options['fit_region']: + options['fit_region'] = (5)*(data['xanes_data']['ZapEnergy'].iloc[1] - data['xanes_data']['ZapEnergy'].iloc[0]) + + + #========================== Fitting first differential ========== if options['diff']: df_diff[filename] = data['xanes_data'][filename].diff(periods=options['periods']) df_diff[filename]=df_diff[filename].shift(-int(options['periods']/2)) - df_diff_edge = df_diff.loc[(df_diff["ZapEnergy"] < estimated_edge_pos+options['fit_region']) & ((df_diff["ZapEnergy"] > estimated_edge_pos-options['fit_region']))] + df_diff_edge = df_diff.loc[(df_diff["ZapEnergy"] <= estimated_edge_pos+options['fit_region']) & ((df_diff["ZapEnergy"] >= estimated_edge_pos-options['fit_region']))] # Fitting a function to the chosen interval - params = np.polyfit(df_diff_edge["ZapEnergy"], df_diff_edge[filename], 2) + params = np.polyfit(df_diff_edge["ZapEnergy"], df_diff_edge[filename], options['diff.polyorder']) diff_function = np.poly1d(params) x_diff=np.linspace(df_diff_edge["ZapEnergy"].iloc[0],df_diff_edge["ZapEnergy"].iloc[-1],num=10000) @@ -503,7 +513,7 @@ def determine_edge_position(data: dict, options={}): edge_pos_diff=x_diff[np.where(y_diff == np.amax(y_diff))][0] if options['log']: - aux.write_log(message=f"Edge position estimated by the differential maximum is: {str(round(edge_pos_diff,5))}", options=options) + aux.write_log(message=f"Edge position estimated by the differential maximum is: {str(round(edge_pos_diff,5))} keV", options=options) if options['save_values']: data['e0'][filename] = edge_pos_diff @@ -517,7 +527,7 @@ def determine_edge_position(data: dict, options={}): df_double_diff_edge = df_double_diff.loc[(df_double_diff["ZapEnergy"] < estimated_edge_pos+options['fit_region']) & ((df_double_diff["ZapEnergy"] > estimated_edge_pos-options['fit_region']))] # Fitting a function to the chosen interval - params = np.polyfit(df_double_diff_edge["ZapEnergy"], df_double_diff_edge[filename], 2) + params = np.polyfit(df_double_diff_edge["ZapEnergy"], df_double_diff_edge[filename], options['double_diff.polyorder']) double_diff_function = np.poly1d(params) x_double_diff=np.linspace(df_double_diff_edge["ZapEnergy"].iloc[0], df_double_diff_edge["ZapEnergy"].iloc[-1],num=10000) @@ -532,67 +542,98 @@ def determine_edge_position(data: dict, options={}): edge_pos_double_diff=x_double_diff[np.where(y_double_diff == find_nearest(y_double_diff,0))][0] if options['log']: - aux.write_log(message=f"Edge shift estimated by the double differential zero-point is {str(round(edge_pos_double_diff,5))}", options=options) + aux.write_log(message=f"Edge position estimated by the double differential zero-point is {str(round(edge_pos_double_diff,5))} keV", options=options) - if options['save_plots']: + if options['diff']: + aux.write_log(message=f"Difference between edge position estimated from differential maximum and double differential zero-point is {(edge_pos_diff-edge_pos_double_diff)*1000} eV.") + + if options['save_plots'] or options['show_plots']: if options['diff'] and options['double_diff']: - - fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2, nrows=2, figsize=(20,20)) - df_diff.plot(x='ZapEnergy', y=filename, ax=ax1, kind='scatter') - df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax1) - ax1.set_xlim([edge_pos_diff-0.0015, edge_pos_diff+0.0015]) - ax1.axvline(x=edge_pos_diff-options['fit_region'], ls='--', c='black') + + fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(ncols=3, nrows=2, figsize=(20,20)) + data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax1, c='black') ax1.axvline(x=edge_pos_diff, ls='--', c='green') - ax1.axvline(x=edge_pos_diff+options['fit_region'], ls='--', c='black') - ax1.set_title('Fit region of differentiated data') - - df_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax2, kind='scatter') + + df_diff.plot(x='ZapEnergy', y=filename, ax=ax2, kind='scatter') df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax2) + ax2.set_xlim([edge_pos_diff-0.0015, edge_pos_diff+0.0015]) + ax2.axvline(x=estimated_edge_pos-options['fit_region'], ls='--', c='black') ax2.axvline(x=edge_pos_diff, ls='--', c='green') - ax2.axvline(x=estimated_edge_pos, ls='--', c='red') - ax2.set_title('Fit of differentiated data') + ax2.axvline(x=estimated_edge_pos+options['fit_region'], ls='--', c='black') + ax2.set_title('Fit region of differentiated data') + + df_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax3, kind='scatter') + df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax3) + ax3.axvline(x=edge_pos_diff, ls='--', c='green') + ax3.axvline(x=estimated_edge_pos, ls='--', c='red') + ax3.set_title('Fit of differentiated data') - df_double_diff.plot(x='ZapEnergy', y=filename, ax=ax3, kind='scatter') - df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax3) - ax3.set_xlim([edge_pos_double_diff-0.0015, edge_pos_double_diff+0.0015]) - ax3.axvline(x=edge_pos_double_diff-options['fit_region'], ls='--', c='black') - ax3.axvline(x=edge_pos_double_diff, ls='--', c='green') - ax3.axvline(x=edge_pos_double_diff+options['fit_region'], ls='--', c='black') - - df_double_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax4, kind='scatter') - df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax4) + data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax4, c='black') ax4.axvline(x=edge_pos_double_diff, ls='--', c='green') - ax4.axvline(x=estimated_edge_pos, ls='--', c='red') + df_double_diff.plot(x='ZapEnergy', y=filename, ax=ax5, kind='scatter') + df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax5) + ax5.set_xlim([edge_pos_double_diff-0.0015, edge_pos_double_diff+0.0015]) + ax5.axvline(x=estimated_edge_pos-options['fit_region'], ls='--', c='black') + ax5.axvline(x=edge_pos_double_diff, ls='--', c='green') + ax5.axvline(x=estimated_edge_pos+options['fit_region'], ls='--', c='black') + + df_double_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax6, kind='scatter') + df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax6) + ax6.axvline(x=edge_pos_double_diff, ls='--', c='green') + ax6.axvline(x=estimated_edge_pos, ls='--', c='red') elif options['diff']: - fig, (ax1, ax2) = plt.subplots(ncols=2,nrows=1, figsize=(20, 10)) - df_diff.plot(x='ZapEnergy', y=filename, ax=ax1, kind='scatter') - ax1.set_xlim([edge_pos_diff-0.5, edge_pos_diff+0.5]) - ax1.axvline(x=edge_pos_diff-options['fit_region'], ls='--', c='black') + fig, (ax1, ax2, ax3) = plt.subplots(ncols=3,nrows=1, figsize=(20, 10)) + + data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax1, c='black') ax1.axvline(x=edge_pos_diff, ls='--', c='green') - ax1.axvline(x=edge_pos_diff+options['fit_region'], ls='--', c='black') - df_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax2) + df_diff.plot(x='ZapEnergy', y=filename, ax=ax2, kind='scatter') + df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax2) + ax2.set_xlim([edge_pos_diff-0.5, edge_pos_diff+0.5]) + ax2.axvline(x=edge_pos_diff-options['fit_region'], ls='--', c='black') ax2.axvline(x=edge_pos_diff, ls='--', c='green') - ax2.axvline(x=estimated_edge_pos, ls='--', c='red') + ax2.axvline(x=edge_pos_diff+options['fit_region'], ls='--', c='black') + + df_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax3) + df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax3) + ax3.axvline(x=edge_pos_diff, ls='--', c='green') + ax3.axvline(x=estimated_edge_pos, ls='--', c='red') elif options['double_diff']: - fig, (ax1, ax2) = plt.subplots(ncols=2,nrows=1, figsize=(20, 10)) - df_double_diff.plot(x='ZapEnergy', y=filename, ax=ax1, kind='scatter') - ax1.set_xlim([edge_pos_double_diff-0.5, edge_pos_double_diff+0.5]) - ax1.axvline(x=edge_pos_double_diff-options['fit_region'], ls='--', c='black') + fig, (ax1, ax2, ax3) = plt.subplots(ncols=3,nrows=1, figsize=(20, 10)) + + data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax1, c='black') ax1.axvline(x=edge_pos_double_diff, ls='--', c='green') - ax1.axvline(x=edge_pos_double_diff+options['fit_region'], ls='--', c='black') - df_double_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax2) + df_double_diff.plot(x='ZapEnergy', y=filename, ax=ax2, kind='scatter') + df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax2) + ax2.set_xlim([edge_pos_double_diff-0.5, edge_pos_double_diff+0.5]) + ax2.axvline(x=edge_pos_double_diff-options['fit_region'], ls='--', c='black') ax2.axvline(x=edge_pos_double_diff, ls='--', c='green') - ax2.axvline(x=estimated_edge_pos, ls='--', c='red') + ax2.axvline(x=edge_pos_double_diff+options['fit_region'], ls='--', c='black') + + df_double_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax3) + df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax3) + ax3.axvline(x=edge_pos_double_diff, ls='--', c='green') + ax3.axvline(x=estimated_edge_pos, ls='--', c='red') + + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) + + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_edge_position.png' + + plt.savefig(dst, transparent=False) + + if not options['show_plots']: + plt.close() if not options['diff']: From 1e147854a7173ca0d4191f99b64cc5d46726aa06 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Mon, 27 Jun 2022 16:43:42 +0200 Subject: [PATCH 34/56] Update documentation for determination of edge position --- nafuma/xanes/calib.py | 76 ++++++++++++++++++++++++++++--------------- 1 file changed, 50 insertions(+), 26 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 66c4229..d6fecc2 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -444,44 +444,56 @@ def estimate_edge_position(data: dict, options={}, index=0): return estimated_edge_shift def determine_edge_position(data: dict, options={}): - ''' Determines the edge position by 1) first differential maximum and/or 2) second differential zero-point. Calculates differential and/or double differential by periods''' + ''' Determines the edge position by 1) first differential maximum and/or 2) second differential zero-point. Calculates differential and/or double differential by diff.periods and double_diff.periods respectively. + The differentiated and/or doubly differentiated data is fitted to a polynomial of diff.polyorder and/or double_diff.polyorder around the estimated edge position. The estimated edge position is set to be the x-value of the data + point at maximum of the differentiated data. The region to be fitted to the polynomial is determined by fit_region, which defaults to 5 times the distance between two data points, giving five data points to fit to. + + Allows plotting and saving of three plots to assess the quality of the fit, and also allows logging. + + Requires that XANES-data is already loaded in data['xanes_data']. This allows the user to choose when to determine the edge position - whether before or after normalisation, flattening etc.''' required_options = ['save_values', 'log', 'logfile', 'save_plots', 'save_folder', 'diff', 'diff.polyorder', 'diff.periods', 'double_diff', 'double_diff.polyorder', 'double_diff.periods', 'fit_region'] default_options = { - 'save_values': True, - 'log': False, - 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_determine_edge_position.log', - 'show_plots': False, - 'save_plots': False, - 'save_folder': './', - 'diff': True, - 'diff.polyorder': 2, - 'diff.periods': 2, #Periods needs to be an even number for the shifting of values to work properly, - 'double_diff': False, - 'double_diff.polyorder': 2, - 'double_diff.periods': 2, #Periods needs to be an even number for the shifting of values to work properly, + 'save_values': True, # Whether the edge positions should be stored in a dictionary within the main data dictionary. + 'log': False, # Toggles logging on/off + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_determine_edge_position.log', # Sets the path to the logfile. Ignored if log == False + 'show_plots': False, # Toggles on/off whether plots should be shown. For sequential data, saving the plots and inspecting them there is probably better. + 'save_plots': False, # Toggles on/off whether plots should be saved. + 'save_folder': './', # Sets the path to where the plots should be saved. Creates folder if doesn't exist. Ignored if save_plots == False + 'diff': True, # Toggles calculation of the edge position based on differential data + 'diff.polyorder': 2, # Sets the order of the polynomial to fit edge region of the differential to + 'diff.periods': 2, # Sets the number of data points between which the first order difference should be calculated. Needs to be even for subsequent shifting of data to function. + 'double_diff': False, # Toggles calculation of the edge position based on double differential data + 'double_diff.polyorder': 2, # Sets the order of the polynomial to fit edge region of the double differential to + 'double_diff.periods': 2, # Sets the number of data points between which the second order difference should be calculated. Needs to be even for subsequent shifting of data to function. 'fit_region': None # The length of the region to find points to fit to a function - } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + # Check if periods are even if options['diff'] and options['diff.periods'] % 2 != 0: + if options['log']: + aux.write_log(message='Periods for differentiation is not even. Ending run.', options=options) raise Exception("NB! Periods needs to be an even number for the shifting of values to work properly") if options['double_diff'] and options['double_diff.periods'] % 2 != 0: + aux.write_log(message='Periods for double differentiation is not even. Ending run.', options=options) raise Exception("NB! Periods needs to be an even number for the shifting of values to work properly") - ##### - + + # Prepare dataframes for differential data if options['diff']: df_diff = pd.DataFrame(data['xanes_data']['ZapEnergy']) if options['double_diff']: df_double_diff = pd.DataFrame(data['xanes_data']['ZapEnergy']) if options['save_values']: - data['e0'] = {} + data['e0_diff'] = {} + data['e0_double_diff'] = {} + # Get rough estimate of edge position for i, filename in enumerate(data['path']): estimated_edge_pos = estimate_edge_position(data, options=options, index=i) @@ -489,12 +501,13 @@ def determine_edge_position(data: dict, options={}): options['fit_region'] = (5)*(data['xanes_data']['ZapEnergy'].iloc[1] - data['xanes_data']['ZapEnergy'].iloc[0]) - #========================== Fitting first differential ========== + #========================== Fitting the first order derivative ========== if options['diff']: - df_diff[filename] = data['xanes_data'][filename].diff(periods=options['periods']) - df_diff[filename]=df_diff[filename].shift(-int(options['periods']/2)) + df_diff[filename] = data['xanes_data'][filename].diff(periods=options['diff.periods']) + df_diff[filename]=df_diff[filename].shift(-int(options['diff.periods']/2)) # Shifts the data back so that the difference between the points is located in the middle of the two points the caluclated difference is between + # Picks out the points to be fitted df_diff_edge = df_diff.loc[(df_diff["ZapEnergy"] <= estimated_edge_pos+options['fit_region']) & ((df_diff["ZapEnergy"] >= estimated_edge_pos-options['fit_region']))] @@ -516,12 +529,12 @@ def determine_edge_position(data: dict, options={}): aux.write_log(message=f"Edge position estimated by the differential maximum is: {str(round(edge_pos_diff,5))} keV", options=options) if options['save_values']: - data['e0'][filename] = edge_pos_diff - + data['e0_diff'][filename] = edge_pos_diff + #========================== Fitting the second order derivative ========== if options['double_diff']: - df_double_diff[filename] = data['xanes_data'][filename].diff(periods=options['periods']).diff(periods=options['periods']) - df_double_diff[filename]=df_double_diff[filename].shift(-int(options['periods'])) + df_double_diff[filename] = data['xanes_data'][filename].diff(periods=options['double_diff.periods']).diff(periods=options['double_diff.periods']) + df_double_diff[filename]=df_double_diff[filename].shift(-int(options['double_diff.periods'])) # Pick out region of interest df_double_diff_edge = df_double_diff.loc[(df_double_diff["ZapEnergy"] < estimated_edge_pos+options['fit_region']) & ((df_double_diff["ZapEnergy"] > estimated_edge_pos-options['fit_region']))] @@ -547,8 +560,15 @@ def determine_edge_position(data: dict, options={}): if options['diff']: aux.write_log(message=f"Difference between edge position estimated from differential maximum and double differential zero-point is {(edge_pos_diff-edge_pos_double_diff)*1000} eV.") + if options['save_values']: + data['e0_double_diff'][filename] = edge_pos_double_diff + + + # Make and show / save plots if options['save_plots'] or options['show_plots']: + + # If both are enabled if options['diff'] and options['double_diff']: fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(ncols=3, nrows=2, figsize=(20,20)) @@ -586,7 +606,7 @@ def determine_edge_position(data: dict, options={}): ax6.axvline(x=estimated_edge_pos, ls='--', c='red') - + # If only first order differentials is enabled elif options['diff']: fig, (ax1, ax2, ax3) = plt.subplots(ncols=3,nrows=1, figsize=(20, 10)) @@ -605,7 +625,7 @@ def determine_edge_position(data: dict, options={}): ax3.axvline(x=edge_pos_diff, ls='--', c='green') ax3.axvline(x=estimated_edge_pos, ls='--', c='red') - + # If only second order differentials is enabled elif options['double_diff']: fig, (ax1, ax2, ax3) = plt.subplots(ncols=3,nrows=1, figsize=(20, 10)) @@ -624,6 +644,8 @@ def determine_edge_position(data: dict, options={}): ax3.axvline(x=edge_pos_double_diff, ls='--', c='green') ax3.axvline(x=estimated_edge_pos, ls='--', c='red') + + # Save plots if toggled if options['save_plots']: if not os.path.isdir(options['save_folder']): os.makedirs(options['save_folder']) @@ -632,6 +654,8 @@ def determine_edge_position(data: dict, options={}): plt.savefig(dst, transparent=False) + + # Close plots if show_plots not toggled if not options['show_plots']: plt.close() From cc3c4dc5b65206b2b36323b26f96d3560e383568 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Mon, 27 Jun 2022 20:46:01 +0200 Subject: [PATCH 35/56] Add interactive to smoothing + documentation updates --- nafuma/xanes/calib.py | 145 +++++++++++++++++++++++++++++------------- 1 file changed, 101 insertions(+), 44 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index d6fecc2..9bb13f4 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -48,8 +48,10 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: required_options = ['pre_edge_limit', 'masks', 'log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'ylim', 'interactive'] default_options = { - 'pre_edge_limit': [None, None], - 'masks': [], + 'pre_edge_limits': [None, None], + 'pre_edge_masks': [], + 'pre_edge_polyorder': 1, + 'pre_edge_save_data': False, 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_pre_edge_fit.log', 'show_plots': False, @@ -67,17 +69,17 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME Implement with finding accurate edge position # FIXME Allow specification of start of pre-edge area # Find the cutoff point at which the edge starts - everything to the LEFT of this point will be used in the pre edge function fit - if not options['pre_edge_limit'][0]: - options['pre_edge_limit'][0] = data['xanes_data_original']['ZapEnergy'].min() + if not options['pre_edge_limits'][0]: + options['pre_edge_limits'][0] = data['xanes_data_original']['ZapEnergy'].min() - if not options['pre_edge_limit'][1]: + if not options['pre_edge_limits'][1]: pre_edge_limit_offset = 0.03 data['edge'] = find_element(data) edge_position = estimate_edge_position(data, options, index=0) - options['pre_edge_limit'][1] = edge_position - pre_edge_limit_offset + options['pre_edge_limits'][1] = edge_position - pre_edge_limit_offset # Start inteactive session with ipywidgets. Disables options['interactive'] in order for the interactive loop to not start another interactive session if options['interactive']: @@ -92,9 +94,9 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME There should be an option to specify the interval in which to fit the background - now it is taking everything to the left of edge_start parameter, but if there are some artifacts in this area, it should be possible to # limit the interval # Making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined) - pre_edge_data = data['xanes_data_original'].loc[(data['xanes_data_original']["ZapEnergy"] > options['pre_edge_limit'][0]) & (data['xanes_data_original']["ZapEnergy"] < options['pre_edge_limit'][1])].copy() + pre_edge_data = data['xanes_data_original'].loc[(data['xanes_data_original']["ZapEnergy"] > options['pre_edge_limits'][0]) & (data['xanes_data_original']["ZapEnergy"] < options['pre_edge_limits'][1])].copy() - for mask in options['masks']: + for mask in options['pre_edge_masks']: pre_edge_data.loc[(pre_edge_data['ZapEnergy'] > mask[0]) & (pre_edge_data['ZapEnergy'] < mask[1])] = np.nan pre_edge_data = pre_edge_data.dropna() @@ -106,10 +108,10 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: for i, filename in enumerate(data['path']): if options['log']: - aux.write_log(message=f'Fitting background on {os.path.basename(filename)} ({i+1} / {len(data["path"])})', options=options) + aux.write_log(message=f'Fitting background on {os.path.basename(filename)} ({i+1}/{len(data["path"])})', options=options) #Fitting linear function to the background - params = np.polyfit(pre_edge_data["ZapEnergy"],pre_edge_data[filename],1) + params = np.polyfit(pre_edge_data["ZapEnergy"],pre_edge_data[filename],options['pre_edge_polyorder']) fit_function = np.poly1d(params) data['pre_edge_params'][filename] = params @@ -133,7 +135,7 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: if options['ylim'][1]: ax1.set_ylim(top=options['ylim'][1]) - for mask in options['masks']: + for mask in options['pre_edge_masks']: ax1.fill_between(x=mask, y1=0, y2=data['xanes_data_original'][filename].max()*2, alpha=0.2, color='black') data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) @@ -155,7 +157,10 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: if options['log']: - aux.write_log(message=f'Pre edge fitting done.', options=options) + aux.write_log(message=f'Pre edge fitting done.', options=options) + + if options['pre_edge_save_data']: + data['pre_edge_fit_data'] = pre_edge_fit_data return pre_edge_fit_data @@ -166,7 +171,7 @@ def pre_edge_fit_interactive(data: dict, options: dict) -> None: w = widgets.interactive( btp.ipywidgets_update, func=widgets.fixed(pre_edge_fit), data=widgets.fixed(data), options=widgets.fixed(options), - pre_edge_limit=widgets.FloatRangeSlider(value=[options['pre_edge_limit'][0], options['pre_edge_limit'][1]], min=data['xanes_data_original']['ZapEnergy'].min(), max=data['xanes_data_original']['ZapEnergy'].max(), step=0.001) + pre_edge_limits=widgets.FloatRangeSlider(value=[options['pre_edge_limits'][0], options['pre_edge_limits'][1]], min=data['xanes_data_original']['ZapEnergy'].min(), max=data['xanes_data_original']['ZapEnergy'].max(), step=0.001) ) options['widget'] = w @@ -219,33 +224,40 @@ def pre_edge_subtraction(data: dict, options={}): def post_edge_fit(data: dict, options={}): - #FIXME should be called "fitting post edge" (normalization is not done here, need edge shift position) - required_options = ['log', 'logfile', 'masks', 'post_edge_limit', 'polyorder', 'interactive', 'show_plots', 'save_plots', 'save_folder'] + ''' Fit the post edge within the post_edge.limits to a polynomial of post_edge.polyorder order. Allows interactive plotting, as well as showing static plots and saving plots to drive. + + Requires data to have already been read to data['xanes_data_original'] + ''' + + + required_options = ['log', 'logfile', 'post_edge_masks', 'post_edge_limits', 'post_edge_polyorder', 'interactive', 'show_plots', 'save_plots', 'save_folder'] default_options = { - 'log': False, + 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_post_edge_fit.log', - 'post_edge_limit': [None, None], - 'masks': [], - 'polyorder': 2, + 'post_edge_limits': [None, None], + 'post_edge_masks': [], + 'post_edge_polyorder': 2, + 'post_edge_save_data': False, 'interactive': False, 'show_plots': False, 'save_plots': False, 'save_folder': './', + 'ylim': [None, None] } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - if not options['post_edge_limit'][0]: + if not options['post_edge_limits'][0]: post_edge_limit_offset = 0.03 data['edge'] = find_element(data) edge_position = estimate_edge_position(data, options, index=0) - options['post_edge_limit'][0] = edge_position + post_edge_limit_offset + options['post_edge_limits'][0] = edge_position + post_edge_limit_offset - if not options['post_edge_limit'][1]: - options['post_edge_limit'][1] = data['xanes_data_original']['ZapEnergy'].max() + if not options['post_edge_limits'][1]: + options['post_edge_limits'][1] = data['xanes_data_original']['ZapEnergy'].max() # Start inteactive session with ipywidgets. Disables options['interactive'] in order for the interactive loop to not start another interactive session if options['interactive']: @@ -257,9 +269,9 @@ def post_edge_fit(data: dict, options={}): - post_edge_data = data['xanes_data_original'].loc[(data['xanes_data_original']["ZapEnergy"] > options['post_edge_limit'][0]) & (data['xanes_data_original']["ZapEnergy"] < options['post_edge_limit'][1])].copy() + post_edge_data = data['xanes_data_original'].loc[(data['xanes_data_original']["ZapEnergy"] > options['post_edge_limits'][0]) & (data['xanes_data_original']["ZapEnergy"] < options['post_edge_limits'][1])].copy() - for mask in options['masks']: + for mask in options['post_edge_masks']: post_edge_data.loc[(post_edge_data['ZapEnergy'] > mask[0]) & (post_edge_data['ZapEnergy'] < mask[1])] = np.nan post_edge_data = post_edge_data.dropna() #Removing all indexes without any value, as some of the data sets misses the few last data points and fucks up the fit @@ -298,7 +310,7 @@ def post_edge_fit(data: dict, options={}): ax1.axvline(x = min(post_edge_data['ZapEnergy']), ls='--') ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) - for mask in options['masks']: + for mask in options['post_edge_masks']: ax1.fill_between(x=mask, y1=0, y2=data['xanes_data_original'][filename].max()*2, alpha=0.2, color='black') if options['ylim'][0] != None: @@ -325,14 +337,22 @@ def post_edge_fit(data: dict, options={}): plt.close() + if options['log']: + aux.write_log(message='Post edge fitting done!', options=options) + + if options['post_edge_save_data']: + data['post_edge_fit_data'] = post_edge_fit_data + + return post_edge_fit_data def post_edge_fit_interactive(data: dict, options: dict) -> None: + ''' Defines the widgets to use with the ipywidgets interactive mode and calls the update function found in btp.ipywidgets. ''' w = widgets.interactive( btp.ipywidgets_update, func=widgets.fixed(post_edge_fit), data=widgets.fixed(data), options=widgets.fixed(options), - post_edge_limit=widgets.FloatRangeSlider(value=[options['post_edge_limit'][0], options['post_edge_limit'][1]], min=data['xanes_data_original']['ZapEnergy'].min(), max=data['xanes_data_original']['ZapEnergy'].max(), step=0.001) + post_edge_limit=widgets.FloatRangeSlider(value=[options['post_edge.limits'][0], options['post_edge.limits'][1]], min=data['xanes_data_original']['ZapEnergy'].min(), max=data['xanes_data_original']['ZapEnergy'].max(), step=0.001) ) options['widget'] = w @@ -344,43 +364,59 @@ def smoothing(data: dict, options={}): # FIXME Add logging # FIXME Add saving of files - required_options = ['log', 'logfile', 'window_length','polyorder', 'save_default'] + required_options = ['log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'smooth_window_length', 'smooth_algorithm', 'smooth_polyorder', 'smooth_save_default'] default_options = { 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_smoothing.log', 'save_plots': False, 'save_folder': './', - 'window_length': 3, - 'polyorder': 2, - 'save_default': False + 'smooth_window_length': 3, + 'smooth_polyorder': 2, + 'smooth_algorithm': 'savgol', # At the present, only Savitzky-Golay filter is implemented. Add Gaussian and Boxcar later. + 'smooth_save_default': False, } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) df_smooth = pd.DataFrame(data['xanes_data']['ZapEnergy']) - if options['save_default']: + if options['smooth_save_default']: df_smooth_default = pd.DataFrame(data['xanes_data']['ZapEnergy']) + if options['log']: + aux.write_log(message='Starting smoothing.') + + + if options['interactive']: + options['interactive'] = False + options['interactive_session_active'] = True + options['show_plots'] = True + smoothing_interactive(data=data, options=options) + return + + # FIXME Add other types of filters # FIXME Instead of assigning values directly to the data dictionary, these should be made into an own DataFrame that you can decide later what to do with - these variables should # then be returned - for filename in data['path']: - df_smooth.insert(1, filename, savgol_filter(data['xanes_data'][filename], options['window_length'], options['polyorder'])) + for i, filename in enumerate(data['path']): + + if options['smooth_algorithm'] == 'savgol': + if options['log']: + aux.write_log(message=f'Smoothing {filename} with algorithm: {options["smooth_algorithm"]} ({i+1}/{len(data["path"])})', options=options) + df_smooth.insert(1, filename, savgol_filter(data['xanes_data'][filename], options['smooth_window_length'], options['smooth_polyorder'])) - if options['save_default']: - df_smooth_default.insert(1, filename, savgol_filter(data['xanes_data'][filename], default_options['window_length'], default_options['polyorder'])) + if options['smooth_save_default']: + if options['smooth.algorithm'] == 'savgol': + if options['log']: + aux.write_log(message=f'Smoothing {filename} using default parameters with algorithm: {options["smooth_algorithm"]} ({i+1}/{len(data["path"])})', options=options) + df_smooth_default.insert(1, filename, savgol_filter(data['xanes_data'][filename], default_options['smooth_window_length'], default_options['smooth_polyorder'])) - if options['save_plots']: - if not os.path.isdir(options['save_folder']): - os.makedirs(options['save_folder']) - - dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_smooth.png' + if options['save_plots'] or options['show_plots']: edge_pos = estimate_edge_position(data=data, options=options) intensity_midpoint = df_smooth[filename].iloc[np.where(df_smooth['ZapEnergy'] == find_nearest(df_smooth['ZapEnergy'], edge_pos))].values[0] - if options['save_default']: + if options['smooth_save_default']: fig, (ax1, ax2) = plt.subplots(1,2,figsize=(20,5)) data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-0.0015) & (data['xanes_data']['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='black', ax=ax1, kind='scatter') df_smooth.loc[(df_smooth['ZapEnergy'] > edge_pos-0.0015) & (df_smooth['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='red', ax=ax1) @@ -399,9 +435,15 @@ def smoothing(data: dict, options={}): ax.set_title(f'{os.path.basename(filename)} - Smooth', size=20) + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) - plt.savefig(dst, transparent=False) - plt.close() + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_smooth.png' + plt.savefig(dst, transparent=False) + + if not options['show_plots']: + plt.close() if not options['save_default']: df_smooth_default = None @@ -410,6 +452,21 @@ def smoothing(data: dict, options={}): +def smoothing_interactive(data: dict, options: dict) -> None: + ''' Defines the widgets to use with the ipywidgets interactive mode and calls the update function found in btp.ipywidgets. ''' + + w = widgets.interactive( + btp.ipywidgets_update, func=widgets.fixed(smoothing), data=widgets.fixed(data), options=widgets.fixed(options), + smooth_window_length=widgets.IntSlider(value=options['smooth_window_length'], min=1, max=20, step=1), + smooth_polyorder=widgets.IntSlider(value=options['smooth_polyorder'], min=1, max=5, step=1), + ) + + + options['widget'] = w + + display(w) + + def find_nearest(array, value): #function to find the value closes to "value" in an "array" array = np.asarray(array) From c522b73ca4170e639487564fd51daf98acf09b65 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Tue, 28 Jun 2022 12:10:46 +0200 Subject: [PATCH 36/56] Add K-edges from ITC Vol C --- nafuma/xanes/__init__.py | 2 +- nafuma/xanes/edges.py | 27 +++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) create mode 100644 nafuma/xanes/edges.py diff --git a/nafuma/xanes/__init__.py b/nafuma/xanes/__init__.py index b11c1f3..a3834e8 100644 --- a/nafuma/xanes/__init__.py +++ b/nafuma/xanes/__init__.py @@ -1 +1 @@ -from . import io, calib \ No newline at end of file +from . import io, calib, edges \ No newline at end of file diff --git a/nafuma/xanes/edges.py b/nafuma/xanes/edges.py new file mode 100644 index 0000000..dc02601 --- /dev/null +++ b/nafuma/xanes/edges.py @@ -0,0 +1,27 @@ +import pandas as pd +import numpy as np +from scipy.constants import c, h + +# From 2019 redefinition of SI base units: https://en.wikipedia.org/wiki/2019_redefinition_of_the_SI_base_units +keV_per_J = (1 / 1.602176634e-19) / 1000 + +# kXu values taken from International Tables for Crystallography Volume , Kulwer Academic Publishers - Dordrect / Boston / London (1992) +k_edge = { 'Z': [ 1, 2, + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, + 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48], + 'Atom': [ 'H', 'He', + 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', + 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', + 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', + 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd'], + 'kXu': [ np.nan, np.nan, + 226.5, np.nan, np.nan, 43.68, 30.99, 23.32, np.nan, np.nan, + np.nan, 9.5117, 7.9511, 6.7446, 5.7866, 5.0182, 4.3969, 3.8707, + 3.43645, 3.07016, 2.7573, 2.49730, 2.26902, 2.07012, 1.89636, 1.74334, 1.60811, 1.48802, 1.38043, 1.2833, 1.19567, 1.11652, 1.04497, 0.97978, 0.91995, 0.86547, + 0.81549, 0.76969, 0.72762, 0.68877, 0.65291, 0.61977, 0.5891, 0.56047, 0.53378, 0.50915, 0.48582, 0.46409]} + + +k_edge = pd.DataFrame(k_edge) +k_edge['keV'] = np.round(h*c/(k_edge['kXu']*10**-10) * keV_per_J, 3) \ No newline at end of file From 6bbd6776b896bea21f4f6bfa846a6aecee7c0e59 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Wed, 29 Jun 2022 15:26:43 +0200 Subject: [PATCH 37/56] Tweaks based on workflow testing --- nafuma/auxillary.py | 14 +- nafuma/xanes/calib.py | 334 +++++++++++++++++++++++++----------------- 2 files changed, 214 insertions(+), 134 deletions(-) diff --git a/nafuma/auxillary.py b/nafuma/auxillary.py index 2b87479..0ccde1f 100644 --- a/nafuma/auxillary.py +++ b/nafuma/auxillary.py @@ -12,11 +12,21 @@ def update_options(options, required_options, default_options): return options -def save_options(options, path): +def save_options(options, path, ignore=None): ''' Saves any options dictionary to a JSON-file in the specified path''' + options_copy = options.copy() + + if ignore: + if not isinstance(ignore, list): + ignore = [ignore] + + for i in ignore: + options_copy[i] = 'Removed' + + with open(path, 'w') as f: - json.dump(options,f) + json.dump(options_copy,f, skipkeys=True, indent=4) def load_options(path): diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 9bb13f4..22d63eb 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -46,12 +46,12 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME Add log-file - required_options = ['pre_edge_limit', 'masks', 'log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'ylim', 'interactive'] + required_options = ['pre_edge_limits', 'pre_edge_masks', 'pre_edge_polyorder', 'pre_edge_store_data', 'log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'ylim', 'interactive'] default_options = { 'pre_edge_limits': [None, None], 'pre_edge_masks': [], 'pre_edge_polyorder': 1, - 'pre_edge_save_data': False, + 'pre_edge_store_data': False, 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_pre_edge_fit.log', 'show_plots': False, @@ -159,7 +159,7 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: if options['log']: aux.write_log(message=f'Pre edge fitting done.', options=options) - if options['pre_edge_save_data']: + if options['pre_edge_store_data']: data['pre_edge_fit_data'] = pre_edge_fit_data return pre_edge_fit_data @@ -171,7 +171,8 @@ def pre_edge_fit_interactive(data: dict, options: dict) -> None: w = widgets.interactive( btp.ipywidgets_update, func=widgets.fixed(pre_edge_fit), data=widgets.fixed(data), options=widgets.fixed(options), - pre_edge_limits=widgets.FloatRangeSlider(value=[options['pre_edge_limits'][0], options['pre_edge_limits'][1]], min=data['xanes_data_original']['ZapEnergy'].min(), max=data['xanes_data_original']['ZapEnergy'].max(), step=0.001) + pre_edge_limits=widgets.FloatRangeSlider(value=[options['pre_edge_limits'][0], options['pre_edge_limits'][1]], min=data['xanes_data_original']['ZapEnergy'].min(), max=data['xanes_data_original']['ZapEnergy'].max(), step=0.001), + pre_edge_store_data=widgets.Checkbox(value=options['pre_edge_store_data']) ) options['widget'] = w @@ -183,12 +184,14 @@ def pre_edge_fit_interactive(data: dict, options: dict) -> None: def pre_edge_subtraction(data: dict, options={}): - required_options = ['log', 'logfile', 'save_plots', 'save_folder'] + required_options = ['log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'pre_edge_subtraction_store_data'] default_options = { 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_pre_edge_subtraction.log', + 'show_plots': False, 'save_plots': False, - 'save_folder': './' + 'save_folder': './', + 'pre_edge_subtraction_store_data': False } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -203,19 +206,28 @@ def pre_edge_subtraction(data: dict, options={}): xanes_data_bkgd_subtracted.insert(1, filename, data['xanes_data_original'][filename] - data['pre_edge_fit_data'][filename]) - if options['save_plots']: - if not os.path.isdir(options['save_folder']): - os.makedirs(options['save_folder']) + if options['save_plots'] or options['show_plots']: - dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_pre_edge_subtraction.png' fig, ax = plt.subplots(figsize=(10,5)) - data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax) - xanes_data_bkgd_subtracted.plot(x='ZapEnergy', y=filename, color='red', ax=ax) + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax, label='Original data') + xanes_data_bkgd_subtracted.plot(x='ZapEnergy', y=filename, color='red', ax=ax, label='Pre edge subtracted') ax.set_title(f'{os.path.basename(filename)} - After subtraction', size=20) - plt.savefig(dst) - plt.close() + + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) + + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_pre_edge_subtraction.png' + + plt.savefig(dst) + + if not options['show_plots']: + plt.close() + + if options['pre_edge_subtraction_store_data']: + data['xanes_data'] = xanes_data_bkgd_subtracted return xanes_data_bkgd_subtracted @@ -230,14 +242,14 @@ def post_edge_fit(data: dict, options={}): ''' - required_options = ['log', 'logfile', 'post_edge_masks', 'post_edge_limits', 'post_edge_polyorder', 'interactive', 'show_plots', 'save_plots', 'save_folder'] + required_options = ['log', 'logfile', 'post_edge_masks', 'post_edge_limits', 'post_edge_polyorder', 'post_edge_store_data', 'interactive', 'show_plots', 'save_plots', 'save_folder'] default_options = { 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_post_edge_fit.log', 'post_edge_limits': [None, None], 'post_edge_masks': [], 'post_edge_polyorder': 2, - 'post_edge_save_data': False, + 'post_edge_store_data': False, 'interactive': False, 'show_plots': False, 'save_plots': False, @@ -283,10 +295,10 @@ def post_edge_fit(data: dict, options={}): for i, filename in enumerate(data['path']): if options['log']: - aux.write_log(message=f'Fitting post edge on {os.path.basename(filename)} ({i+1} / {len(data["path"])}) with polynomial order {options["polyorder"]}', options=options) + aux.write_log(message=f'Fitting post edge on {os.path.basename(filename)} ({i+1} / {len(data["path"])}) with polynomial order {options["post_edge_polyorder"]}', options=options) #Fitting linear function to the background - params = np.polyfit(post_edge_data["ZapEnergy"], post_edge_data[filename], options['polyorder']) + params = np.polyfit(post_edge_data["ZapEnergy"], post_edge_data[filename], options['post_edge_polyorder']) fit_function = np.poly1d(params) if options['log']: @@ -340,7 +352,7 @@ def post_edge_fit(data: dict, options={}): if options['log']: aux.write_log(message='Post edge fitting done!', options=options) - if options['post_edge_save_data']: + if options['post_edge_store_data']: data['post_edge_fit_data'] = post_edge_fit_data @@ -352,7 +364,8 @@ def post_edge_fit_interactive(data: dict, options: dict) -> None: w = widgets.interactive( btp.ipywidgets_update, func=widgets.fixed(post_edge_fit), data=widgets.fixed(data), options=widgets.fixed(options), - post_edge_limit=widgets.FloatRangeSlider(value=[options['post_edge.limits'][0], options['post_edge.limits'][1]], min=data['xanes_data_original']['ZapEnergy'].min(), max=data['xanes_data_original']['ZapEnergy'].max(), step=0.001) + post_edge_limits=widgets.FloatRangeSlider(value=[options['post_edge_limits'][0], options['post_edge_limits'][1]], min=data['xanes_data_original']['ZapEnergy'].min(), max=data['xanes_data_original']['ZapEnergy'].max(), step=0.001), + post_edge_store_data=widgets.Checkbox(value=options['post_edge_store_data']) ) options['widget'] = w @@ -364,16 +377,19 @@ def smoothing(data: dict, options={}): # FIXME Add logging # FIXME Add saving of files - required_options = ['log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'smooth_window_length', 'smooth_algorithm', 'smooth_polyorder', 'smooth_save_default'] + required_options = ['log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'interactive', 'smooth_window_length', 'smooth_algorithm', 'smooth_polyorder', 'smooth_save_default', 'smooth_store_data'] default_options = { 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_smoothing.log', + 'show_plots': False, 'save_plots': False, 'save_folder': './', + 'interactive': False, 'smooth_window_length': 3, 'smooth_polyorder': 2, 'smooth_algorithm': 'savgol', # At the present, only Savitzky-Golay filter is implemented. Add Gaussian and Boxcar later. 'smooth_save_default': False, + 'smooth_store_data': False, } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -387,6 +403,7 @@ def smoothing(data: dict, options={}): if options['interactive']: + data['xanes_data_backup'] = data['xanes_data'] options['interactive'] = False options['interactive_session_active'] = True options['show_plots'] = True @@ -405,7 +422,7 @@ def smoothing(data: dict, options={}): df_smooth.insert(1, filename, savgol_filter(data['xanes_data'][filename], options['smooth_window_length'], options['smooth_polyorder'])) if options['smooth_save_default']: - if options['smooth.algorithm'] == 'savgol': + if options['smooth_algorithm'] == 'savgol': if options['log']: aux.write_log(message=f'Smoothing {filename} using default parameters with algorithm: {options["smooth_algorithm"]} ({i+1}/{len(data["path"])})', options=options) df_smooth_default.insert(1, filename, savgol_filter(data['xanes_data'][filename], default_options['smooth_window_length'], default_options['smooth_polyorder'])) @@ -413,27 +430,36 @@ def smoothing(data: dict, options={}): if options['save_plots'] or options['show_plots']: + + edge_pos = estimate_edge_position(data=data, options=options) intensity_midpoint = df_smooth[filename].iloc[np.where(df_smooth['ZapEnergy'] == find_nearest(df_smooth['ZapEnergy'], edge_pos))].values[0] + step_length = data['xanes_data']['ZapEnergy'].iloc[1] - data['xanes_data']['ZapEnergy'].iloc[0] + + if options['smooth_save_default']: fig, (ax1, ax2) = plt.subplots(1,2,figsize=(20,5)) - data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-0.0015) & (data['xanes_data']['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='black', ax=ax1, kind='scatter') - df_smooth.loc[(df_smooth['ZapEnergy'] > edge_pos-0.0015) & (df_smooth['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='red', ax=ax1) + data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-10*step_length) & (data['xanes_data']['ZapEnergy'] < edge_pos+10*step_length)].plot(x='ZapEnergy', y=filename, color='black', ax=ax1, kind='scatter') + df_smooth.loc[(df_smooth['ZapEnergy'] > edge_pos-10*step_length) & (df_smooth['ZapEnergy'] < edge_pos+10*step_length)].plot(x='ZapEnergy', y=filename, color='red', ax=ax1) ax1.set_title(f'{os.path.basename(filename)} - Smooth', size=20) - data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-0.0015) & (data['xanes_data']['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='black', ax=ax2, kind='scatter') - df_smooth_default.loc[(df_smooth_default['ZapEnergy'] > edge_pos-0.0015) & (df_smooth_default['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='red', ax=ax2) + data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-10*step_length) & (data['xanes_data']['ZapEnergy'] < edge_pos+10*step_length)].plot(x='ZapEnergy', y=filename, color='black', ax=ax2, kind='scatter') + df_smooth_default.loc[(df_smooth_default['ZapEnergy'] > edge_pos-10*step_length) & (df_smooth_default['ZapEnergy'] < edge_pos+10*step_length)].plot(x='ZapEnergy', y=filename, color='red', ax=ax2) ax2.set_title(f'{os.path.basename(filename)} - Smooth (default values)', size=20) - elif not options['save_default']: - fig, ax = plt.subplots(figsize=(20,10)) - data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-0.0015) & (data['xanes_data']['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='black', ax=ax, kind='scatter') - df_smooth.loc[(df_smooth['ZapEnergy'] > edge_pos-0.0015) & (df_smooth['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='red', ax=ax) - ax.set_xlim([edge_pos-0.0015, edge_pos+0.0015]) - ax.set_ylim([intensity_midpoint*0.9, intensity_midpoint*1.1]) + elif not options['smooth_save_default']: + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,10)) + data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax1, kind='scatter', c='black') + df_smooth.plot(x='ZapEnergy', y=filename, ax=ax1, c='red') + + data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-10*step_length) & (data['xanes_data']['ZapEnergy'] < edge_pos+10*step_length)].plot(x='ZapEnergy', y=filename, color='black', ax=ax2, kind='scatter') + df_smooth.loc[(df_smooth['ZapEnergy'] > edge_pos-10*step_length) & (df_smooth['ZapEnergy'] < edge_pos+10*step_length)].plot(x='ZapEnergy', y=filename, color='red', ax=ax2) + #ax.set_xlim([edge_pos-0.0015, edge_pos+0.0015]) + #ax.set_ylim([intensity_midpoint*0.9, intensity_midpoint*1.1]) - ax.set_title(f'{os.path.basename(filename)} - Smooth', size=20) + ax1.set_title(f'{os.path.basename(filename)} - Smooth', size=20) + ax2.set_title(f'{os.path.basename(filename)} - Smooth Edge Region', size=20) if options['save_plots']: if not os.path.isdir(options['save_folder']): @@ -445,8 +471,12 @@ def smoothing(data: dict, options={}): if not options['show_plots']: plt.close() - if not options['save_default']: + if not options['smooth_save_default']: df_smooth_default = None + + if options['smooth_store_data']: + data['xanes_data'] = df_smooth + options['smooth_store_data'] = False return df_smooth, df_smooth_default @@ -457,16 +487,21 @@ def smoothing_interactive(data: dict, options: dict) -> None: w = widgets.interactive( btp.ipywidgets_update, func=widgets.fixed(smoothing), data=widgets.fixed(data), options=widgets.fixed(options), - smooth_window_length=widgets.IntSlider(value=options['smooth_window_length'], min=1, max=20, step=1), + smooth_window_length=widgets.IntSlider(value=options['smooth_window_length'], min=3, max=21, step=2), smooth_polyorder=widgets.IntSlider(value=options['smooth_polyorder'], min=1, max=5, step=1), + smooth_store_data=widgets.Checkbox(value=options['smooth_store_data']) ) - options['widget'] = w display(w) +def restore_from_backup(data): + if 'xanes_data_bakcup' in data.keys(): + data['xanes_data'] = data['xanes_data_backup'] + + def find_nearest(array, value): #function to find the value closes to "value" in an "array" array = np.asarray(array) @@ -480,7 +515,7 @@ def estimate_edge_position(data: dict, options={}, index=0): default_options = { 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_edge_position_estimation.log', - 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly + 'periods': 6, #Periods needs to be an even number for the shifting of values to work properly } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -509,7 +544,7 @@ def determine_edge_position(data: dict, options={}): Requires that XANES-data is already loaded in data['xanes_data']. This allows the user to choose when to determine the edge position - whether before or after normalisation, flattening etc.''' - required_options = ['save_values', 'log', 'logfile', 'save_plots', 'save_folder', 'diff', 'diff.polyorder', 'diff.periods', 'double_diff', 'double_diff.polyorder', 'double_diff.periods', 'fit_region'] + required_options = ['save_values', 'log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'diff', 'diff.polyorder', 'diff.periods', 'double_diff', 'double_diff.polyorder', 'double_diff.periods', 'points_around_edge'] default_options = { 'save_values': True, # Whether the edge positions should be stored in a dictionary within the main data dictionary. 'log': False, # Toggles logging on/off @@ -521,9 +556,9 @@ def determine_edge_position(data: dict, options={}): 'diff.polyorder': 2, # Sets the order of the polynomial to fit edge region of the differential to 'diff.periods': 2, # Sets the number of data points between which the first order difference should be calculated. Needs to be even for subsequent shifting of data to function. 'double_diff': False, # Toggles calculation of the edge position based on double differential data - 'double_diff.polyorder': 2, # Sets the order of the polynomial to fit edge region of the double differential to + 'double_diff.polyorder': 1, # Sets the order of the polynomial to fit edge region of the double differential to 'double_diff.periods': 2, # Sets the number of data points between which the second order difference should be calculated. Needs to be even for subsequent shifting of data to function. - 'fit_region': None # The length of the region to find points to fit to a function + 'points_around_edge': 5 # The length of the region to find points to fit to a function } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -538,6 +573,16 @@ def determine_edge_position(data: dict, options={}): aux.write_log(message='Periods for double differentiation is not even. Ending run.', options=options) raise Exception("NB! Periods needs to be an even number for the shifting of values to work properly") + + if options['interactive']: + data['xanes_data_backup'] = data['xanes_data'] + options['interactive'] = False + options['interactive_session_active'] = True + options['show_plots'] = True + determine_edge_position_interactive(data=data, options=options) + return + + # Prepare dataframes for differential data @@ -554,8 +599,8 @@ def determine_edge_position(data: dict, options={}): for i, filename in enumerate(data['path']): estimated_edge_pos = estimate_edge_position(data, options=options, index=i) - if not options['fit_region']: - options['fit_region'] = (5)*(data['xanes_data']['ZapEnergy'].iloc[1] - data['xanes_data']['ZapEnergy'].iloc[0]) + + fit_region = (options['points_around_edge']+1)*(data['xanes_data']['ZapEnergy'].iloc[1] - data['xanes_data']['ZapEnergy'].iloc[0]) #========================== Fitting the first order derivative ========== @@ -565,7 +610,7 @@ def determine_edge_position(data: dict, options={}): df_diff[filename]=df_diff[filename].shift(-int(options['diff.periods']/2)) # Shifts the data back so that the difference between the points is located in the middle of the two points the caluclated difference is between # Picks out the points to be fitted - df_diff_edge = df_diff.loc[(df_diff["ZapEnergy"] <= estimated_edge_pos+options['fit_region']) & ((df_diff["ZapEnergy"] >= estimated_edge_pos-options['fit_region']))] + df_diff_edge = df_diff.loc[(df_diff["ZapEnergy"] <= estimated_edge_pos+fit_region) & ((df_diff["ZapEnergy"] >= estimated_edge_pos-fit_region))] # Fitting a function to the chosen interval @@ -594,7 +639,7 @@ def determine_edge_position(data: dict, options={}): df_double_diff[filename]=df_double_diff[filename].shift(-int(options['double_diff.periods'])) # Pick out region of interest - df_double_diff_edge = df_double_diff.loc[(df_double_diff["ZapEnergy"] < estimated_edge_pos+options['fit_region']) & ((df_double_diff["ZapEnergy"] > estimated_edge_pos-options['fit_region']))] + df_double_diff_edge = df_double_diff.loc[(df_double_diff["ZapEnergy"] < estimated_edge_pos+fit_region) & ((df_double_diff["ZapEnergy"] > estimated_edge_pos-fit_region))] # Fitting a function to the chosen interval params = np.polyfit(df_double_diff_edge["ZapEnergy"], df_double_diff_edge[filename], options['double_diff.polyorder']) @@ -621,100 +666,100 @@ def determine_edge_position(data: dict, options={}): data['e0_double_diff'][filename] = edge_pos_double_diff - # Make and show / save plots - if options['save_plots'] or options['show_plots']: + # Make and show / save plots + if options['save_plots'] or options['show_plots']: - # If both are enabled - if options['diff'] and options['double_diff']: - - fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(ncols=3, nrows=2, figsize=(20,20)) - data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax1, c='black') - ax1.axvline(x=edge_pos_diff, ls='--', c='green') - - df_diff.plot(x='ZapEnergy', y=filename, ax=ax2, kind='scatter') - df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax2) - ax2.set_xlim([edge_pos_diff-0.0015, edge_pos_diff+0.0015]) - ax2.axvline(x=estimated_edge_pos-options['fit_region'], ls='--', c='black') - ax2.axvline(x=edge_pos_diff, ls='--', c='green') - ax2.axvline(x=estimated_edge_pos+options['fit_region'], ls='--', c='black') - ax2.set_title('Fit region of differentiated data') + # If both are enabled + if options['diff'] and options['double_diff']: - df_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax3, kind='scatter') - df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax3) - ax3.axvline(x=edge_pos_diff, ls='--', c='green') - ax3.axvline(x=estimated_edge_pos, ls='--', c='red') - ax3.set_title('Fit of differentiated data') + fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(ncols=3, nrows=2, figsize=(20,20)) + data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax1, c='black') + ax1.axvline(x=edge_pos_diff, ls='--', c='green') + + df_diff.plot(x='ZapEnergy', y=filename, ax=ax2, kind='scatter') + df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax2) + ax2.set_xlim([edge_pos_diff-fit_region*1.5, edge_pos_diff+fit_region*1.5]) + ax2.axvline(x=estimated_edge_pos-fit_region, ls='--', c='black') + ax2.axvline(x=edge_pos_diff, ls='--', c='green') + ax2.axvline(x=estimated_edge_pos+fit_region, ls='--', c='black') + ax2.set_title('Fit region of differentiated data') + + df_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax3, kind='scatter') + df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax3) + ax3.axvline(x=edge_pos_diff, ls='--', c='green') + ax3.axvline(x=estimated_edge_pos, ls='--', c='red') + ax3.set_title('Fit of differentiated data') - data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax4, c='black') - ax4.axvline(x=edge_pos_double_diff, ls='--', c='green') + data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax4, c='black') + ax4.axvline(x=edge_pos_double_diff, ls='--', c='green') - df_double_diff.plot(x='ZapEnergy', y=filename, ax=ax5, kind='scatter') - df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax5) - ax5.set_xlim([edge_pos_double_diff-0.0015, edge_pos_double_diff+0.0015]) - ax5.axvline(x=estimated_edge_pos-options['fit_region'], ls='--', c='black') - ax5.axvline(x=edge_pos_double_diff, ls='--', c='green') - ax5.axvline(x=estimated_edge_pos+options['fit_region'], ls='--', c='black') + df_double_diff.plot(x='ZapEnergy', y=filename, ax=ax5, kind='scatter') + df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax5) + ax5.set_xlim([edge_pos_double_diff-0.0015, edge_pos_double_diff+0.0015]) + ax5.axvline(x=estimated_edge_pos-fit_region, ls='--', c='black') + ax5.axvline(x=edge_pos_double_diff, ls='--', c='green') + ax5.axvline(x=estimated_edge_pos+fit_region, ls='--', c='black') - df_double_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax6, kind='scatter') - df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax6) - ax6.axvline(x=edge_pos_double_diff, ls='--', c='green') - ax6.axvline(x=estimated_edge_pos, ls='--', c='red') - + df_double_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax6, kind='scatter') + df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax6) + ax6.axvline(x=edge_pos_double_diff, ls='--', c='green') + ax6.axvline(x=estimated_edge_pos, ls='--', c='red') + - # If only first order differentials is enabled - elif options['diff']: - fig, (ax1, ax2, ax3) = plt.subplots(ncols=3,nrows=1, figsize=(20, 10)) - - data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax1, c='black') - ax1.axvline(x=edge_pos_diff, ls='--', c='green') + # If only first order differentials is enabled + elif options['diff']: + fig, (ax1, ax2, ax3) = plt.subplots(ncols=3,nrows=1, figsize=(20, 10)) + + data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax1, c='black') + ax1.axvline(x=edge_pos_diff, ls='--', c='green') - df_diff.plot(x='ZapEnergy', y=filename, ax=ax2, kind='scatter') - df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax2) - ax2.set_xlim([edge_pos_diff-0.5, edge_pos_diff+0.5]) - ax2.axvline(x=edge_pos_diff-options['fit_region'], ls='--', c='black') - ax2.axvline(x=edge_pos_diff, ls='--', c='green') - ax2.axvline(x=edge_pos_diff+options['fit_region'], ls='--', c='black') + df_diff.plot(x='ZapEnergy', y=filename, ax=ax2, kind='scatter') + df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax2) + ax2.set_xlim([edge_pos_diff-fit_region*1.5, edge_pos_diff+fit_region*1.5]) + ax2.axvline(x=edge_pos_diff-fit_region, ls='--', c='black') + ax2.axvline(x=edge_pos_diff, ls='--', c='green') + ax2.axvline(x=edge_pos_diff+fit_region, ls='--', c='black') - df_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax3) - df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax3) - ax3.axvline(x=edge_pos_diff, ls='--', c='green') - ax3.axvline(x=estimated_edge_pos, ls='--', c='red') + df_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax3) + df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax3) + ax3.axvline(x=edge_pos_diff, ls='--', c='green') + ax3.axvline(x=estimated_edge_pos, ls='--', c='red') - # If only second order differentials is enabled - elif options['double_diff']: - fig, (ax1, ax2, ax3) = plt.subplots(ncols=3,nrows=1, figsize=(20, 10)) - - data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax1, c='black') - ax1.axvline(x=edge_pos_double_diff, ls='--', c='green') + # If only second order differentials is enabled + elif options['double_diff']: + fig, (ax1, ax2, ax3) = plt.subplots(ncols=3,nrows=1, figsize=(20, 10)) + + data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax1, c='black') + ax1.axvline(x=edge_pos_double_diff, ls='--', c='green') - df_double_diff.plot(x='ZapEnergy', y=filename, ax=ax2, kind='scatter') - df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax2) - ax2.set_xlim([edge_pos_double_diff-0.5, edge_pos_double_diff+0.5]) - ax2.axvline(x=edge_pos_double_diff-options['fit_region'], ls='--', c='black') - ax2.axvline(x=edge_pos_double_diff, ls='--', c='green') - ax2.axvline(x=edge_pos_double_diff+options['fit_region'], ls='--', c='black') + df_double_diff.plot(x='ZapEnergy', y=filename, ax=ax2, kind='scatter') + df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax2) + ax2.set_xlim([edge_pos_double_diff-fit_region*1.5, edge_pos_double_diff+fit_region*1.5]) + ax2.axvline(x=edge_pos_double_diff-fit_region, ls='--', c='black') + ax2.axvline(x=edge_pos_double_diff, ls='--', c='green') + ax2.axvline(x=edge_pos_double_diff+fit_region, ls='--', c='black') - df_double_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax3) - df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax3) - ax3.axvline(x=edge_pos_double_diff, ls='--', c='green') - ax3.axvline(x=estimated_edge_pos, ls='--', c='red') + df_double_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax3) + df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax3) + ax3.axvline(x=edge_pos_double_diff, ls='--', c='green') + ax3.axvline(x=estimated_edge_pos, ls='--', c='red') - # Save plots if toggled - if options['save_plots']: - if not os.path.isdir(options['save_folder']): - os.makedirs(options['save_folder']) + # Save plots if toggled + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) - dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_edge_position.png' + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_edge_position.png' - plt.savefig(dst, transparent=False) + plt.savefig(dst, transparent=False) - # Close plots if show_plots not toggled - if not options['show_plots']: - plt.close() + # Close plots if show_plots not toggled + if not options['show_plots']: + plt.close() if not options['diff']: @@ -724,35 +769,59 @@ def determine_edge_position(data: dict, options={}): return edge_pos_diff, edge_pos_double_diff + + +def determine_edge_position_interactive(data: dict, options: dict) -> None: + ''' Defines the widgets to use with the ipywidgets interactive mode and calls the update function found in btp.ipywidgets. ''' + + + step_size = data['xanes_data']['ZapEnergy'].iloc[1] - data['xanes_data']['ZapEnergy'].iloc[0] + + w = widgets.interactive( + btp.ipywidgets_update, func=widgets.fixed(determine_edge_position), data=widgets.fixed(data), options=widgets.fixed(options), + points_around_edge=widgets.IntSlider(value=options['points_around_edge'], min=1, max=20, step=1), + ) + + options['widget'] = w + + display(w) + def normalise(data: dict, options={}): - required_options = ['log', 'logfile', 'save_values'] + required_options = ['log', 'logfile', 'normalisation_store_data'] default_options = { 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_normalisation.log', - 'save_values': True + 'normalisation_store_data': False, } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) normalised_df = pd.DataFrame(data['xanes_data']['ZapEnergy']) data['normalisation_constants'] = {} + if options['normalisation_store_data']: + pre_edge_fit_data_norm = pd.DataFrame(data['pre_edge_fit_data']['ZapEnergy']) + post_edge_fit_data_norm = pd.DataFrame(data['post_edge_fit_data']['ZapEnergy']) #Finding the normalisation constant µ_0(E_0), by subtracting the value of the pre-edge-line from the value of the post-edge line at e0 for filename in data['path']: - e0_ind = data['post_edge_fit_data'].loc[data['post_edge_fit_data']['ZapEnergy'] == find_nearest(data['post_edge_fit_data']['ZapEnergy'], data['e0'][filename])].index.values[0] + e0_ind = data['post_edge_fit_data'].loc[data['post_edge_fit_data']['ZapEnergy'] == find_nearest(data['post_edge_fit_data']['ZapEnergy'], data['e0_diff'][filename])].index.values[0] #norm = data['post_edge_fit_data'][filename].iloc[find_nearest(data['post_edge_fit_data'][filename], data['e0'][filename])] normalisation_constant = data['post_edge_fit_data'][filename].iloc[e0_ind] - data['pre_edge_fit_data'][filename].iloc[e0_ind] normalised_df.insert(1, filename, data['xanes_data'][filename] / normalisation_constant) + if options['normalisation_store_data']: + pre_edge_fit_data_norm.insert(1, filename, data['pre_edge_fit_data'][filename] / normalisation_constant) + post_edge_fit_data_norm.insert(1, filename, data['post_edge_fit_data'][filename] / normalisation_constant) + + + if options['normalisation_store_data']: + data['xanes_data'] = normalised_df # Normalise the pre-edge and post-edge fit function data - data['pre_edge_fit_data'][filename] = data['pre_edge_fit_data'][filename] / normalisation_constant - data['post_edge_fit_data'][filename] = data['post_edge_fit_data'][filename] / normalisation_constant + data['pre_edge_fit_data_norm'] = pre_edge_fit_data_norm + data['post_edge_fit_data_norm'] = post_edge_fit_data_norm data['normalisation_constants'][filename] = normalisation_constant - if options['save_values']: - data['xanes_data'] = normalised_df - return normalised_df @@ -760,11 +829,11 @@ def normalise(data: dict, options={}): def flatten(data:dict, options={}): #only picking out zapenergy-values higher than edge position (edge pos and below remains untouched) - required_options = ['log', 'logfile', 'save_values'] + required_options = ['log', 'logfile', 'flatten_store_data'] default_options = { 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_flattening.log', - 'save_values': True + 'flatten_store_data': False, } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -772,13 +841,14 @@ def flatten(data:dict, options={}): flattened_df = pd.DataFrame(data['xanes_data']['ZapEnergy']) for filename in data['path']: - fit_function_diff = -data['post_edge_fit_data'][filename] + data['pre_edge_params'][filename][0] - fit_function_diff.loc[flattened_df['ZapEnergy'] <= data['e0'][filename]] = 0 + fit_function_diff = data['post_edge_fit_data_norm'][filename] - 1 + + fit_function_diff.loc[flattened_df['ZapEnergy'] <= data['e0_diff'][filename]] = 0 flattened_df[filename] = data['xanes_data'][filename] - fit_function_diff - if options['save_values']: + if options['flatten_store_data']: data['xanes_data'] = flattened_df From faf41db41fcbad3720ddb4e804ef2ef2a8bb8bef Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Wed, 29 Jun 2022 15:26:51 +0200 Subject: [PATCH 38/56] Add tabulated K-edge values --- nafuma/xanes/edges.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/nafuma/xanes/edges.py b/nafuma/xanes/edges.py index dc02601..abce1c6 100644 --- a/nafuma/xanes/edges.py +++ b/nafuma/xanes/edges.py @@ -6,7 +6,7 @@ from scipy.constants import c, h keV_per_J = (1 / 1.602176634e-19) / 1000 # kXu values taken from International Tables for Crystallography Volume , Kulwer Academic Publishers - Dordrect / Boston / London (1992) -k_edge = { 'Z': [ 1, 2, +K = { 'Z': [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, @@ -23,5 +23,8 @@ k_edge = { 'Z': [ 1, 2, 0.81549, 0.76969, 0.72762, 0.68877, 0.65291, 0.61977, 0.5891, 0.56047, 0.53378, 0.50915, 0.48582, 0.46409]} -k_edge = pd.DataFrame(k_edge) -k_edge['keV'] = np.round(h*c/(k_edge['kXu']*10**-10) * keV_per_J, 3) \ No newline at end of file +K = pd.DataFrame(K) +K['keV'] = np.round(h*c/(K['kXu']*10**-10) * keV_per_J, 3) + + +# FIXME If needed, add energies for L-edges as well. \ No newline at end of file From 254becff69cbf7bca4d4f5a0e93a01b10c8c29a1 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Wed, 29 Jun 2022 16:13:19 +0200 Subject: [PATCH 39/56] Make sure filenames is a list before reading --- nafuma/xanes/io.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 9676608..be7c9f3 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -183,12 +183,13 @@ def read_data(data: dict, options={}) -> pd.DataFrame: columns = ['ZapEnergy'] + if not isinstance(data['path'], list): + data['path'] = [data['path']] + # Initialise DataFrame with only ZapEnergy-column xanes_data = pd.read_csv(data['path'][0])[['ZapEnergy']] xanes_data['ZapEnergy'] += options['adjust'] - if not isinstance(data['path'], list): - data['path'] = [data['path']] for filename in data['path']: columns.append(filename) From b84cecaf84680e3e8cf3708efcba9cc6e842157a Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Wed, 29 Jun 2022 16:40:36 +0200 Subject: [PATCH 40/56] Update documentation --- nafuma/xanes/calib.py | 118 ++++++++++++++++++++++++------------------ 1 file changed, 69 insertions(+), 49 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 22d63eb..71505f8 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -373,52 +373,53 @@ def post_edge_fit_interactive(data: dict, options: dict) -> None: display(w) def smoothing(data: dict, options={}): + ' Smoothes the data using the Savitzky-Golay filter. This is the only algorithm at this moment. ' - # FIXME Add logging - # FIXME Add saving of files required_options = ['log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'interactive', 'smooth_window_length', 'smooth_algorithm', 'smooth_polyorder', 'smooth_save_default', 'smooth_store_data'] default_options = { - 'log': False, - 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_smoothing.log', - 'show_plots': False, - 'save_plots': False, - 'save_folder': './', - 'interactive': False, - 'smooth_window_length': 3, - 'smooth_polyorder': 2, + 'log': False, # Toggles logging on / off + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_smoothing.log', # Sets path to log-file. Ignored if log == False + 'show_plots': False, # Toggles showing plots on / off. This is only recommended when working with a handful of scans. + 'save_plots': False, # Toggles saving plots on / off + 'save_folder': './', # Sets path to folder where plots should be saved. Ignored if save_plots == False + 'interactive': False, # Toggles interactive mode on / off. This is only recommended for a single scan to determine proper parameters for smoothing. + 'smooth_window_length': 3, # Determines the window length of smoothing that the savgol-filter uses for smoothing + 'smooth_polyorder': 2, # Determines the order of the polynomial used in the smoothing algorithm 'smooth_algorithm': 'savgol', # At the present, only Savitzky-Golay filter is implemented. Add Gaussian and Boxcar later. - 'smooth_save_default': False, - 'smooth_store_data': False, + 'smooth_save_default': False, # Toggles whether or not to run a separate smoothing using default values on / off + 'smooth_store_data': False, # Toggles storing data to data['xanes_data'] on / off } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + # Initialise new DataFrame with correct x-values df_smooth = pd.DataFrame(data['xanes_data']['ZapEnergy']) + # Do the same if smoothing with default values is toggled on if options['smooth_save_default']: df_smooth_default = pd.DataFrame(data['xanes_data']['ZapEnergy']) if options['log']: - aux.write_log(message='Starting smoothing.') + aux.write_log(message='Starting smoothing procedure.') + # Run in interactive mode if enabled if options['interactive']: - data['xanes_data_backup'] = data['xanes_data'] - options['interactive'] = False - options['interactive_session_active'] = True - options['show_plots'] = True - smoothing_interactive(data=data, options=options) + data['xanes_data_backup'] = data['xanes_data'] # Backup the data + options['interactive'] = False # Turn interactive mode off so that it is not called again within the interactive loop + options['show_plots'] = True # Force plotting on as interactive mode is useless without it + smoothing_interactive(data=data, options=options) # Call interactive version of the function return # FIXME Add other types of filters - # FIXME Instead of assigning values directly to the data dictionary, these should be made into an own DataFrame that you can decide later what to do with - these variables should - # then be returned for i, filename in enumerate(data['path']): if options['smooth_algorithm'] == 'savgol': if options['log']: aux.write_log(message=f'Smoothing {filename} with algorithm: {options["smooth_algorithm"]} ({i+1}/{len(data["path"])})', options=options) + + # Apply savgol filter and add to DataFrame df_smooth.insert(1, filename, savgol_filter(data['xanes_data'][filename], options['smooth_window_length'], options['smooth_polyorder'])) if options['smooth_save_default']: @@ -428,16 +429,16 @@ def smoothing(data: dict, options={}): df_smooth_default.insert(1, filename, savgol_filter(data['xanes_data'][filename], default_options['smooth_window_length'], default_options['smooth_polyorder'])) + # Make plots ... if options['save_plots'] or options['show_plots']: edge_pos = estimate_edge_position(data=data, options=options) - intensity_midpoint = df_smooth[filename].iloc[np.where(df_smooth['ZapEnergy'] == find_nearest(df_smooth['ZapEnergy'], edge_pos))].values[0] step_length = data['xanes_data']['ZapEnergy'].iloc[1] - data['xanes_data']['ZapEnergy'].iloc[0] - + # ... if default smoothing is enabled. Only plotting +- 10 step sizes from the edge position if options['smooth_save_default']: fig, (ax1, ax2) = plt.subplots(1,2,figsize=(20,5)) data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-10*step_length) & (data['xanes_data']['ZapEnergy'] < edge_pos+10*step_length)].plot(x='ZapEnergy', y=filename, color='black', ax=ax1, kind='scatter') @@ -448,6 +449,7 @@ def smoothing(data: dict, options={}): df_smooth_default.loc[(df_smooth_default['ZapEnergy'] > edge_pos-10*step_length) & (df_smooth_default['ZapEnergy'] < edge_pos+10*step_length)].plot(x='ZapEnergy', y=filename, color='red', ax=ax2) ax2.set_title(f'{os.path.basename(filename)} - Smooth (default values)', size=20) + # ... if only smoothing with user defined variables is enabled. Only plotting +- 10 step sizes from the edge position elif not options['smooth_save_default']: fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20,10)) data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax1, kind='scatter', c='black') @@ -455,12 +457,11 @@ def smoothing(data: dict, options={}): data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-10*step_length) & (data['xanes_data']['ZapEnergy'] < edge_pos+10*step_length)].plot(x='ZapEnergy', y=filename, color='black', ax=ax2, kind='scatter') df_smooth.loc[(df_smooth['ZapEnergy'] > edge_pos-10*step_length) & (df_smooth['ZapEnergy'] < edge_pos+10*step_length)].plot(x='ZapEnergy', y=filename, color='red', ax=ax2) - #ax.set_xlim([edge_pos-0.0015, edge_pos+0.0015]) - #ax.set_ylim([intensity_midpoint*0.9, intensity_midpoint*1.1]) ax1.set_title(f'{os.path.basename(filename)} - Smooth', size=20) ax2.set_title(f'{os.path.basename(filename)} - Smooth Edge Region', size=20) + # Save plots if options['save_plots']: if not os.path.isdir(options['save_folder']): os.makedirs(options['save_folder']) @@ -468,6 +469,7 @@ def smoothing(data: dict, options={}): dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_smooth.png' plt.savefig(dst, transparent=False) + # Close plots if not options['show_plots']: plt.close() @@ -498,23 +500,29 @@ def smoothing_interactive(data: dict, options: dict) -> None: def restore_from_backup(data): + ''' Restores DataFrame from data['xanes_data_backup'] to data['xanes_data']. This can be useful e.g. when smoothing and you want to re-do the smoothing with different parameters. + + If there is no DataFrame stored in data['xanes_data_backup'], this function does nothing. ''' + if 'xanes_data_bakcup' in data.keys(): data['xanes_data'] = data['xanes_data_backup'] def find_nearest(array, value): - #function to find the value closes to "value" in an "array" + ''' Finds the value closest to value in array''' array = np.asarray(array) idx = (np.abs(array - value)).argmin() return array[idx] def estimate_edge_position(data: dict, options={}, index=0): - #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. + ''' Gets an estimation of the edge position. This is very similar to determine_edge_position, but provides instead a quick and dirty way where the actual data point closest to the maximum of the differentiated data + is located. ''' + required_options = ['log','logfile', 'periods'] default_options = { - 'log': False, - 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_edge_position_estimation.log', + 'log': False, # Toggles logging on/off + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_edge_position_estimation.log', # Sets path to log-file 'periods': 6, #Periods needs to be an even number for the shifting of values to work properly } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -528,10 +536,8 @@ def estimate_edge_position(data: dict, options={}, index=0): df_diff_max = df_diff[data['path'][index]].dropna().max() estimated_edge_shift =df_diff.loc[df_diff[data['path'][index]] == df_diff_max,'ZapEnergy'].values[0] - # FIXME Add logging option to see the result - if options['log']: - aux.write_log(message=f'Estimated edge shift for determination of pre-edge area is: {estimated_edge_shift} keV', options=options) + aux.write_log(message=f'Estimated edge shift is: {estimated_edge_shift} keV', options=options) return estimated_edge_shift @@ -666,14 +672,14 @@ def determine_edge_position(data: dict, options={}): data['e0_double_diff'][filename] = edge_pos_double_diff - # Make and show / save plots + # Make and show / save plots ... if options['save_plots'] or options['show_plots']: - # If both are enabled + # ... if both are enabled if options['diff'] and options['double_diff']: - fig, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(ncols=3, nrows=2, figsize=(20,20)) + _, ((ax1, ax2, ax3), (ax4, ax5, ax6)) = plt.subplots(ncols=3, nrows=2, figsize=(20,20)) data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax1, c='black') ax1.axvline(x=edge_pos_diff, ls='--', c='green') @@ -708,9 +714,9 @@ def determine_edge_position(data: dict, options={}): ax6.axvline(x=estimated_edge_pos, ls='--', c='red') - # If only first order differentials is enabled + # ... if only first order differentials is enabled elif options['diff']: - fig, (ax1, ax2, ax3) = plt.subplots(ncols=3,nrows=1, figsize=(20, 10)) + _, (ax1, ax2, ax3) = plt.subplots(ncols=3,nrows=1, figsize=(20, 10)) data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax1, c='black') ax1.axvline(x=edge_pos_diff, ls='--', c='green') @@ -727,9 +733,9 @@ def determine_edge_position(data: dict, options={}): ax3.axvline(x=edge_pos_diff, ls='--', c='green') ax3.axvline(x=estimated_edge_pos, ls='--', c='red') - # If only second order differentials is enabled + # ... if only second order differentials is enabled elif options['double_diff']: - fig, (ax1, ax2, ax3) = plt.subplots(ncols=3,nrows=1, figsize=(20, 10)) + _, (ax1, ax2, ax3) = plt.subplots(ncols=3,nrows=1, figsize=(20, 10)) data['xanes_data'].plot(x='ZapEnergy', y=filename, ax=ax1, c='black') ax1.axvline(x=edge_pos_double_diff, ls='--', c='green') @@ -774,9 +780,6 @@ def determine_edge_position(data: dict, options={}): def determine_edge_position_interactive(data: dict, options: dict) -> None: ''' Defines the widgets to use with the ipywidgets interactive mode and calls the update function found in btp.ipywidgets. ''' - - step_size = data['xanes_data']['ZapEnergy'].iloc[1] - data['xanes_data']['ZapEnergy'].iloc[0] - w = widgets.interactive( btp.ipywidgets_update, func=widgets.fixed(determine_edge_position), data=widgets.fixed(data), options=widgets.fixed(options), points_around_edge=widgets.IntSlider(value=options['points_around_edge'], min=1, max=20, step=1), @@ -786,12 +789,18 @@ def determine_edge_position_interactive(data: dict, options: dict) -> None: display(w) + def normalise(data: dict, options={}): + ''' Normalises the data so that the difference between the fitted pre- and post-edge functions is 1 at the edge position. + + Requires that edge positions have already been determined with determine_edge_position() and stored in data['e0_diff']. ''' + + required_options = ['log', 'logfile', 'normalisation_store_data'] default_options = { - 'log': False, - 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_normalisation.log', - 'normalisation_store_data': False, + 'log': False, # Toggles logging on/off + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_normalisation.log', # Sets path to log-file + 'normalisation_store_data': False, # Toggles storing of the flattened data in data['xanes_data'] on/off } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -827,33 +836,44 @@ def normalise(data: dict, options={}): def flatten(data:dict, options={}): - #only picking out zapenergy-values higher than edge position (edge pos and below remains untouched) + ''' Flattens the post-edge region (from edge position and up). Only for visual purposes. + + Requires data['xanes_data'] that is normalised through normalise() and that normalised versions of the post_edge_fit_data is stored in data['post_edge_fit_data_norm']. + Also assumes that the pre edge-fit data is already subtracted from the data''' + required_options = ['log', 'logfile', 'flatten_store_data'] default_options = { - 'log': False, - 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_flattening.log', - 'flatten_store_data': False, + 'log': False, # Toggles logging on/off + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_flattening.log', # Sets path to log-file + 'flatten_store_data': False, # Toggles storing of the flattened data in data['xanes_data'] on/off } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + # Initialise DataFrame with x-values flattened_df = pd.DataFrame(data['xanes_data']['ZapEnergy']) + # Loop through all files for filename in data['path']: + + # Subtract 1 from the _normalised_ post edge fit function fit_function_diff = data['post_edge_fit_data_norm'][filename] - 1 + # Set all values from edge position and downwards to 0 so that only data above the edge position will be adjusted fit_function_diff.loc[flattened_df['ZapEnergy'] <= data['e0_diff'][filename]] = 0 + # Subtract the difference between 1 and the post edge fit function from the normalised data. flattened_df[filename] = data['xanes_data'][filename] - fit_function_diff + # Saves the flattened DataFrame if options['flatten_store_data']: data['xanes_data'] = flattened_df return flattened_df, fit_function_diff - #make a new dataframe with flattened values + From 8702cdfa0025f6e53594ccc7aa0ec0700f1893ec Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 30 Jun 2022 17:07:31 +0200 Subject: [PATCH 41/56] Make logfile-directory if not already exists --- nafuma/auxillary.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nafuma/auxillary.py b/nafuma/auxillary.py index 0ccde1f..bf06f8e 100644 --- a/nafuma/auxillary.py +++ b/nafuma/auxillary.py @@ -77,6 +77,10 @@ def write_log(message, options={}): options = update_options(options=options, required_options=required_options, default_options=default_options) + if not os.path.isdir(os.path.dirname(options['logfile'])): + os.makedirs(os.path.dirname(options['logfile'])) + + now = datetime.now().strftime('%Y/%m/%d %H:%M:%S') message = f'[{now}] {message} \n' From 1757445f89892fe41e386edf9442624aa87daa5f Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 30 Jun 2022 17:08:14 +0200 Subject: [PATCH 42/56] Add new functions and fix certain bugs --- nafuma/xanes/calib.py | 78 +++++++++++++++++++++++++++++++++++-------- 1 file changed, 65 insertions(+), 13 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 71505f8..7d567b7 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -115,6 +115,11 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: fit_function = np.poly1d(params) data['pre_edge_params'][filename] = params + + if options['log']: + aux.write_log(message=f'Pre edge fitted between {options["pre_edge_limits"][0]} and {options["pre_edge_limits"][1]} with polynomial of order {options["pre_edge_polyorder"]} with parmameters {params}.', options=options) + if options['pre_edge_masks']: + aux.write_log(message=f'Excluded regions: {options["pre_edge_masks"]}', options=options) #making a list, y_pre,so the background will be applied to all ZapEnergy-values background=fit_function(pre_edge_fit_data["ZapEnergy"]) @@ -171,7 +176,7 @@ def pre_edge_fit_interactive(data: dict, options: dict) -> None: w = widgets.interactive( btp.ipywidgets_update, func=widgets.fixed(pre_edge_fit), data=widgets.fixed(data), options=widgets.fixed(options), - pre_edge_limits=widgets.FloatRangeSlider(value=[options['pre_edge_limits'][0], options['pre_edge_limits'][1]], min=data['xanes_data_original']['ZapEnergy'].min(), max=data['xanes_data_original']['ZapEnergy'].max(), step=0.001), + pre_edge_limits=widgets.FloatRangeSlider(value=[options['pre_edge_limits'][0], options['pre_edge_limits'][1]], min=data['xanes_data_original']['ZapEnergy'].min(), max=data['xanes_data_original']['ZapEnergy'].max(), step=0.0001), pre_edge_store_data=widgets.Checkbox(value=options['pre_edge_store_data']) ) @@ -267,10 +272,13 @@ def post_edge_fit(data: dict, options={}): edge_position = estimate_edge_position(data, options, index=0) options['post_edge_limits'][0] = edge_position + post_edge_limit_offset - if not options['post_edge_limits'][1]: options['post_edge_limits'][1] = data['xanes_data_original']['ZapEnergy'].max() + if options['post_edge_limits'][0] > options['post_edge_limits'][1]: + options['post_edge_limits'][0] = options['post_edge_limits'][1] - 0.1 + + # Start inteactive session with ipywidgets. Disables options['interactive'] in order for the interactive loop to not start another interactive session if options['interactive']: options['interactive'] = False @@ -288,6 +296,8 @@ def post_edge_fit(data: dict, options={}): post_edge_data = post_edge_data.dropna() #Removing all indexes without any value, as some of the data sets misses the few last data points and fucks up the fit + + # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data post_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) @@ -302,7 +312,9 @@ def post_edge_fit(data: dict, options={}): fit_function = np.poly1d(params) if options['log']: - aux.write_log(message=f'Post edge fitted with parameters: {params}') + aux.write_log(message=f'Post edge fitted between {options["post_edge_limits"][0]} and {options["post_edge_limits"][1]} with polynomial of order {options["post_edge_polyorder"]} with parmameters {params}.', options=options) + if options['post_edge_masks']: + aux.write_log(message=f'Excluded regions: {options["post_edge_masks"]}', options=options) data['post_edge_params'][filename] = params @@ -364,7 +376,7 @@ def post_edge_fit_interactive(data: dict, options: dict) -> None: w = widgets.interactive( btp.ipywidgets_update, func=widgets.fixed(post_edge_fit), data=widgets.fixed(data), options=widgets.fixed(options), - post_edge_limits=widgets.FloatRangeSlider(value=[options['post_edge_limits'][0], options['post_edge_limits'][1]], min=data['xanes_data_original']['ZapEnergy'].min(), max=data['xanes_data_original']['ZapEnergy'].max(), step=0.001), + post_edge_limits=widgets.FloatRangeSlider(value=[options['post_edge_limits'][0], options['post_edge_limits'][1]], min=data['xanes_data_original']['ZapEnergy'].min(), max=data['xanes_data_original']['ZapEnergy'].max(), step=0.0001), post_edge_store_data=widgets.Checkbox(value=options['post_edge_store_data']) ) @@ -400,7 +412,7 @@ def smoothing(data: dict, options={}): df_smooth_default = pd.DataFrame(data['xanes_data']['ZapEnergy']) if options['log']: - aux.write_log(message='Starting smoothing procedure.') + aux.write_log(message='Starting smoothing procedure.', options=options) # Run in interactive mode if enabled @@ -498,6 +510,10 @@ def smoothing_interactive(data: dict, options: dict) -> None: display(w) +def backup(data): + + data['xanes_data_backup'] = data['xanes_data'].copy() + def restore_from_backup(data): ''' Restores DataFrame from data['xanes_data_backup'] to data['xanes_data']. This can be useful e.g. when smoothing and you want to re-do the smoothing with different parameters. @@ -505,7 +521,7 @@ def restore_from_backup(data): If there is no DataFrame stored in data['xanes_data_backup'], this function does nothing. ''' if 'xanes_data_bakcup' in data.keys(): - data['xanes_data'] = data['xanes_data_backup'] + data['xanes_data'] = data['xanes_data_backup'].copy() def find_nearest(array, value): @@ -523,7 +539,7 @@ def estimate_edge_position(data: dict, options={}, index=0): default_options = { 'log': False, # Toggles logging on/off 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_edge_position_estimation.log', # Sets path to log-file - 'periods': 6, #Periods needs to be an even number for the shifting of values to work properly + 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -533,13 +549,25 @@ def estimate_edge_position(data: dict, options={}, index=0): #shifting column values up so that average differential fits right between the points used in the calculation df_diff[data['path'][index]]=df_diff[data['path'][index]].shift(-int(options['periods']/2)) + + + if 'pre_edge_masks' in options.keys(): + for mask in options['pre_edge_masks']: + df_diff[data['path'][index]].loc[(df_diff['ZapEnergy'] > mask[0]) & (df_diff['ZapEnergy'] < mask[1])] = 0 + + if 'post_edge_masks' in options.keys(): + for mask in options['post_edge_masks']: + df_diff[data['path'][index]].loc[(df_diff['ZapEnergy'] > mask[0]) & (df_diff['ZapEnergy'] < mask[1])] = 0 + + df_diff_max = df_diff[data['path'][index]].dropna().max() - estimated_edge_shift =df_diff.loc[df_diff[data['path'][index]] == df_diff_max,'ZapEnergy'].values[0] + + estimated_edge_pos = df_diff.loc[df_diff[data['path'][index]] == df_diff_max,'ZapEnergy'].values[0] if options['log']: - aux.write_log(message=f'Estimated edge shift is: {estimated_edge_shift} keV', options=options) + aux.write_log(message=f'Estimated edge position is: {estimated_edge_pos} keV', options=options) - return estimated_edge_shift + return estimated_edge_pos def determine_edge_position(data: dict, options={}): ''' Determines the edge position by 1) first differential maximum and/or 2) second differential zero-point. Calculates differential and/or double differential by diff.periods and double_diff.periods respectively. @@ -550,7 +578,7 @@ def determine_edge_position(data: dict, options={}): Requires that XANES-data is already loaded in data['xanes_data']. This allows the user to choose when to determine the edge position - whether before or after normalisation, flattening etc.''' - required_options = ['save_values', 'log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'diff', 'diff.polyorder', 'diff.periods', 'double_diff', 'double_diff.polyorder', 'double_diff.periods', 'points_around_edge'] + required_options = ['save_values', 'log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'diff', 'diff.polyorder', 'diff.periods', 'double_diff', 'double_diff.polyorder', 'double_diff.periods', 'points_around_edge', 'save_diff_data'] default_options = { 'save_values': True, # Whether the edge positions should be stored in a dictionary within the main data dictionary. 'log': False, # Toggles logging on/off @@ -564,7 +592,8 @@ def determine_edge_position(data: dict, options={}): 'double_diff': False, # Toggles calculation of the edge position based on double differential data 'double_diff.polyorder': 1, # Sets the order of the polynomial to fit edge region of the double differential to 'double_diff.periods': 2, # Sets the number of data points between which the second order difference should be calculated. Needs to be even for subsequent shifting of data to function. - 'points_around_edge': 5 # The length of the region to find points to fit to a function + 'points_around_edge': 1, # The length of the region to find points to fit to a function + 'save_diff_data': False } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -608,6 +637,9 @@ def determine_edge_position(data: dict, options={}): fit_region = (options['points_around_edge']+1)*(data['xanes_data']['ZapEnergy'].iloc[1] - data['xanes_data']['ZapEnergy'].iloc[0]) + if fit_region < 0: + fit_region = (options['points_around_edge']+1)*(data['xanes_data']['ZapEnergy'].iloc[10] - data['xanes_data']['ZapEnergy'].iloc[9]) + #========================== Fitting the first order derivative ========== @@ -666,7 +698,7 @@ def determine_edge_position(data: dict, options={}): aux.write_log(message=f"Edge position estimated by the double differential zero-point is {str(round(edge_pos_double_diff,5))} keV", options=options) if options['diff']: - aux.write_log(message=f"Difference between edge position estimated from differential maximum and double differential zero-point is {(edge_pos_diff-edge_pos_double_diff)*1000} eV.") + aux.write_log(message=f"Difference between edge position estimated from differential maximum and double differential zero-point is {(edge_pos_diff-edge_pos_double_diff)*1000} eV.", options=options) if options['save_values']: data['e0_double_diff'][filename] = edge_pos_double_diff @@ -773,6 +805,11 @@ def determine_edge_position(data: dict, options={}): if not options['double_diff']: edge_pos_double_diff = None + + if options['save_diff_data']: + data['diff_data'] = df_diff if options['diff'] else None + data['double_diff_data'] = df_double_diff if options['double_diff'] else None + return edge_pos_diff, edge_pos_double_diff @@ -789,6 +826,20 @@ def determine_edge_position_interactive(data: dict, options: dict) -> None: display(w) +def determine_edge_shift(data: dict, options: dict, edge_pos: float) -> None: + + if 'edge' not in data.keys(): + data['edge'] = find_element(data) + + + reference_energy = xas.edges.K['keV'].loc[xas.edges.K['Atom'] == data['edge']].values[0] + + edge_shift = reference_energy - edge_pos + + if options['log']: + aux.write_log(message=f'Edge shift vs. reference value for {data["edge"]} is {edge_shift*1000} eV', options=options) + + return edge_shift def normalise(data: dict, options={}): ''' Normalises the data so that the difference between the fitted pre- and post-edge functions is 1 at the edge position. @@ -806,6 +857,7 @@ def normalise(data: dict, options={}): normalised_df = pd.DataFrame(data['xanes_data']['ZapEnergy']) data['normalisation_constants'] = {} + if options['normalisation_store_data']: pre_edge_fit_data_norm = pd.DataFrame(data['pre_edge_fit_data']['ZapEnergy']) post_edge_fit_data_norm = pd.DataFrame(data['post_edge_fit_data']['ZapEnergy']) From eb8660d71df6ad2ce3fb7a3807b7f41a032b1e01 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 30 Jun 2022 17:08:28 +0200 Subject: [PATCH 43/56] Fix determination of active roi if only one exists --- nafuma/xanes/io.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index be7c9f3..25e792f 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -12,7 +12,7 @@ def split_scan_data(data: dict, options={}) -> list: As of now only picks out xmap_rois (fluoresence mode) and for Mn, Fe, Co and Ni K-edges.''' - required_options = ['log', 'logfile', 'save', 'save_folder', 'replace', 'add_rois'] + required_options = ['log', 'logfile', 'save', 'save_folder', 'replace', 'add_rois', 'return'] default_options = { 'log': False, @@ -20,7 +20,8 @@ def split_scan_data(data: dict, options={}) -> list: 'save': False, # whether to save the files or not 'save_folder': '.', # root folder of where to save the files 'replace': False, # whether to replace the files if they already exist - 'add_rois': False # Whether to add the rois of individual scans of the same edge together + 'add_rois': False, # Whether to add the rois of individual scans of the same edge together + 'return': True } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -165,7 +166,10 @@ def split_scan_data(data: dict, options={}) -> list: aux.write_log(message=f'All done!', options=options) - return all_scans + if options['return']: + return all_scans + else: + return @@ -219,8 +223,15 @@ def determine_active_roi(scan_data): # active_roi = 'xmap_roi00' # else: # active_roi = 'xmap_roi01' + + + if not ('xmap_roi00' in scan_data.columns) or not ('xmap_roi01' in scan_data.columns): + if 'xmap_roi00' in scan_data.columns: + active_roi = 'xmap_roi00' + elif 'xmap_roi01' in scan_data.columns: + active_roi = 'xmap_roi01' - if (scan_data['xmap_roi00'].iloc[0:100].mean() < scan_data['xmap_roi00'].iloc[-100:].mean()) and (scan_data['xmap_roi01'].iloc[0:100].mean() < scan_data['xmap_roi01'].iloc[-100:].mean()): + elif (scan_data['xmap_roi00'].iloc[0:100].mean() < scan_data['xmap_roi00'].iloc[-100:].mean()) and (scan_data['xmap_roi01'].iloc[0:100].mean() < scan_data['xmap_roi01'].iloc[-100:].mean()): if (scan_data['xmap_roi00'].max()-scan_data['xmap_roi00'].min()) > (scan_data['xmap_roi01'].max() - scan_data['xmap_roi01'].min()): active_roi = 'xmap_roi00' else: From 7336af061ffe5209fb6e488dff7c8dbe4f57b599 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Fri, 1 Jul 2022 14:29:13 +0200 Subject: [PATCH 44/56] Only plot one dataset if in interactive mode --- nafuma/xanes/calib.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 7d567b7..617cc83 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -46,7 +46,7 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME Add log-file - required_options = ['pre_edge_limits', 'pre_edge_masks', 'pre_edge_polyorder', 'pre_edge_store_data', 'log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'ylim', 'interactive'] + required_options = ['pre_edge_limits', 'pre_edge_masks', 'pre_edge_polyorder', 'pre_edge_store_data', 'log', 'logfile', 'show_plots', 'save_plots', 'save_folder', 'ylim', 'interactive', 'interactive_session_active'] default_options = { 'pre_edge_limits': [None, None], 'pre_edge_masks': [], @@ -58,7 +58,8 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: 'save_plots': False, 'save_folder': './', 'ylim': [None, None], - 'interactive': False + 'interactive': False, + 'interactive_session_active': False } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -107,6 +108,10 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: data['pre_edge_params'] = {} for i, filename in enumerate(data['path']): + + if options['interactive_session_active'] and i > 0: + continue + if options['log']: aux.write_log(message=f'Fitting background on {os.path.basename(filename)} ({i+1}/{len(data["path"])})', options=options) @@ -247,7 +252,7 @@ def post_edge_fit(data: dict, options={}): ''' - required_options = ['log', 'logfile', 'post_edge_masks', 'post_edge_limits', 'post_edge_polyorder', 'post_edge_store_data', 'interactive', 'show_plots', 'save_plots', 'save_folder'] + required_options = ['log', 'logfile', 'post_edge_masks', 'post_edge_limits', 'post_edge_polyorder', 'post_edge_store_data', 'interactive', 'interactive_session_active', 'show_plots', 'save_plots', 'save_folder'] default_options = { 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_post_edge_fit.log', @@ -256,6 +261,7 @@ def post_edge_fit(data: dict, options={}): 'post_edge_polyorder': 2, 'post_edge_store_data': False, 'interactive': False, + 'interactive_session_active': False, 'show_plots': False, 'save_plots': False, 'save_folder': './', @@ -304,6 +310,10 @@ def post_edge_fit(data: dict, options={}): data['post_edge_params'] = {} for i, filename in enumerate(data['path']): + + if options['interactive_session_active'] and i > 0: + continue + if options['log']: aux.write_log(message=f'Fitting post edge on {os.path.basename(filename)} ({i+1} / {len(data["path"])}) with polynomial order {options["post_edge_polyorder"]}', options=options) From 1a06e7b4fcace31e3609b36ce10d08517a1d870a Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Fri, 1 Jul 2022 16:09:43 +0200 Subject: [PATCH 45/56] Poor attempts at fixing bug in roi choice --- nafuma/xanes/calib.py | 2 ++ nafuma/xanes/io.py | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 617cc83..e16f5a3 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -536,6 +536,7 @@ def restore_from_backup(data): def find_nearest(array, value): ''' Finds the value closest to value in array''' + array = np.asarray(array) idx = (np.abs(array - value)).argmin() return array[idx] @@ -874,6 +875,7 @@ def normalise(data: dict, options={}): #Finding the normalisation constant µ_0(E_0), by subtracting the value of the pre-edge-line from the value of the post-edge line at e0 for filename in data['path']: + print(filename) e0_ind = data['post_edge_fit_data'].loc[data['post_edge_fit_data']['ZapEnergy'] == find_nearest(data['post_edge_fit_data']['ZapEnergy'], data['e0_diff'][filename])].index.values[0] #norm = data['post_edge_fit_data'][filename].iloc[find_nearest(data['post_edge_fit_data'][filename], data['e0'][filename])] normalisation_constant = data['post_edge_fit_data'][filename].iloc[e0_ind] - data['pre_edge_fit_data'][filename].iloc[e0_ind] diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 25e792f..7a554db 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -225,6 +225,9 @@ def determine_active_roi(scan_data): # active_roi = 'xmap_roi01' + # FIXME This is broken now - + + if not ('xmap_roi00' in scan_data.columns) or not ('xmap_roi01' in scan_data.columns): if 'xmap_roi00' in scan_data.columns: active_roi = 'xmap_roi00' @@ -232,7 +235,7 @@ def determine_active_roi(scan_data): active_roi = 'xmap_roi01' elif (scan_data['xmap_roi00'].iloc[0:100].mean() < scan_data['xmap_roi00'].iloc[-100:].mean()) and (scan_data['xmap_roi01'].iloc[0:100].mean() < scan_data['xmap_roi01'].iloc[-100:].mean()): - if (scan_data['xmap_roi00'].max()-scan_data['xmap_roi00'].min()) > (scan_data['xmap_roi01'].max() - scan_data['xmap_roi01'].min()): + if ((scan_data['xmap_roi00'].max()-scan_data['xmap_roi00'].min())) > ((scan_data['xmap_roi01'].max() - scan_data['xmap_roi01'].min())): active_roi = 'xmap_roi00' else: active_roi = 'xmap_roi01' From b5cac158a4697c23e9f2b90bc14686d326c16202 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Mon, 4 Jul 2022 18:18:31 +0200 Subject: [PATCH 46/56] Fix active_roi determination --- nafuma/xanes/calib.py | 3 +-- nafuma/xanes/io.py | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index e16f5a3..a5e4455 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -375,7 +375,7 @@ def post_edge_fit(data: dict, options={}): aux.write_log(message='Post edge fitting done!', options=options) if options['post_edge_store_data']: - data['post_edge_fit_data'] = post_edge_fit_data + data['post_edge_fit_data'] = post_edge_fit_data.dropna(axis=0) return post_edge_fit_data @@ -875,7 +875,6 @@ def normalise(data: dict, options={}): #Finding the normalisation constant µ_0(E_0), by subtracting the value of the pre-edge-line from the value of the post-edge line at e0 for filename in data['path']: - print(filename) e0_ind = data['post_edge_fit_data'].loc[data['post_edge_fit_data']['ZapEnergy'] == find_nearest(data['post_edge_fit_data']['ZapEnergy'], data['e0_diff'][filename])].index.values[0] #norm = data['post_edge_fit_data'][filename].iloc[find_nearest(data['post_edge_fit_data'][filename], data['e0'][filename])] normalisation_constant = data['post_edge_fit_data'][filename].iloc[e0_ind] - data['pre_edge_fit_data'][filename].iloc[e0_ind] diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 7a554db..217a529 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -225,9 +225,6 @@ def determine_active_roi(scan_data): # active_roi = 'xmap_roi01' - # FIXME This is broken now - - - if not ('xmap_roi00' in scan_data.columns) or not ('xmap_roi01' in scan_data.columns): if 'xmap_roi00' in scan_data.columns: active_roi = 'xmap_roi00' @@ -235,7 +232,7 @@ def determine_active_roi(scan_data): active_roi = 'xmap_roi01' elif (scan_data['xmap_roi00'].iloc[0:100].mean() < scan_data['xmap_roi00'].iloc[-100:].mean()) and (scan_data['xmap_roi01'].iloc[0:100].mean() < scan_data['xmap_roi01'].iloc[-100:].mean()): - if ((scan_data['xmap_roi00'].max()-scan_data['xmap_roi00'].min())) > ((scan_data['xmap_roi01'].max() - scan_data['xmap_roi01'].min())): + if (scan_data['xmap_roi00'].iloc[:int(scan_data.shape[0]/2)].max() - scan_data['xmap_roi00'].iloc[0])/scan_data['xmap_roi00'].max() > (scan_data['xmap_roi01'].iloc[:int(scan_data.shape[0]/2)].max() - scan_data['xmap_roi01'].iloc[0])/scan_data['xmap_roi01'].max(): active_roi = 'xmap_roi00' else: active_roi = 'xmap_roi01' From da7099a9248b0d5a37f334cb6f766a5fe529a5b1 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Tue, 5 Jul 2022 10:55:39 +0200 Subject: [PATCH 47/56] Add manual choice of active roi --- nafuma/xanes/io.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 217a529..a590255 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -12,7 +12,7 @@ def split_scan_data(data: dict, options={}) -> list: As of now only picks out xmap_rois (fluoresence mode) and for Mn, Fe, Co and Ni K-edges.''' - required_options = ['log', 'logfile', 'save', 'save_folder', 'replace', 'add_rois', 'return'] + required_options = ['log', 'logfile', 'save', 'save_folder', 'replace', 'active_roi', 'add_rois', 'return'] default_options = { 'log': False, @@ -20,6 +20,7 @@ def split_scan_data(data: dict, options={}) -> list: 'save': False, # whether to save the files or not 'save_folder': '.', # root folder of where to save the files 'replace': False, # whether to replace the files if they already exist + 'active_roi': None, 'add_rois': False, # Whether to add the rois of individual scans of the same edge together 'return': True } @@ -200,7 +201,11 @@ def read_data(data: dict, options={}) -> pd.DataFrame: scan_data = pd.read_csv(filename) - scan_data = scan_data[[determine_active_roi(scan_data)]] + if not options['active_roi']: + scan_data = scan_data[[determine_active_roi(scan_data)]] + else: + scan_data = scan_data[options['active_roi']] + xanes_data = pd.concat([xanes_data, scan_data], axis=1) From 5e2cef2cdea3dfcf7f3d0c68d9ce6b6943c62186 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Tue, 5 Jul 2022 16:37:31 +0200 Subject: [PATCH 48/56] Let save_options create folder if it doesn't exist --- nafuma/auxillary.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/nafuma/auxillary.py b/nafuma/auxillary.py index bf06f8e..45633c5 100644 --- a/nafuma/auxillary.py +++ b/nafuma/auxillary.py @@ -25,6 +25,10 @@ def save_options(options, path, ignore=None): options_copy[i] = 'Removed' + if not os.path.isdir(os.path.dirname(path)): + os.makedirs(os.path.dirname(path)) + + with open(path, 'w') as f: json.dump(options_copy,f, skipkeys=True, indent=4) From 327cef5b51e8760c21bd6fbd00f9659940d662d5 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Tue, 5 Jul 2022 16:37:42 +0200 Subject: [PATCH 49/56] Log updates and let normalise and flatten save plots --- nafuma/xanes/calib.py | 94 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 83 insertions(+), 11 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index a5e4455..84ed070 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -82,6 +82,11 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: edge_position = estimate_edge_position(data, options, index=0) options['pre_edge_limits'][1] = edge_position - pre_edge_limit_offset + print(edge_position) + + if options['pre_edge_limits'][0] >= options['pre_edge_limits'][1]: + options['pre_edge_limits'][1] = options['pre_edge_limits'][0] + 0.03 + # Start inteactive session with ipywidgets. Disables options['interactive'] in order for the interactive loop to not start another interactive session if options['interactive']: options['interactive'] = False @@ -113,7 +118,7 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: continue if options['log']: - aux.write_log(message=f'Fitting background on {os.path.basename(filename)} ({i+1}/{len(data["path"])})', options=options) + aux.write_log(message=f'... Fitting pre edge on {os.path.basename(filename)} ({i+1}/{len(data["path"])})', options=options) #Fitting linear function to the background params = np.polyfit(pre_edge_data["ZapEnergy"],pre_edge_data[filename],options['pre_edge_polyorder']) @@ -122,9 +127,9 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: data['pre_edge_params'][filename] = params if options['log']: - aux.write_log(message=f'Pre edge fitted between {options["pre_edge_limits"][0]} and {options["pre_edge_limits"][1]} with polynomial of order {options["pre_edge_polyorder"]} with parmameters {params}.', options=options) + aux.write_log(message=f'...... Pre edge fitted between {options["pre_edge_limits"][0]} and {options["pre_edge_limits"][1]} with polynomial of order {options["pre_edge_polyorder"]} with parmameters {params}.', options=options) if options['pre_edge_masks']: - aux.write_log(message=f'Excluded regions: {options["pre_edge_masks"]}', options=options) + aux.write_log(message=f'...... Excluded regions: {options["pre_edge_masks"]}', options=options) #making a list, y_pre,so the background will be applied to all ZapEnergy-values background=fit_function(pre_edge_fit_data["ZapEnergy"]) @@ -212,7 +217,7 @@ def pre_edge_subtraction(data: dict, options={}): for i, filename in enumerate(data['path']): if options['log']: - aux.write_log(message=f'Subtracting background on {filename} ({i} / {len(data["path"])}', options=options) + aux.write_log(message=f'... Subtracting background on {os.path.basename(filename)} ({i}/{len(data["path"])})', options=options) xanes_data_bkgd_subtracted.insert(1, filename, data['xanes_data_original'][filename] - data['pre_edge_fit_data'][filename]) @@ -315,16 +320,16 @@ def post_edge_fit(data: dict, options={}): continue if options['log']: - aux.write_log(message=f'Fitting post edge on {os.path.basename(filename)} ({i+1} / {len(data["path"])}) with polynomial order {options["post_edge_polyorder"]}', options=options) + aux.write_log(message=f'... Fitting post edge on {os.path.basename(filename)} ({i+1}/{len(data["path"])}) with polynomial order {options["post_edge_polyorder"]}', options=options) #Fitting linear function to the background params = np.polyfit(post_edge_data["ZapEnergy"], post_edge_data[filename], options['post_edge_polyorder']) fit_function = np.poly1d(params) if options['log']: - aux.write_log(message=f'Post edge fitted between {options["post_edge_limits"][0]} and {options["post_edge_limits"][1]} with polynomial of order {options["post_edge_polyorder"]} with parmameters {params}.', options=options) + aux.write_log(message=f'...... Post edge fitted between {options["post_edge_limits"][0]} and {options["post_edge_limits"][1]} with polynomial of order {options["post_edge_polyorder"]} with parmameters {params}.', options=options) if options['post_edge_masks']: - aux.write_log(message=f'Excluded regions: {options["post_edge_masks"]}', options=options) + aux.write_log(message=f'...... Excluded regions: {options["post_edge_masks"]}', options=options) data['post_edge_params'][filename] = params @@ -570,6 +575,9 @@ def estimate_edge_position(data: dict, options={}, index=0): for mask in options['post_edge_masks']: df_diff[data['path'][index]].loc[(df_diff['ZapEnergy'] > mask[0]) & (df_diff['ZapEnergy'] < mask[1])] = 0 + if 'edge_masks' in options.keys(): + for mask in options['edge_masks']: + df_diff[data['path'][index]].loc[(df_diff['ZapEnergy'] > mask[0]) & (df_diff['ZapEnergy'] < mask[1])] = 0 df_diff_max = df_diff[data['path'][index]].dropna().max() @@ -597,6 +605,7 @@ def determine_edge_position(data: dict, options={}): 'show_plots': False, # Toggles on/off whether plots should be shown. For sequential data, saving the plots and inspecting them there is probably better. 'save_plots': False, # Toggles on/off whether plots should be saved. 'save_folder': './', # Sets the path to where the plots should be saved. Creates folder if doesn't exist. Ignored if save_plots == False + 'edge_masks': [], 'diff': True, # Toggles calculation of the edge position based on differential data 'diff.polyorder': 2, # Sets the order of the polynomial to fit edge region of the differential to 'diff.periods': 2, # Sets the number of data points between which the first order difference should be calculated. Needs to be even for subsequent shifting of data to function. @@ -641,8 +650,13 @@ def determine_edge_position(data: dict, options={}): data['e0_double_diff'] = {} + if options['log']: + aux.write_log(message='Starting edge position determination', options=options) + + # Get rough estimate of edge position for i, filename in enumerate(data['path']): + estimated_edge_pos = estimate_edge_position(data, options=options, index=i) @@ -677,7 +691,7 @@ def determine_edge_position(data: dict, options={}): edge_pos_diff=x_diff[np.where(y_diff == np.amax(y_diff))][0] if options['log']: - aux.write_log(message=f"Edge position estimated by the differential maximum is: {str(round(edge_pos_diff,5))} keV", options=options) + aux.write_log(message=f"... Edge position of {os.path.basename(filename)} determined by the differential maximum is: {str(round(edge_pos_diff,5))} keV", options=options) if options['save_values']: data['e0_diff'][filename] = edge_pos_diff @@ -706,10 +720,10 @@ def determine_edge_position(data: dict, options={}): edge_pos_double_diff=x_double_diff[np.where(y_double_diff == find_nearest(y_double_diff,0))][0] if options['log']: - aux.write_log(message=f"Edge position estimated by the double differential zero-point is {str(round(edge_pos_double_diff,5))} keV", options=options) + aux.write_log(message=f"... Edge position of {os.path.basename(filename)} determined by the double differential zero-point is {str(round(edge_pos_double_diff,5))} keV", options=options) if options['diff']: - aux.write_log(message=f"Difference between edge position estimated from differential maximum and double differential zero-point is {(edge_pos_diff-edge_pos_double_diff)*1000} eV.", options=options) + aux.write_log(message=f"... Difference between edge position estimated from differential maximum and double differential zero-point is {(edge_pos_diff-edge_pos_double_diff)*1000} eV.", options=options) if options['save_values']: data['e0_double_diff'][filename] = edge_pos_double_diff @@ -862,6 +876,9 @@ def normalise(data: dict, options={}): default_options = { 'log': False, # Toggles logging on/off 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_normalisation.log', # Sets path to log-file + 'show_plots': False, # Toggles on/off whether plots should be shown. For sequential data, saving the plots and inspecting them there is probably better. + 'save_plots': False, # Toggles on/off whether plots should be saved. + 'save_folder': './', # Sets the path to where the plots should be saved. Creates folder if doesn't exist. Ignored if save_plots == False 'normalisation_store_data': False, # Toggles storing of the flattened data in data['xanes_data'] on/off } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -877,15 +894,44 @@ def normalise(data: dict, options={}): for filename in data['path']: e0_ind = data['post_edge_fit_data'].loc[data['post_edge_fit_data']['ZapEnergy'] == find_nearest(data['post_edge_fit_data']['ZapEnergy'], data['e0_diff'][filename])].index.values[0] #norm = data['post_edge_fit_data'][filename].iloc[find_nearest(data['post_edge_fit_data'][filename], data['e0'][filename])] - normalisation_constant = data['post_edge_fit_data'][filename].iloc[e0_ind] - data['pre_edge_fit_data'][filename].iloc[e0_ind] + normalisation_constant = data['post_edge_fit_data'][filename].iloc[e0_ind] #- data['pre_edge_fit_data'][filename].iloc[e0_ind] normalised_df.insert(1, filename, data['xanes_data'][filename] / normalisation_constant) + + if options['show_plots'] or options['save_plots']: + + fig, ax = plt.subplots(figsize=(10,5)) + + normalised_df.plot(x='ZapEnergy', y=filename, ax=ax, color='red', label='Normalised data') + ax.set_title(f'{os.path.basename(filename)} - After normalisation', size=20) + ax.set_ylabel('Normalised x$\mu$(E)', size=20) + ax.set_xlabel('Energy (keV)', size=20) + ax.axhline(y=1, ls='--', c='black') + + + # Save plots if toggled + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) + + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_normalisation.png' + + plt.savefig(dst, transparent=False) + + + # Close plots if show_plots not toggled + if not options['show_plots']: + plt.close() + + if options['normalisation_store_data']: pre_edge_fit_data_norm.insert(1, filename, data['pre_edge_fit_data'][filename] / normalisation_constant) post_edge_fit_data_norm.insert(1, filename, data['post_edge_fit_data'][filename] / normalisation_constant) + + if options['normalisation_store_data']: data['xanes_data'] = normalised_df # Normalise the pre-edge and post-edge fit function data @@ -930,6 +976,32 @@ def flatten(data:dict, options={}): flattened_df[filename] = data['xanes_data'][filename] - fit_function_diff + if options['show_plots'] or options['save_plots']: + + fig, ax = plt.subplots(figsize=(10,5)) + + flattened_df.plot(x='ZapEnergy', y=filename, ax=ax, color='red', label='Flattened data') + ax.set_title(f'{os.path.basename(filename)} - After flattening', size=20) + ax.set_ylabel('Normalised x$\mu$(E)', size=20) + ax.set_xlabel('Energy (keV)', size=20) + ax.axhline(y=1, ls='--', c='black') + + + # Save plots if toggled + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) + + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_flattened.png' + + plt.savefig(dst, transparent=False) + + + # Close plots if show_plots not toggled + if not options['show_plots']: + plt.close() + + # Saves the flattened DataFrame if options['flatten_store_data']: data['xanes_data'] = flattened_df From 6eb45772d1947aee7b467cdcfc6e7f8551c1f376 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Wed, 6 Jul 2022 17:33:38 +0200 Subject: [PATCH 50/56] Fix normalisation issues --- nafuma/xanes/calib.py | 8 +++++--- nafuma/xanes/io.py | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 84ed070..2c8881e 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -206,7 +206,7 @@ def pre_edge_subtraction(data: dict, options={}): 'show_plots': False, 'save_plots': False, 'save_folder': './', - 'pre_edge_subtraction_store_data': False + 'pre_edge_subtraction_store_data': True } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -893,8 +893,10 @@ def normalise(data: dict, options={}): #Finding the normalisation constant µ_0(E_0), by subtracting the value of the pre-edge-line from the value of the post-edge line at e0 for filename in data['path']: e0_ind = data['post_edge_fit_data'].loc[data['post_edge_fit_data']['ZapEnergy'] == find_nearest(data['post_edge_fit_data']['ZapEnergy'], data['e0_diff'][filename])].index.values[0] + #norm = data['post_edge_fit_data'][filename].iloc[find_nearest(data['post_edge_fit_data'][filename], data['e0'][filename])] - normalisation_constant = data['post_edge_fit_data'][filename].iloc[e0_ind] #- data['pre_edge_fit_data'][filename].iloc[e0_ind] + normalisation_constant = data['post_edge_fit_data'][filename].iloc[e0_ind] - data['pre_edge_fit_data'][filename].iloc[e0_ind] + print(normalisation_constant) normalised_df.insert(1, filename, data['xanes_data'][filename] / normalisation_constant) @@ -967,7 +969,7 @@ def flatten(data:dict, options={}): for filename in data['path']: # Subtract 1 from the _normalised_ post edge fit function - fit_function_diff = data['post_edge_fit_data_norm'][filename] - 1 + fit_function_diff = data['post_edge_fit_data_norm'][filename] - 1 - data['pre_edge_fit_data_norm'][filename] # Set all values from edge position and downwards to 0 so that only data above the edge position will be adjusted fit_function_diff.loc[flattened_df['ZapEnergy'] <= data['e0_diff'][filename]] = 0 diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index a590255..6c4ac1a 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -57,7 +57,7 @@ def split_scan_data(data: dict, options={}) -> list: continue # First line after data started with #C - stops data read-in - elif line[0:2] == "#C": + elif line[0:2] == "#C" or line[0:2] == '#S': read_data = False if scan_data: From 4cec8d275663c4cdc3b98f693a0658dee43c6d3e Mon Sep 17 00:00:00 2001 From: halvorhv Date: Thu, 7 Jul 2022 10:24:04 +0200 Subject: [PATCH 51/56] added FIXMEs --- nafuma/xanes/io.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 6c4ac1a..70f6891 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -178,7 +178,8 @@ def read_data(data: dict, options={}) -> pd.DataFrame: # FIXME Handle the case when dataseries are not the same size - + # FIXME Add possibility to extract TIME (for operando runs) and Blower Temp (for variable temperature runs) + # FIXME Add possibility to iport transmission data required_options = ['adjust'] default_options = { 'adjust': 0 From 8939bb8479cc1ba6c7813734102b20c308dd71f9 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 7 Jul 2022 11:49:58 +0200 Subject: [PATCH 52/56] Remove annoying print in normalise() --- nafuma/xanes/calib.py | 1 - 1 file changed, 1 deletion(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 2c8881e..77e3a5c 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -896,7 +896,6 @@ def normalise(data: dict, options={}): #norm = data['post_edge_fit_data'][filename].iloc[find_nearest(data['post_edge_fit_data'][filename], data['e0'][filename])] normalisation_constant = data['post_edge_fit_data'][filename].iloc[e0_ind] - data['pre_edge_fit_data'][filename].iloc[e0_ind] - print(normalisation_constant) normalised_df.insert(1, filename, data['xanes_data'][filename] / normalisation_constant) From 0ed85e1398949625e02b25f6c6f3e124f1aa1f54 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 7 Jul 2022 11:50:15 +0200 Subject: [PATCH 53/56] Write timestamp during split and add reading of metadata --- nafuma/xanes/io.py | 70 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 62 insertions(+), 8 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 70f6891..f62554b 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -4,7 +4,7 @@ import os import numpy as np import nafuma.auxillary as aux from nafuma.xanes.calib import find_element -from datetime import datetime +import datetime def split_scan_data(data: dict, options={}) -> list: @@ -16,7 +16,7 @@ def split_scan_data(data: dict, options={}) -> list: default_options = { 'log': False, - 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_split_edges.log', + 'logfile': f'{datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_split_edges.log', 'save': False, # whether to save the files or not 'save_folder': '.', # root folder of where to save the files 'replace': False, # whether to replace the files if they already exist @@ -43,13 +43,17 @@ def split_scan_data(data: dict, options={}) -> list: with open(filename, 'r') as f: lines = f.readlines() + timestamps = [] scan_datas, scan_data = [], [] headers, header = [], '' read_data = False - for line in lines: + for i, line in enumerate(lines): # Header line starts with #L - reads headers, and toggles data read-in on - if line[0:2] == "#L": + if 'zapline mono' in line: + timestamps.append(lines[i+1].strip('#D')) + + elif line[0:2] == "#L": header, read_data = line[2:].split(), True if options['log']: @@ -148,12 +152,19 @@ def split_scan_data(data: dict, options={}) -> list: path = os.path.join(options['save_folder'], f'{filename}_{edge}{count}.dat') if not os.path.isfile(path): - scan.to_csv(path) + + with open(path, 'w', newline = '\n') as f: + + f.write(f'# Time: {timestamps[i]}') + scan.to_csv(f) + if options['log']: aux.write_log(message=f'... ... Scan saved to {path}', options=options) elif options['replace'] and os.path.isfile(path): - scan.to_csv(path) + with open(path, 'w', newline = '\n') as f: + scan.to_csv(f) + if options['log']: aux.write_log(message=f'... ... File already exists. Overwriting to {path}', options=options) @@ -193,14 +204,14 @@ def read_data(data: dict, options={}) -> pd.DataFrame: data['path'] = [data['path']] # Initialise DataFrame with only ZapEnergy-column - xanes_data = pd.read_csv(data['path'][0])[['ZapEnergy']] + xanes_data = pd.read_csv(data['path'][0], skiprows=1)[['ZapEnergy']] xanes_data['ZapEnergy'] += options['adjust'] for filename in data['path']: columns.append(filename) - scan_data = pd.read_csv(filename) + scan_data = pd.read_csv(filename, skiprows=1) if not options['active_roi']: scan_data = scan_data[[determine_active_roi(scan_data)]] @@ -216,6 +227,49 @@ def read_data(data: dict, options={}) -> pd.DataFrame: return xanes_data +def read_metadata(data: dict, options={}) -> dict: + + required_options = ['get_temperature', 'get_timestamp', 'adjust_time'] + + default_options = { + 'get_temperature': True, + 'get_timestamp': True, + 'adjust_time': False + } + + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + + temperatures = [] + timestamps = [] + + for filename in data['path']: + scan_data = pd.read_csv(filename, skiprows=1) + + if options['get_temperature']: + temperatures.append(scan_data['ZBlower2'].mean()) + + if options['get_timestamp']: + + with open(filename, 'r') as f: + time = f.readline().strip('# Time: ') + time = datetime.datetime.strptime(time, "%a %b %d %H:%M:%S %Y ") + + if options['adjust_time']: + time_elapsed = scan_data['Htime'].iloc[-1] - scan_data['Htime'].iloc[0] + + time += datetime.timedelta(microseconds=time_elapsed)/2 + + + timestamps.append(time) + + + metadata = {'time': timestamps, 'temperature': temperatures} + + return metadata + + + From b1b705f28f7d3b888f6e8bf1995ec429bef38ebb Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 7 Jul 2022 13:07:22 +0200 Subject: [PATCH 54/56] Ignore makedirs-command if file in base folder --- nafuma/auxillary.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/nafuma/auxillary.py b/nafuma/auxillary.py index 45633c5..a63c8fc 100644 --- a/nafuma/auxillary.py +++ b/nafuma/auxillary.py @@ -26,11 +26,12 @@ def save_options(options, path, ignore=None): if not os.path.isdir(os.path.dirname(path)): - os.makedirs(os.path.dirname(path)) + if os.path.dirname(path): + os.makedirs(os.path.dirname(path)) with open(path, 'w') as f: - json.dump(options_copy,f, skipkeys=True, indent=4) + json.dump(options_copy, f, skipkeys=True, indent=4) def load_options(path): From f0c547f889be17454f75828aeb324aa1d4c51f20 Mon Sep 17 00:00:00 2001 From: halvorhv Date: Thu, 7 Jul 2022 14:49:27 +0200 Subject: [PATCH 55/56] Adding function to rearrange filenames-array --- nafuma/auxillary.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/nafuma/auxillary.py b/nafuma/auxillary.py index 45633c5..bd30713 100644 --- a/nafuma/auxillary.py +++ b/nafuma/auxillary.py @@ -103,4 +103,11 @@ def get_filenames(path, ext, filter=''): filenames = [os.path.join(path, filename) for filename in os.listdir(path) if os.path.isfile(os.path.join(path, filename)) and filename.endswith(ext) and filter in filename] + return filenames + +def move_list_element_last(filenames,string): + for i,file in enumerate(filenames): + if string in file: + del filenames[i] + filenames.append(file) return filenames \ No newline at end of file From 8096cf3bd540a8cb142e4439ff51461c2aa79b83 Mon Sep 17 00:00:00 2001 From: halvorhv Date: Thu, 7 Jul 2022 14:51:57 +0200 Subject: [PATCH 56/56] no change --- nafuma/xanes/io.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 70f6891..98a3943 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -253,3 +253,4 @@ def determine_active_roi(scan_data): active_roi = None return active_roi + \ No newline at end of file