From 9c6a7d5991af452759fdf2f11afb76623e7d6aae Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Fri, 17 Jun 2022 16:59:37 +0200 Subject: [PATCH] Refactor post_edge_fit --- nafuma/xanes/calib.py | 100 ++++++++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 39 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index a45457b..10afda9 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -41,9 +41,9 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME Add log-file - required_options = ['edge_start', 'log', 'logfile', 'save_plots', 'save_folder'] + required_options = ['pre_edge_start', 'log', 'logfile', 'save_plots', 'save_folder'] default_options = { - 'edge_start': None, + 'pre_edge_start': None, 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_pre_edge_fit.log', 'save_plots': False, @@ -60,18 +60,13 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME Implement with finding accurate edge position # FIXME Allow specification of start of pre-edge area # Find the cutoff point at which the edge starts - everything to the LEFT of this point will be used in the pre edge function fit - if not options['edge_start']: - pre_edge_limit_offsets = { - 'Mn': 0.03, - 'Fe': 0.03, - 'Co': 0.03, - 'Ni': 0.03 - } + if not options['pre_edge_start']: + pre_edge_limit_offset = 0.03 data['edge'] = find_element(data) edge_position = estimate_edge_position(data, options, index=0) - pre_edge_limit = edge_position - pre_edge_limit_offsets[data['edge']] + pre_edge_limit = edge_position - pre_edge_limit_offset # FIXME There should be an option to specify the interval in which to fit the background - now it is taking everything to the left of edge_start parameter, but if there are some artifacts in this area, it should be possible to # limit the interval @@ -169,6 +164,7 @@ def estimate_edge_position(data: dict, options={}, index=0): #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. required_options = ['print','periods'] default_options = { + 'print': False, 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly } @@ -191,45 +187,71 @@ def estimate_edge_position(data: dict, options={}, index=0): return estimated_edge_shift -def post_edge_fit(path, options={}): +def post_edge_fit(data: dict, options={}): #FIXME should be called "fitting post edge" (normalization is not done here, need edge shift position) - required_options = ['print'] + required_options = ['post_edge_start', 'print'] default_options = { + 'post_edge_start': None, 'print': False } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + #FIXME Allow min and max limits + + if not options['post_edge_start']: + post_edge_limit_offset = 0.03 + + data['edge'] = find_element(data) + + edge_position = estimate_edge_position(data, options, index=0) + post_edge_limit = edge_position + post_edge_limit_offset + + + post_edge_data = data['xanes_data_original'].loc[data['xanes_data_original']["ZapEnergy"] > post_edge_limit] + post_edge_data.dropna(inplace=True) #Removing all indexes without any value, as some of the data sets misses the few last data points and fucks up the fit + + # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data + post_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) - df_bkgd_sub,filenames,edge = pre_edge_subtraction(path, options=options) - #Defining the end of the pre-edge-region for Mn/Ni, thus start of the edge - #FIXME Use rought edge shift estimate, add X eV as first guess, have an option to adjust this value with widget - if edge == 'Mn': - edge_stop = 6.565 - if edge == 'Ni': - edge_stop = 8.361 + for i, filename in enumerate(data['path']): + if options['log']: + aux.write_log(message=f'Fitting post edge on {os.path.basename(filename)} ({i+1} / {len(data["path"])})', options=options) - df_end= df_bkgd_sub.loc[df_bkgd_sub["ZapEnergy"] > edge_stop] # new dataframe only containing the post edge, where a regression line will be calculated in the for-loop below - df_end.dropna(inplace=True) #Removing all indexes without any value, as some of the data sets misses the few last data points and fucks up the fit - df_postedge = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) #making a new dataframe + #Fitting linear function to the background + params = np.polyfit(post_edge_data["ZapEnergy"], post_edge_data[filename], 2) + fit_function = np.poly1d(params) + + #making a list, y_pre,so the background will be applied to all ZapEnergy-values + background=fit_function(post_edge_fit_data["ZapEnergy"]) + + #adding a new column in df_background with the y-values of the background + post_edge_fit_data.insert(1,filename,background) + + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) - function_post_list=[] - for files in filenames: - d = np.polyfit(df_end["ZapEnergy"],df_end[files],1) - function_post = np.poly1d(d) - y_post=function_post(df_bkgd_sub["ZapEnergy"]) - function_post_list.append(function_post) - df_postedge.insert(1,files,y_post) #adding a new column with the y-values of the fitted post edge + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_post_edge_fit.png' - #Plotting the background subtracted signal with the post-edge regression line and the start point for the linear regression line - if options['print'] == True: - ax = df_bkgd_sub.plot(x = "ZapEnergy",y=filenames) #defining x and y - plt.axvline(x = min(df_end["ZapEnergy"])) - fig = plt.figure(figsize=(15,15)) - df_postedge.plot(x="ZapEnergy", y=filenames,color="Green",ax=ax, legend=False) - ax = df_bkgd_sub.plot(x = "ZapEnergy",y=filenames, legend=False) #defining x and y - df_postedge.plot(x="ZapEnergy", y=filenames,color="Green",ax=ax, legend=False) - plt.axvline(x = min(df_end["ZapEnergy"])) + fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5)) + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) + post_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) + ax1.axvline(x = max(post_edge_data['ZapEnergy']), ls='--') + ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) - return df_bkgd_sub, df_postedge, filenames, edge + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) + post_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax2) + ax2.axvline(x = max(post_edge_data['ZapEnergy']), ls='--') + ax2.set_xlim([min(post_edge_data['ZapEnergy']), max(post_edge_data['ZapEnergy'])]) + ax2.set_ylim([min(post_edge_data[filename]), max(post_edge_data[filename])]) + ax2.set_title(f'{os.path.basename(filename)} - Fit region', size=20) + + + plt.savefig(dst, transparent=False) + plt.close() + + + return post_edge_fit_data def smoothing(path, options={}): required_options = ['print','window_length','polyorder']