diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index fb436cb..f7b57c6 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -44,9 +44,19 @@ def pre_edge_subtraction(path, options={}): edge=finding_edge(df) #Defining the end of the region used to define the background, thus start of the edge + + #######================================================================================================================================================ + #Trying to implement automatical region determination based on an estimate of the edge shift + #print(df) + #estimated_edge_shift, df_diff, df_diff_max = find_pos_maxdiff(df, filenames,options=options) + + #print(estimated_edge_shift) + #estimated_edge_shift + ###========================================================================================================================================================================= #implement widget if edge == 'Mn': edge_start = 6.42 + #edge_start = estimated_edge_shift if edge == 'Ni': edge_start = 8.3 @@ -120,7 +130,7 @@ def post_edge_normalization(path, options={}): } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - df_bkgd_sub,filenames,edge = pre_edge_subtraction(path) + df_bkgd_sub,filenames,edge = pre_edge_subtraction(path, options=options) #Defining the end of the pre-edge-region for Mn/Ni, thus start of the edge #Implement widget if edge == 'Mn': @@ -161,34 +171,199 @@ def smoothing(path, options={}): } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - df_bkgd_sub, df_postedge, filenames, edge = post_edge_normalization(path) + df_bkgd_sub, df_postedge, filenames, edge = post_edge_normalization(path,options=options) #================= SMOOTHING df_smooth = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) df_default = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) #df_smooth[filenames] = df_bkgd_sub.iloc[:,2].rolling(window=rolling_av).mean() #df_smooth[filenames] = df_smooth[filenames].shift(-int((rolling_av)/2)) for filename in filenames: - x=savgol_filter(df_bkgd_sub[filename], options['window_length'],options['polyorder']) - df_smooth[filename] = x + x_smooth=savgol_filter(df_bkgd_sub[filename], options['window_length'],options['polyorder']) + df_smooth[filename] = x_smooth x_default=savgol_filter(df_bkgd_sub[filename],default_options['window_length'],default_options['polyorder']) df_default[filename] = x_default + + + #printing the smoothed curves vs data if options['print'] == True: + + ## ================================================ + #df_diff = pd.DataFrame(df_smooth["ZapEnergy"]) + #df_diff_estimated_max = df_diff[filenames].dropna().max() + + + #estimated_edge_shift=df_diff.loc[df_diff[filenames] == df_diff_max,'ZapEnergy'].values[0] + # ========================================== + + fig, (ax1,ax2) = plt.subplots(1,2,figsize=(15,5)) x_range_zoom=[6.54,6.55] #make into widget y_range_zoom=[20000,80000] #make into widget - df_bkgd_sub.plot(x = "ZapEnergy",y=filenames, ax=ax1, color="Red") + df_bkgd_sub.plot.scatter(x = "ZapEnergy",y=filenames, ax=ax1, color="Red") df_smooth.plot(x = "ZapEnergy",y=filenames, ax=ax1, color="Blue") ax1.set_xlim(x_range_zoom) ax1.set_ylim(y_range_zoom) ax1.set_title("Smoothed curve (blue) vs data (red) used for further analysis") - df_bkgd_sub.plot(x = "ZapEnergy",y=filenames, ax=ax2, color="Red") - df_default.plot(x = "ZapEnergy",y=filenames, ax=ax2, color="Blue") + df_bkgd_sub.plot.scatter(x = "ZapEnergy",y=filenames, ax=ax2, color="Red") + df_default.plot(x = "ZapEnergy",y=filenames, ax=ax2, color="Green") ax2.set_xlim(x_range_zoom) ax2.set_ylim(y_range_zoom) - ax2.set_title("Smoothed curve (blue) vs data (red) using default window_length and polyorder") + ax2.set_title("Smoothed curve (green) vs data (red) using default window_length and polyorder") + return df_smooth, filenames + +def find_pos_maxdiff(df, filenames,options={}): + #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. + required_options = ['print','periods'] + default_options = { + 'print': False, + 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly + } + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + #making new dataframe to keep the differentiated data + df_diff = pd.DataFrame(df["ZapEnergy"]) + df_diff[filenames]=df[filenames].diff(periods=options['periods']) + + #shifting column values up so that average differential fits right between the points used in the calculation + df_diff[filenames]=df_diff[filenames].shift(-int(options['periods']/2)) + df_diff_max = df_diff[filenames].dropna().max() + estimated_edge_shift =df_diff.loc[df_diff[filenames] == df_diff_max,'ZapEnergy'].values[0] + + return estimated_edge_shift, df_diff, df_diff_max + +def find_nearest(array, value): + #function to find the value closes to "value" in an "array" + array = np.asarray(array) + idx = (np.abs(array - value)).argmin() + return array[idx] + +def finding_e0(path, options={}): + required_options = ['print','periods'] + default_options = { + 'print': False, + 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly + } + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + df_smooth, filenames = smoothing(path, options=options) #This way the smoothing is printed as long as the "finding e0" is printed. + + if options['periods'] % 2 == 1: + print("NB!!!!!!!!!!!!!!!!! Periods needs to be an even number for the shifting of values to work properly") + ###df_diff = pd.DataFrame(df_smooth["ZapEnergy"]) # + if len(filenames) == 1: + filenames=filenames[0] + else: + print("MORE THAN ONE FILE --> generalize") + + ##### + estimated_edge_shift, df_diff, df_diff_max = find_pos_maxdiff(df_smooth, filenames,options=options) + print(estimated_edge_shift) + #### + ###df_diff[filenames]=df_smooth[filenames].diff(periods=options['periods']) # + df_doublediff=pd.DataFrame(df_smooth["ZapEnergy"]) + df_doublediff[filenames]=df_diff[filenames].diff(periods=options['periods']) + + if options['print'] == True: + fig, (ax1,ax2) = plt.subplots(1,2,figsize=(15,5)) + + df_diff.plot(x = "ZapEnergy",y=filenames, ax=ax1) #defining x and y + df_doublediff.plot(x = "ZapEnergy",y=filenames,ax=ax2) #defining x and y + + #shifting column values up so that average differential fits right between the points used in the calculation + #df_diff[filenames]=df_diff[filenames].shift(-int(options['periods']/2)) # + df_doublediff[filenames]=df_doublediff[filenames].shift(-int(options['periods'])) + + #finding maximum value to maneuver to the correct part of the data set + #df_diff_max = df_diff[filenames].dropna().max() + + + estimated_edge_shift=df_diff.loc[df_diff[filenames] == df_diff_max,'ZapEnergy'].values[0] + + fit_region = 0.0004 + df_diff_edge=df_diff.loc[(df_diff["ZapEnergy"] < estimated_edge_shift+fit_region)]# and (df_diff["ZapEnergy"] > estimated_edge_shift-0.05)] + df_diff_edge=df_diff_edge.loc[(df_diff["ZapEnergy"] > estimated_edge_shift-fit_region)] + + + + + df_doublediff_edge=df_doublediff.loc[(df_doublediff["ZapEnergy"] < estimated_edge_shift+fit_region)]# and (df_diff["ZapEnergy"] > estimated_edge_shift-0.05)] + df_doublediff_edge=df_doublediff_edge.loc[(df_doublediff["ZapEnergy"] > estimated_edge_shift-fit_region)] + #df_diff_edge=df_diff.loc[(df_diff["ZapEnergy"] > estimated_edge_shift-0.15) and (df_diff["ZapEnergy"] < estimated_edge_shift+0.15)] + + #df_diff_edge=df_diff.loc[df_diff["ZapEnergy"] > estimated_edge_shift-0.15] + #print(df_diff_edge) + if options['print'] == True: + fig, (ax3,ax4) = plt.subplots(1,2,figsize=(15,5)) + + df_diff_edge.plot(x = "ZapEnergy",y=filenames,ax=ax3) #defining x and y + ax3.set_title("Zoomed into edge region (derivative))") + ax3.axvline(x = estimated_edge_shift) + + df_doublediff_edge.plot(x = "ZapEnergy",y=filenames,ax=ax4,kind="scatter") #defining x and y + ax4.set_title("Zoomed into edge region (double derivative)") + ax4.axvline(x = estimated_edge_shift) + ax4.axhline(0) + + #ax1.set_xlim([estimated_edge_shift-fit_region,estimated_edge_shift+fit_region]) + #ax1.set_title("not sure what this is tbh") + + #ax2.set_xlim([estimated_edge_shift-fit_region,estimated_edge_shift+fit_region]) + #ax2.set_title("not sure what this is either tbh") + + #============== + #df_smooth=df_smooth2 + #================= + + + + + #========================== fitting first differential ========== + df_diff = df_diff[df_diff[filenames].notna()] + + #fitting a function to the chosen interval + d = np.polyfit(df_diff_edge["ZapEnergy"],df_diff_edge[filenames],2) + function_diff = np.poly1d(d) + + x_diff=np.linspace(df_diff_edge["ZapEnergy"].iloc[0],df_diff_edge["ZapEnergy"].iloc[-1],num=1000) + y_diff=function_diff(x_diff) + #print(df_diff_edge["ZapEnergy"].iloc[-1]) + if options['print'] == True: + ax3.plot(x_diff,y_diff,color='Green') + + #y_diff_max=np.amax(y_diff,0) + y_diff_max_index = np.where(y_diff == np.amax(y_diff)) + #print(y_diff_max_index[0]) + edge_shift_diff=float(x_diff[y_diff_max_index]) + print("Edge shift estimated by the differential maximum is "+str(round(edge_shift_diff,5))) + if options['print'] == True: + ax3.axvline(x=edge_shift_diff,color="green") + #print(df_doublediff_edge["ZapEnergy"].iloc[0]) + #ax4.plot(x_doublediff,y_doublediff,color='Green')) + + + #fitting double differentiate + df_doublediff = df_doublediff[df_doublediff[filenames].notna()] + d = np.polyfit(df_doublediff_edge["ZapEnergy"],df_doublediff_edge[filenames],2) + function_doublediff = np.poly1d(d) + + x_doublediff=np.linspace(df_doublediff_edge["ZapEnergy"].iloc[0],df_doublediff_edge["ZapEnergy"].iloc[-1],num=10000) + y_doublediff=function_doublediff(x_doublediff) + + if options['print'] == True: + ax4.plot(x_doublediff,y_doublediff,color='Green') + + y_doublediff_zero=find_nearest(y_doublediff,0) + y_doublediff_zero_index = np.where(y_doublediff == y_doublediff_zero) + + edge_shift_doublediff=float(x_doublediff[y_doublediff_zero_index]) + + print("Edge shift estimated by the double differential zero-point is "+str(round(edge_shift_doublediff,5))) + if options['print'] == True: + ax4.axvline(x=edge_shift_doublediff,color="green") +