def write_log(message, options={}):
    ''' Appends a timestamped message to a log file.

    Input:
    message: text to write. It is prefixed with the current time, formatted as [YYYY/MM/DD HH:MM:SS]
    options: dictionary of options. Only the key 'logfile' (path to the log file) is used here. If it is
             not defined, a file name built from the current time is used instead.'''

    from datetime import datetime

    required_options = ['logfile']
    default_options = {
        'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}'
    }

    options = update_options(options=options, required_options=required_options, default_options=default_options)

    now = datetime.now().strftime('%Y/%m/%d %H:%M:%S')
    message = f'[{now}] {message} \n'

    # Append mode, so that successive calls pointing at the same logfile add lines instead of overwriting
    with open(options['logfile'], 'a') as f:
        f.write(message)


def get_filenames(path, ext, filter=''):
    ''' Collects all filenames from specified path with a specified extension

    Input:
    path: path to find all filenames (relative or absolute)
    ext: extension (including ".")
    filter: substring that has to be present in the filename. The default ('') accepts every file.

    Output:
    filenames: alphabetically sorted list of the matching files, each joined with path. Sub-folders are skipped.'''

    # os.listdir() gives no guarantee on the ordering, so the names are sorted to get the same
    # sequence of files on every platform and in every run
    candidates = ((filename, os.path.join(path, filename)) for filename in sorted(os.listdir(path)))

    filenames = [
        full_path for filename, full_path in candidates
        if os.path.isfile(full_path) and filename.endswith(ext) and filter in filename
    ]

    return filenames
def find_element(data: dict, index=0) -> str:
    ''' Takes the data dictionary and determines which element the edge is from, based on a value of the ZapEnergy-column.

    Input:
    data: dictionary where data['xanes_data_original'] is a DataFrame with a "ZapEnergy"-column
    index: row of the ZapEnergy-column to look at (default 0, i.e. the start value of the scan)

    Output:
    The element symbol ('Mn', 'Fe', 'Co' or 'Ni') whose interval contains the energy.

    Raises ValueError if the energy does not fall strictly inside any of the known intervals.'''

    # Open intervals (lower, upper) in which the scan is expected to start for each edge
    element_energy_intervals = {
        'Mn': [5.9, 6.5],
        'Fe': [7.0, 7.2],
        'Co': [7.6, 7.8],
        'Ni': [8.0, 8.6]
    }

    energy = data['xanes_data_original']["ZapEnergy"].iloc[index]

    for element, (lower, upper) in element_energy_intervals.items():
        if lower < energy < upper:
            return element

    # Without this, the function used to fail with an UnboundLocalError that said nothing about the cause
    raise ValueError(f'Could not determine element: ZapEnergy value {energy} (row {index}) is outside all known edge intervals {element_energy_intervals}')



def pre_edge_fit(data: dict, options={}) -> pd.DataFrame:
    ''' Fits a straight line to the pre-edge region of every scan and evaluates it over the full energy range.

    Input:
    data: dictionary with data['path'] (list of filenames, also used as column names) and
          data['xanes_data_original'] (DataFrame with a "ZapEnergy"-column and one column per filename).
          When the edge start is determined automatically, data['edge'] is set to the element that was found.
    options: dictionary of options:
        edge_start: energy where the edge starts; everything to the LEFT of it is used for the fit.
                    If not set, it is estimated from the first scan minus a per-element offset.
        log, logfile: whether to write progress to a log file, and which file
        save_plots, save_folder: whether to save a diagnostic plot per scan, and where

    Output:
    DataFrame with the "ZapEnergy"-column and one column per filename holding the fitted background.'''

    required_options = ['edge_start', 'log', 'logfile', 'save_plots', 'save_folder']
    default_options = {
        'edge_start': None,
        'log': False,
        'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_pre_edge_fit.log',
        'save_plots': False,
        'save_folder': './'
    }

    options = aux.update_options(options=options, required_options=required_options, default_options=default_options)

    if options['log']:
        aux.write_log(message='Starting pre edge fit', options=options)

    # FIXME Implement with finding accurate edge position
    # FIXME Allow specification of start of pre-edge area
    # Find the cutoff point at which the edge starts - everything to the LEFT of this point will be used in the pre edge function fit
    if not options['edge_start']:
        pre_edge_limit_offsets = {
            'Mn': 0.03,
            'Fe': 0.03,
            'Co': 0.03,
            'Ni': 0.03
        }

        data['edge'] = find_element(data)

        edge_position = estimate_edge_position(data, options, index=0)
        pre_edge_limit = edge_position - pre_edge_limit_offsets[data['edge']]
    else:
        # A user supplied edge start is used directly (previously pre_edge_limit was left undefined in this case)
        pre_edge_limit = options['edge_start']

    # FIXME There should be an option to specify the interval in which to fit the background - now it is taking everything to the left of edge_start parameter, but if there are some artifacts in this area, it should be possible to
    # limit the interval
    # Making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined)
    pre_edge_data = data['xanes_data_original'].loc[data['xanes_data_original']["ZapEnergy"] < pre_edge_limit]

    if pre_edge_data.empty:
        raise ValueError(f'No data points with ZapEnergy below the pre edge limit ({pre_edge_limit}) - nothing to fit the background to')

    # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data
    pre_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"])

    for i, filename in enumerate(data['path']):
        if options['log']:
            aux.write_log(message=f'Fitting background on {os.path.basename(filename)} ({i+1} / {len(data["path"])})', options=options)

        # Fitting linear function to the background
        params = np.polyfit(pre_edge_data["ZapEnergy"], pre_edge_data[filename], 1)
        fit_function = np.poly1d(params)

        # Evaluating the fit on all ZapEnergy-values, not only the ones used in the fit
        background = fit_function(pre_edge_fit_data["ZapEnergy"])

        # Every new column goes in right after ZapEnergy, so the scan columns end up in reverse order of data['path']
        pre_edge_fit_data.insert(1, filename, background)

        if options['save_plots']:
            if not os.path.isdir(options['save_folder']):
                os.makedirs(options['save_folder'])

            dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_pre_edge_fit.png'

            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

            # Left panel: full scan with the fitted background and the upper limit of the fit region
            data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1)
            pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax1)
            ax1.axvline(x=max(pre_edge_data['ZapEnergy']), ls='--')
            ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20)

            # Right panel: zoomed in on the fit region
            data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2)
            pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax2)
            ax2.axvline(x=max(pre_edge_data['ZapEnergy']), ls='--')
            ax2.set_xlim([min(pre_edge_data['ZapEnergy']), max(pre_edge_data['ZapEnergy'])])
            ax2.set_ylim([min(pre_edge_data[filename]), max(pre_edge_data[filename])])
            ax2.set_title(f'{os.path.basename(filename)} - Fit region', size=20)

            plt.savefig(dst, transparent=False)
            plt.close()

    if options['log']:
        aux.write_log(message='Pre edge fitting done.', options=options)

    return pre_edge_fit_data



def pre_edge_subtraction(data: dict, options={}):
    ''' Subtracts the fitted pre-edge background from every scan.

    Input:
    data: dictionary with data['path'] (list of filenames, also used as column names),
          data['xanes_data_original'] (the scans) and data['pre_edge_fit_data'] (the fitted backgrounds,
          with the same column names as the scans)
    options: dictionary of options:
        log, logfile: whether to write progress to a log file, and which file
        save_plots, save_folder: whether to save a plot per scan, and where

    Output:
    DataFrame with the "ZapEnergy"-column and one background subtracted column per filename.'''

    required_options = ['log', 'logfile', 'save_plots', 'save_folder']
    default_options = {
        'log': False,
        # The extension is only added once, at the end of the name
        'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_pre_edge_subtraction.log',
        'save_plots': False,
        'save_folder': './'
    }

    options = aux.update_options(options=options, required_options=required_options, default_options=default_options)

    if options['log']:
        aux.write_log(message='Starting pre edge subtraction', options=options)

    xanes_data_bkgd_subtracted = pd.DataFrame(data['xanes_data_original']["ZapEnergy"])

    for i, filename in enumerate(data['path']):
        if options['log']:
            # Counting from 1, in the same way as the log messages of pre_edge_fit
            aux.write_log(message=f'Subtracting background on {filename} ({i+1} / {len(data["path"])})', options=options)

        # Every new column goes in right after ZapEnergy, so the scan columns end up in reverse order of data['path']
        xanes_data_bkgd_subtracted.insert(1, filename, data['xanes_data_original'][filename] - data['pre_edge_fit_data'][filename])

        if options['save_plots']:
            if not os.path.isdir(options['save_folder']):
                os.makedirs(options['save_folder'])

            dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_pre_edge_subtraction.png'

            fig, ax = plt.subplots(figsize=(10, 5))
            data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax)
            xanes_data_bkgd_subtracted.plot(x='ZapEnergy', y=filename, color='red', ax=ax)
            ax.set_title(f'{os.path.basename(filename)} - After subtraction', size=20)

            plt.savefig(dst)
            plt.close()

    return xanes_data_bkgd_subtracted



def estimate_edge_position(data: dict, options={}, index=0):
    ''' Differentiates one scan to get a first estimate of the edge position, to be used as a starting point.

    Input:
    data: dictionary with data['path'] (list of filenames, also used as column names) and
          data['xanes_data_original'] (DataFrame with a "ZapEnergy"-column and one column per filename)
    options: dictionary of options:
        periods: number of rows between the two points of each difference. Needs to be an even number
                 for the shifting of values to work properly.
        log: whether to write the result to a log file
        print: not used in this function
    index: which entry of data['path'] to differentiate (default 0)

    Output:
    The ZapEnergy-value at which the difference is at its maximum.'''

    required_options = ['print', 'periods', 'log']
    default_options = {
        'print': False,
        'periods': 2, #Periods needs to be an even number for the shifting of values to work properly
        'log': False # Read below, so it needs a default when this function is called on its own
    }
    options = aux.update_options(options=options, required_options=required_options, default_options=default_options)

    scan = data['path'][index]

    # Making new dataframe to keep the differentiated data
    df_diff = pd.DataFrame(data['xanes_data_original']["ZapEnergy"])
    df_diff[scan] = data['xanes_data_original'][scan].diff(periods=options['periods'])

    # Shifting column values up so that average differential fits right between the points used in the calculation
    df_diff[scan] = df_diff[scan].shift(-int(options['periods']/2))
    df_diff_max = df_diff[scan].dropna().max()

    # If the maximum occurs more than once, the first occurrence is used
    estimated_edge_shift = df_diff.loc[df_diff[scan] == df_diff_max, 'ZapEnergy'].values[0]

    if options['log']:
        aux.write_log(message=f'Estimated edge shift for determination of pre-edge area is: {estimated_edge_shift} keV', options=options)

    return estimated_edge_shift
smoothing(path, options={}): return df_smooth, filenames -def find_pos_maxdiff(df, filenames,options={}): - #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. - required_options = ['print','periods'] - default_options = { - 'print': False, - 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly - } - options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - #making new dataframe to keep the differentiated data - df_diff = pd.DataFrame(df["ZapEnergy"]) - df_diff[filenames]=df[filenames].diff(periods=options['periods']) - - #shifting column values up so that average differential fits right between the points used in the calculation - df_diff[filenames]=df_diff[filenames].shift(-int(options['periods']/2)) - df_diff_max = df_diff[filenames].dropna().max() - estimated_edge_shift =df_diff.loc[df_diff[filenames] == df_diff_max,'ZapEnergy'].values[0] - - return estimated_edge_shift, df_diff, df_diff_max def find_nearest(array, value): #function to find the value closes to "value" in an "array" @@ -250,7 +311,7 @@ def finding_e0(path, options={}): print("MORE THAN ONE FILE --> generalize") ##### - estimated_edge_shift, df_diff, df_diff_max = find_pos_maxdiff(df_smooth, filenames,options=options) + estimated_edge_shift, df_diff, df_diff_max = estimate_edge_position(df_smooth, filenames,options=options) print(estimated_edge_shift) #### ###df_diff[filenames]=df_smooth[filenames].diff(periods=options['periods']) # diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 527f300..816b8f5 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -2,12 +2,17 @@ import pandas as pd import matplotlib.pyplot as plt import os import numpy as np +import nafuma.auxillary as aux + def split_xanes_scan(root, destination=None, replace=False): #root is the path to the beamtime-folder #destination should be the path to the processed data #insert 
def read_data(data: dict, options={}) -> pd.DataFrame:
    ''' Reads all scans in data['path'] and collects the active ROI of each into one DataFrame.

    Input:
    data: dictionary where data['path'] is a filename or a list of filenames of csv-files containing a
          "ZapEnergy"-column and the ROI-columns. A single filename is wrapped in a list (data is modified).
    options: dictionary of options (none are used at the moment)

    Output:
    DataFrame with the "ZapEnergy"-column of the first file, followed by one column per file, named by
    the filename and holding the ROI chosen by determine_active_roi().

    Raises ValueError if no active ROI can be determined for a file.'''

    # FIXME Handle the case when dataseries are not the same size

    required_options = []
    default_options = {

    }

    options = aux.update_options(options=options, required_options=required_options, default_options=default_options)

    # This has to happen before data['path'] is indexed - indexing a plain string gives its first character
    if not isinstance(data['path'], list):
        data['path'] = [data['path']]

    columns = ['ZapEnergy']

    # Initialise with only the ZapEnergy-column, taken from the first file
    frames = [pd.read_csv(data['path'][0])[['ZapEnergy']]]

    for filename in data['path']:
        scan_data = pd.read_csv(filename)

        active_roi = determine_active_roi(scan_data)
        if active_roi is None:
            raise ValueError(f'Could not determine the active ROI in {filename}')

        columns.append(filename)
        frames.append(scan_data[[active_roi]])

    # Joining everything in one go instead of growing the DataFrame once per file
    xanes_data = pd.concat(frames, axis=1)
    xanes_data.columns = columns

    return xanes_data



def _is_rising(column):
    ''' True if the mean of the first 100 points of column is lower than the mean of the last 100 points. '''

    return column.iloc[0:100].mean() < column.iloc[-100:].mean()



def determine_active_roi(scan_data):
    ''' Picks which of the columns 'xmap_roi00' and 'xmap_roi01' holds the edge.

    A column is a candidate if its intensity is higher at the end of the scan than at the start. If both are
    candidates, the one with the largest difference between maximum and minimum wins ('xmap_roi01' on a tie).
    If only one of the two columns is present, it is returned directly.

    Input:
    scan_data: DataFrame of a single scan

    Output:
    Name of the chosen column, or None if neither column is present or neither of them is rising.'''

    # FIXME For Co-edge, this gave a wrong scan

    available = [roi for roi in ('xmap_roi00', 'xmap_roi01') if roi in scan_data.columns]

    if not available:
        return None

    # Scans with a single ROI: there is nothing to choose between
    if len(available) == 1:
        return available[0]

    rising = [roi for roi in available if _is_rising(scan_data[roi])]

    if not rising:
        return None

    if len(rising) == 1:
        return rising[0]

    # Both are rising --> pick the one with the biggest edge step
    span_roi00 = scan_data['xmap_roi00'].max() - scan_data['xmap_roi00'].min()
    span_roi01 = scan_data['xmap_roi01'].max() - scan_data['xmap_roi01'].min()

    if span_roi00 > span_roi01:
        return 'xmap_roi00'

    return 'xmap_roi01'