From 9c6a7d5991af452759fdf2f11afb76623e7d6aae Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Fri, 17 Jun 2022 16:59:37 +0200 Subject: [PATCH 1/9] Refactor post_edge_fit --- nafuma/xanes/calib.py | 100 ++++++++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 39 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index a45457b..10afda9 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -41,9 +41,9 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME Add log-file - required_options = ['edge_start', 'log', 'logfile', 'save_plots', 'save_folder'] + required_options = ['pre_edge_start', 'log', 'logfile', 'save_plots', 'save_folder'] default_options = { - 'edge_start': None, + 'pre_edge_start': None, 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_pre_edge_fit.log', 'save_plots': False, @@ -60,18 +60,13 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # FIXME Implement with finding accurate edge position # FIXME Allow specification of start of pre-edge area # Find the cutoff point at which the edge starts - everything to the LEFT of this point will be used in the pre edge function fit - if not options['edge_start']: - pre_edge_limit_offsets = { - 'Mn': 0.03, - 'Fe': 0.03, - 'Co': 0.03, - 'Ni': 0.03 - } + if not options['pre_edge_start']: + pre_edge_limit_offset = 0.03 data['edge'] = find_element(data) edge_position = estimate_edge_position(data, options, index=0) - pre_edge_limit = edge_position - pre_edge_limit_offsets[data['edge']] + pre_edge_limit = edge_position - pre_edge_limit_offset # FIXME There should be an option to specify the interval in which to fit the background - now it is taking everything to the left of edge_start parameter, but if there are some artifacts in this area, it should be possible to # limit the interval @@ -169,6 +164,7 @@ def estimate_edge_position(data: dict, options={}, index=0): #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. required_options = ['print','periods'] default_options = { + 'print': False, 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly } @@ -191,45 +187,71 @@ def estimate_edge_position(data: dict, options={}, index=0): return estimated_edge_shift -def post_edge_fit(path, options={}): +def post_edge_fit(data: dict, options={}): #FIXME should be called "fitting post edge" (normalization is not done here, need edge shift position) - required_options = ['print'] + required_options = ['post_edge_start', 'print'] default_options = { + 'post_edge_start': None, 'print': False } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + #FIXME Allow min and max limits + + if not options['post_edge_start']: + post_edge_limit_offset = 0.03 + + data['edge'] = find_element(data) + + edge_position = estimate_edge_position(data, options, index=0) + post_edge_limit = edge_position + post_edge_limit_offset + + + post_edge_data = data['xanes_data_original'].loc[data['xanes_data_original']["ZapEnergy"] > post_edge_limit] + post_edge_data.dropna(inplace=True) #Removing all indexes without any value, as some of the data sets misses the few last data points and fucks up the fit + + # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data + post_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) - df_bkgd_sub,filenames,edge = pre_edge_subtraction(path, options=options) - #Defining the end of the pre-edge-region for Mn/Ni, thus start of the edge - #FIXME Use rought edge shift estimate, add X eV as first guess, have an option to adjust this value with widget - if edge == 'Mn': - edge_stop = 6.565 - if edge == 'Ni': - edge_stop = 8.361 + for i, filename in enumerate(data['path']): + if options['log']: + aux.write_log(message=f'Fitting post edge on {os.path.basename(filename)} ({i+1} / {len(data["path"])})', options=options) - df_end= df_bkgd_sub.loc[df_bkgd_sub["ZapEnergy"] > edge_stop] # new dataframe only containing the post edge, where a regression line will be calculated in the for-loop below - df_end.dropna(inplace=True) #Removing all indexes without any value, as some of the data sets misses the few last data points and fucks up the fit - df_postedge = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) #making a new dataframe + #Fitting linear function to the background + params = np.polyfit(post_edge_data["ZapEnergy"], post_edge_data[filename], 2) + fit_function = np.poly1d(params) + + #making a list, y_pre,so the background will be applied to all ZapEnergy-values + background=fit_function(post_edge_fit_data["ZapEnergy"]) + + #adding a new column in df_background with the y-values of the background + post_edge_fit_data.insert(1,filename,background) + + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) - function_post_list=[] - for files in filenames: - d = np.polyfit(df_end["ZapEnergy"],df_end[files],1) - function_post = np.poly1d(d) - y_post=function_post(df_bkgd_sub["ZapEnergy"]) - function_post_list.append(function_post) - df_postedge.insert(1,files,y_post) #adding a new column with the y-values of the fitted post edge + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_post_edge_fit.png' - #Plotting the background subtracted signal with the post-edge regression line and the start point for the linear regression line - if options['print'] == True: - ax = df_bkgd_sub.plot(x = "ZapEnergy",y=filenames) #defining x and y - plt.axvline(x = min(df_end["ZapEnergy"])) - fig = plt.figure(figsize=(15,15)) - df_postedge.plot(x="ZapEnergy", y=filenames,color="Green",ax=ax, legend=False) - ax = df_bkgd_sub.plot(x = "ZapEnergy",y=filenames, legend=False) #defining x and y - df_postedge.plot(x="ZapEnergy", y=filenames,color="Green",ax=ax, legend=False) - plt.axvline(x = min(df_end["ZapEnergy"])) + fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5)) + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) + post_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) + ax1.axvline(x = max(post_edge_data['ZapEnergy']), ls='--') + ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) - return df_bkgd_sub, df_postedge, filenames, edge + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) + post_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax2) + ax2.axvline(x = max(post_edge_data['ZapEnergy']), ls='--') + ax2.set_xlim([min(post_edge_data['ZapEnergy']), max(post_edge_data['ZapEnergy'])]) + ax2.set_ylim([min(post_edge_data[filename]), max(post_edge_data[filename])]) + ax2.set_title(f'{os.path.basename(filename)} - Fit region', size=20) + + + plt.savefig(dst, transparent=False) + plt.close() + + + return post_edge_fit_data def smoothing(path, options={}): required_options = ['print','window_length','polyorder'] From 7214746af18475257e041e54483f456424aca8dc Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Mon, 20 Jun 2022 16:08:36 +0200 Subject: [PATCH 2/9] Refactor split_scans --- nafuma/xanes/io.py | 159 ++++++++++++++++++++++++++------------------- 1 file changed, 93 insertions(+), 66 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 816b8f5..53ad1ca 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -2,94 +2,120 @@ import pandas as pd import matplotlib.pyplot as plt import os import numpy as np -import nafuma.auxillary as aux +import nafuma.auxillary as aux +from nafuma.xanes.calib import find_element -def split_xanes_scan(root, destination=None, replace=False): +def split_scan_data(data: dict, options={}): + + + required_options = ['save', 'save_folder', 'replace', 'add_rois'] + + default_options = { + 'save': False, + 'save_folder': '.', + 'replace': False, + 'add_rois': False + } + + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) #root is the path to the beamtime-folder #destination should be the path to the processed data #insert a for-loop to go through all the folders.dat-files in the folder root\xanes\raw # FIXME Only adding this variable to pass the Linting-tests - will refactor this later - filename = 'dummy' + + if not isinstance(data['path'], list): + data['path'] = [data['path']] + + all_scans = [] - with open(filename, 'r') as f: - lines = f.readlines() + for filename in data['path']: + + with open(filename, 'r') as f: + lines = f.readlines() + + scan_datas, scan_data = [], [] + headers, header = [], '' + read_data = False - datas = [] - data = [] - headers = [] - header = '' - start = False - - for line in lines: - if line[0:2] == "#L": - start = True - header = line[2:].split() - continue - - elif line[0:2] == "#C": - start = False - - if data: - datas.append(data) - data = [] + for line in lines: + # Header line starts with #L - reads headers, and toggles data read-in on + if line[0:2] == "#L": + header, read_data = line[2:].split(), True + continue + + # First line after data started with #C - stops data read-in + elif line[0:2] == "#C": + read_data = False - if header: - headers.append(header) - header = '' + if scan_data: + scan_datas.append(scan_data); scan_data = [] + + if header: + headers.append(header); header = '' + + # Ignore line if read-in not toggled + if read_data == False: + continue + + # Read in data if it is + else: + scan_data.append(line.split()) - - if start == False: - continue - - else: - data.append(line.split()) - - - - - edges = {'Mn': [6.0, 6.1, 6.2, 6.3, 6.4, 6.5], 'Fe': [6.8, 6.9, 7.0, 7.1, 7.2], 'Co': [7.6, 7.7, 7.8, 7.9], 'Ni': [8.1, 8.2, 8.3, 8.4, 8.5]} - edge_count = {'Mn': 0, 'Fe': 0, 'Co': 0, 'Ni': 0} - - - for ind, data in enumerate(datas): - df = pd.DataFrame(data) - df.columns = headers[ind] - - edge_start = np.round((float(df["ZapEnergy"].min())), 1) - - for edge, energies in edges.items(): - if edge_start in energies: - edge_actual = edge - edge_count[edge] += 1 - + edges = {'Mn': [], 'Fe': [], 'Co': [], 'Ni': []} + + for i, scan_data in enumerate(scan_datas): + xanes_df = pd.DataFrame(scan_data).apply(pd.to_numeric) + xanes_df.columns = headers[i] + + if not ('xmap_roi00' in headers[i]) and (not 'xmap_roi01' in headers[i]): + continue + + + edge = find_element({'xanes_data_original': xanes_df}) + edges[edge].append(xanes_df) + - filename = filename.split('/')[-1] - count = str(edge_count[edge_actual]).zfill(4) + if options['add']: + + added_edges = {'Mn': [], 'Fe': [], 'Co': [], 'Ni': []} + for edge, scans in edges.items(): + if scans: + xanes_df = scans[0] - - # Save - if destination: - cwd = os.getcwd() + for i, scan in enumerate(scans): + if i > 0: - if not os.path.isdir(destination): - os.mkdir(destination) - - os.chdir(destination) + if 'xmap_roi00' in xanes_df.columns: + xanes_df['xmap_roi00'] += scan['xmap_roi00'] + if 'xmap_roi01' in xanes_df.columns: + xanes_df['xmap_roi01'] += scan['xmap_roi01'] - df.to_csv('{}_{}_{}.dat'.format(filename.split('.')[0], edge_actual, count)) + added_edges[edge].append(xanes_df) - os.chdir(cwd) - - else: - df.to_csv('{}_{}_{}.dat'.format(filename.split('.')[0], edge_actual, count)) + edges = added_edges + + if options['save']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) + filename = os.path.basename(filename).split('.')[0] + for edge, scans in edges.items(): + for i, scan in enumerate(scans): + count = '' if options['add'] else '_'+str(i).zfill(4) + path = os.path.join(options['save_folder'], f'{filename}_{edge}{count}.dat') + scan.to_csv(path) + + all_scans.append(edges) + + + return all_scans @@ -117,6 +143,7 @@ def read_data(data: dict, options={}) -> pd.DataFrame: columns.append(filename) scan_data = pd.read_csv(filename) + scan_data = scan_data[[determine_active_roi(scan_data)]] xanes_data = pd.concat([xanes_data, scan_data], axis=1) From cc80a48259dfae683a46752e76001488d035d605 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Mon, 20 Jun 2022 19:16:20 +0200 Subject: [PATCH 3/9] Add logging --- nafuma/xanes/io.py | 78 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 68 insertions(+), 10 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 53ad1ca..40ea0c2 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -4,18 +4,23 @@ import os import numpy as np import nafuma.auxillary as aux from nafuma.xanes.calib import find_element +from datetime import datetime -def split_scan_data(data: dict, options={}): +def split_scan_data(data: dict, options={}) -> list: + ''' Splits a XANES-file from BM31 into different files depending on the edge. Has the option to add intensities of all scans of same edge into the same file. + As of now only picks out xmap_rois (fluoresence mode) and for Mn, Fe, Co and Ni K-edges.''' - required_options = ['save', 'save_folder', 'replace', 'add_rois'] + required_options = ['log', 'logfile', 'save', 'save_folder', 'replace', 'add_rois'] default_options = { - 'save': False, - 'save_folder': '.', - 'replace': False, - 'add_rois': False + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_split_edges.log', + 'save': False, # whether to save the files or not + 'save_folder': '.', # root folder of where to save the files + 'replace': False, # whether to replace the files if they already exist + 'add_rois': False # Whether to add the rois of individual scans of the same edge together } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -30,9 +35,15 @@ def split_scan_data(data: dict, options={}): data['path'] = [data['path']] all_scans = [] + + if options['log']: + aux.write_log(message='Starting file splitting...', options=options) for filename in data['path']: + if options['log']: + aux.write_log(message=f'Reading {filename}...', options=options) + with open(filename, 'r') as f: lines = f.readlines() @@ -44,6 +55,9 @@ def split_scan_data(data: dict, options={}): # Header line starts with #L - reads headers, and toggles data read-in on if line[0:2] == "#L": header, read_data = line[2:].split(), True + + if options['log']: + aux.write_log(message='... Found scan data. Starting read-in...', options=options) continue # First line after data started with #C - stops data read-in @@ -69,27 +83,46 @@ def split_scan_data(data: dict, options={}): for i, scan_data in enumerate(scan_datas): + xanes_df = pd.DataFrame(scan_data).apply(pd.to_numeric) xanes_df.columns = headers[i] + edge = find_element({'xanes_data_original': xanes_df}) + + if options['log']: + aux.write_log(message=f'Starting data clean-up ({edge}-edge)... ({i+1}/{len(scan_datas)})', options=options) + if not ('xmap_roi00' in headers[i]) and (not 'xmap_roi01' in headers[i]): + if options['log']: + aux.write_log(message='... Did not find fluoresence data. Skipping...', options=options) + continue - edge = find_element({'xanes_data_original': xanes_df}) + edges[edge].append(xanes_df) if options['add']: + + if options['log']: + aux.write_log(message=f'Addition of rois enabled. Starting addition...', options=options) added_edges = {'Mn': [], 'Fe': [], 'Co': [], 'Ni': []} for edge, scans in edges.items(): + + if options['log']: + aux.write_log(message=f'... Adding rois of the {edge}-edge...', options=options) + if scans: xanes_df = scans[0] for i, scan in enumerate(scans): if i > 0: + if options['log']: + aux.write_log(message=f'... ... Adding {i}/{len(scans)}', options=options) + if 'xmap_roi00' in xanes_df.columns: xanes_df['xmap_roi00'] += scan['xmap_roi00'] if 'xmap_roi01' in xanes_df.columns: @@ -100,7 +133,14 @@ def split_scan_data(data: dict, options={}): edges = added_edges if options['save']: + + if options['log']: + aux.write_log(message=f'Saving data to {options["save_folder"]}', options=options) + if not os.path.isdir(options['save_folder']): + if options['log']: + aux.write_log(message=f'... {options["save_folder"]} does not exist. Creating folder.', options=options) + os.makedirs(options['save_folder']) @@ -110,10 +150,26 @@ def split_scan_data(data: dict, options={}): for i, scan in enumerate(scans): count = '' if options['add'] else '_'+str(i).zfill(4) path = os.path.join(options['save_folder'], f'{filename}_{edge}{count}.dat') - scan.to_csv(path) + + if not os.path.isfile(path): + scan.to_csv(path) + if options['log']: + aux.write_log(message=f'... Scan saved to {path}', options=options) + + elif options['replace'] and os.path.isfile(path): + scan.to_csv(path) + if options['log']: + aux.write_log(message=f'... File already exists. Overwriting to {path}', options=options) + + elif not options['replace'] and os.path.isfile(path): + if options['log']: + aux.write_log(message=f'... File already exists. Skipping...', options=options) all_scans.append(edges) + if options['log']: + aux.write_log(message=f'All done!', options=options) + return all_scans @@ -124,9 +180,9 @@ def read_data(data: dict, options={}) -> pd.DataFrame: # FIXME Handle the case when dataseries are not the same size - required_options = [] + required_options = ['adjust'] default_options = { - + 'adjust': 0 } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -135,6 +191,7 @@ def read_data(data: dict, options={}) -> pd.DataFrame: # Initialise DataFrame with only ZapEnergy-column xanes_data = pd.read_csv(data['path'][0])[['ZapEnergy']] + xanes_data['ZapEnergy'] += options['adjust'] if not isinstance(data['path'], list): data['path'] = [data['path']] @@ -157,6 +214,7 @@ def read_data(data: dict, options={}) -> pd.DataFrame: + def determine_active_roi(scan_data): # FIXME For Co-edge, this gave a wrong scan From 054311ca102893b2aca555a9360e13f847dd01f2 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Tue, 21 Jun 2022 18:01:53 +0200 Subject: [PATCH 4/9] Small adjustments to logging --- nafuma/xanes/io.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py index 40ea0c2..f623a38 100644 --- a/nafuma/xanes/io.py +++ b/nafuma/xanes/io.py @@ -16,7 +16,7 @@ def split_scan_data(data: dict, options={}) -> list: default_options = { 'log': False, - 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_split_edges.log', + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_split_edges.log', 'save': False, # whether to save the files or not 'save_folder': '.', # root folder of where to save the files 'replace': False, # whether to replace the files if they already exist @@ -24,12 +24,6 @@ def split_scan_data(data: dict, options={}) -> list: } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - #root is the path to the beamtime-folder - #destination should be the path to the processed data - - #insert a for-loop to go through all the folders.dat-files in the folder root\xanes\raw - - # FIXME Only adding this variable to pass the Linting-tests - will refactor this later if not isinstance(data['path'], list): data['path'] = [data['path']] @@ -89,12 +83,12 @@ def split_scan_data(data: dict, options={}) -> list: edge = find_element({'xanes_data_original': xanes_df}) if options['log']: - aux.write_log(message=f'Starting data clean-up ({edge}-edge)... ({i+1}/{len(scan_datas)})', options=options) + aux.write_log(message=f'... Starting data clean-up ({edge}-edge)... ({i+1}/{len(scan_datas)})', options=options) if not ('xmap_roi00' in headers[i]) and (not 'xmap_roi01' in headers[i]): if options['log']: - aux.write_log(message='... Did not find fluoresence data. Skipping...', options=options) + aux.write_log(message='... ... Did not find fluoresence data. Skipping...', options=options) continue @@ -106,13 +100,13 @@ def split_scan_data(data: dict, options={}) -> list: if options['add']: if options['log']: - aux.write_log(message=f'Addition of rois enabled. Starting addition...', options=options) + aux.write_log(message=f'... Addition of rois enabled. Starting addition...', options=options) added_edges = {'Mn': [], 'Fe': [], 'Co': [], 'Ni': []} for edge, scans in edges.items(): if options['log']: - aux.write_log(message=f'... Adding rois of the {edge}-edge...', options=options) + aux.write_log(message=f'... ... Adding rois of the {edge}-edge...', options=options) if scans: xanes_df = scans[0] @@ -121,7 +115,7 @@ def split_scan_data(data: dict, options={}) -> list: if i > 0: if options['log']: - aux.write_log(message=f'... ... Adding {i}/{len(scans)}', options=options) + aux.write_log(message=f'... ... ... Adding {i+1}/{len(scans)}', options=options) if 'xmap_roi00' in xanes_df.columns: xanes_df['xmap_roi00'] += scan['xmap_roi00'] @@ -135,11 +129,11 @@ def split_scan_data(data: dict, options={}) -> list: if options['save']: if options['log']: - aux.write_log(message=f'Saving data to {options["save_folder"]}', options=options) + aux.write_log(message=f'... Saving data to {options["save_folder"]}', options=options) if not os.path.isdir(options['save_folder']): if options['log']: - aux.write_log(message=f'... {options["save_folder"]} does not exist. Creating folder.', options=options) + aux.write_log(message=f'... ... {options["save_folder"]} does not exist. Creating folder.', options=options) os.makedirs(options['save_folder']) @@ -154,16 +148,16 @@ def split_scan_data(data: dict, options={}) -> list: if not os.path.isfile(path): scan.to_csv(path) if options['log']: - aux.write_log(message=f'... Scan saved to {path}', options=options) + aux.write_log(message=f'... ... Scan saved to {path}', options=options) elif options['replace'] and os.path.isfile(path): scan.to_csv(path) if options['log']: - aux.write_log(message=f'... File already exists. Overwriting to {path}', options=options) + aux.write_log(message=f'... ... File already exists. Overwriting to {path}', options=options) elif not options['replace'] and os.path.isfile(path): if options['log']: - aux.write_log(message=f'... File already exists. Skipping...', options=options) + aux.write_log(message=f'... ... File already exists. Skipping...', options=options) all_scans.append(edges) From 1cf949e36bbd1c9ef8e377535c0782f7507ccefa Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Tue, 21 Jun 2022 18:02:08 +0200 Subject: [PATCH 5/9] Start clean-up of smoothing --- nafuma/xanes/calib.py | 97 ++++++++++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 38 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 10afda9..4692446 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -126,7 +126,7 @@ def pre_edge_subtraction(data: dict, options={}): required_options = ['log', 'logfile', 'save_plots', 'save_folder'] default_options = { 'log': False, - 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S.log")}_pre_edge_subtraction.log', + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_pre_edge_subtraction.log', 'save_plots': False, 'save_folder': './' } @@ -162,10 +162,10 @@ def pre_edge_subtraction(data: dict, options={}): def estimate_edge_position(data: dict, options={}, index=0): #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. - required_options = ['print','periods'] + required_options = ['log','logfile', 'periods'] default_options = { - - 'print': False, + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_edge_position_estimation.log', 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) @@ -189,25 +189,29 @@ def estimate_edge_position(data: dict, options={}, index=0): def post_edge_fit(data: dict, options={}): #FIXME should be called "fitting post edge" (normalization is not done here, need edge shift position) - required_options = ['post_edge_start', 'print'] + required_options = ['log', 'logfile', 'post_edge_interval'] default_options = { - 'post_edge_start': None, - 'print': False + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_post_edge_fit.log', + 'post_edge_interval': [None, None], } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - #FIXME Allow min and max limits - if not options['post_edge_start']: + if not options['post_edge_interval'][0]: post_edge_limit_offset = 0.03 data['edge'] = find_element(data) edge_position = estimate_edge_position(data, options, index=0) - post_edge_limit = edge_position + post_edge_limit_offset + options['post_edge_interval'][0] = edge_position + post_edge_limit_offset - post_edge_data = data['xanes_data_original'].loc[data['xanes_data_original']["ZapEnergy"] > post_edge_limit] + if not options['post_edge_interval'][1]: + options['post_edge_interval'][1] = data['xanes_data_original']['ZapEnergy'].max() + + + post_edge_data = data['xanes_data_original'].loc[(data['xanes_data_original']["ZapEnergy"] > options['post_edge_interval'][0]) & (data['xanes_data_original']["ZapEnergy"] < options['post_edge_interval'][1])] post_edge_data.dropna(inplace=True) #Removing all indexes without any value, as some of the data sets misses the few last data points and fucks up the fit # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data @@ -253,40 +257,31 @@ def post_edge_fit(data: dict, options={}): return post_edge_fit_data -def smoothing(path, options={}): - required_options = ['print','window_length','polyorder'] +def smoothing(data: dict, options={}): + + # FIXME Add logging + # FIXME Add saving of files + + required_options = ['log', 'logfile', 'window_length','polyorder'] default_options = { - 'print': False, + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_smoothing.log', + 'save_plots': False, + 'save_folder': './', 'window_length': 3, 'polyorder': 2 } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - df_bkgd_sub, df_postedge, filenames, edge = post_edge_fit(path,options=options) - #================= SMOOTHING - df_smooth = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) - df_default = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) - #df_smooth[filenames] = df_bkgd_sub.iloc[:,2].rolling(window=rolling_av).mean() - #df_smooth[filenames] = df_smooth[filenames].shift(-int((rolling_av)/2)) - for filename in filenames: - x_smooth=savgol_filter(df_bkgd_sub[filename], options['window_length'],options['polyorder']) - df_smooth[filename] = x_smooth - x_default=savgol_filter(df_bkgd_sub[filename],default_options['window_length'],default_options['polyorder']) - df_default[filename] = x_default - - + + # FIXME Add other types of filters + for filename in data['path']: + xanes_smooth = savgol_filter(data['xanes_data'][filename], options['window_length'], options['polyorder']) + default_smooth = savgol_filter(data['xanes_data'][filename], default_options['window_length'], default_options['polyorder']) + #printing the smoothed curves vs data - if options['print'] == True: - - ## ================================================ - #df_diff = pd.DataFrame(df_smooth["ZapEnergy"]) - #df_diff_estimated_max = df_diff[filenames].dropna().max() - - - #estimated_edge_shift=df_diff.loc[df_diff[filenames] == df_diff_max,'ZapEnergy'].values[0] - # ========================================== - + if options['save_folder'] == True: fig, (ax1,ax2) = plt.subplots(1,2,figsize=(15,5)) x_range_zoom=[6.54,6.55] #make into widget @@ -303,8 +298,34 @@ def smoothing(path, options={}): ax2.set_xlim(x_range_zoom) ax2.set_ylim(y_range_zoom) ax2.set_title("Smoothed curve (green) vs data (red) using default window_length and polyorder") + + + # FIXME Clear up these two plotting functions + + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) + + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_pre_edge_fit.png' + + fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5)) + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) + pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) + ax1.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') + ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) + + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) + pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax2) + ax2.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') + ax2.set_xlim([min(pre_edge_data['ZapEnergy']), max(pre_edge_data['ZapEnergy'])]) + ax2.set_ylim([min(pre_edge_data[filename]), max(pre_edge_data[filename])]) + ax2.set_title(f'{os.path.basename(filename)} - Fit region', size=20) + + + plt.savefig(dst, transparent=False) + plt.close() - return df_smooth, filenames + return xanes_smooth, default_smooth From 9e39135f0022903502fd81d6ba879da6e4bd8de0 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Tue, 21 Jun 2022 19:04:04 +0200 Subject: [PATCH 6/9] Update smoothing function --- nafuma/xanes/calib.py | 89 +++++++++++++++++++++++-------------------- 1 file changed, 47 insertions(+), 42 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 4692446..d81ac0f 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -262,70 +262,75 @@ def smoothing(data: dict, options={}): # FIXME Add logging # FIXME Add saving of files - required_options = ['log', 'logfile', 'window_length','polyorder'] + required_options = ['log', 'logfile', 'window_length','polyorder', 'save_default'] default_options = { 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_smoothing.log', 'save_plots': False, 'save_folder': './', 'window_length': 3, - 'polyorder': 2 + 'polyorder': 2, + 'save_default': False } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + if options['save_default']: + data['xanes_data_smooth_default'] = data['xanes_data']['ZapEnergy'] # FIXME Add other types of filters + # FIXME Instead of assigning values directly to the data dictionary, these should be made into an own DataFrame that you can decide later what to do with - these variables should + # then be returned for filename in data['path']: xanes_smooth = savgol_filter(data['xanes_data'][filename], options['window_length'], options['polyorder']) - default_smooth = savgol_filter(data['xanes_data'][filename], default_options['window_length'], default_options['polyorder']) + if options['save_default']: + default_smooth = savgol_filter(data['xanes_data'][filename], default_options['window_length'], default_options['polyorder']) + data['xanes_data'][filename] = xanes_smooth - #printing the smoothed curves vs data - if options['save_folder'] == True: - - fig, (ax1,ax2) = plt.subplots(1,2,figsize=(15,5)) - x_range_zoom=[6.54,6.55] #make into widget - y_range_zoom=[20000,80000] #make into widget - - df_bkgd_sub.plot.scatter(x = "ZapEnergy",y=filenames, ax=ax1, color="Red") - df_smooth.plot(x = "ZapEnergy",y=filenames, ax=ax1, color="Blue") - ax1.set_xlim(x_range_zoom) - ax1.set_ylim(y_range_zoom) - ax1.set_title("Smoothed curve (blue) vs data (red) used for further analysis") - - df_bkgd_sub.plot.scatter(x = "ZapEnergy",y=filenames, ax=ax2, color="Red") - df_default.plot(x = "ZapEnergy",y=filenames, ax=ax2, color="Green") - ax2.set_xlim(x_range_zoom) - ax2.set_ylim(y_range_zoom) - ax2.set_title("Smoothed curve (green) vs data (red) using default window_length and polyorder") + if options['save_default']: + data['xanes_data_smooth_default'][filename] = default_smooth - # FIXME Clear up these two plotting functions + if options['save_plots']: + if not os.path.isdir(options['save_folder']): + os.makedirs(options['save_folder']) - if options['save_plots']: - if not os.path.isdir(options['save_folder']): - os.makedirs(options['save_folder']) + dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_smooth.png' - dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_pre_edge_fit.png' - - fig, (ax1, ax2) = plt.subplots(1,2,figsize=(10,5)) - data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) - pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) - ax1.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') - ax1.set_title(f'{os.path.basename(filename)} - Full view', size=20) - - data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) - pre_edge_fit_data.plot(x='ZapEnergy', y=filename, color='red', ax=ax2) - ax2.axvline(x = max(pre_edge_data['ZapEnergy']), ls='--') - ax2.set_xlim([min(pre_edge_data['ZapEnergy']), max(pre_edge_data['ZapEnergy'])]) - ax2.set_ylim([min(pre_edge_data[filename]), max(pre_edge_data[filename])]) - ax2.set_title(f'{os.path.basename(filename)} - Fit region', size=20) + edge_pos = estimate_edge_position(data=data, options=options) + intensity_midpoint = data['xanes_data'][filename].max() - data['xanes_data'][filename].min() - plt.savefig(dst, transparent=False) - plt.close() + if options['save_default']: + fig, (ax1, ax2) = plt.subplots(1,2,figsize=(20,5)) + data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) + xanes_smooth.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) + ax1.set_xlim([edge_pos-0.5, edge_pos+0.5]) + ax1.set_ylim([intensity_midpoint*0.98, intensity_midpoint*1.02]) + + ax1.set_title(f'{os.path.basename(filename)} - Smooth', size=20) + + data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) + data['xanes_data_smooth_default'].plot(x='ZapEnergy', y=filename, color='green', ax=ax2) + ax2.set_xlim([edge_pos-0.5, edge_pos+0.5]) + ax2.set_ylim([intensity_midpoint*0.98, intensity_midpoint*1.02]) + ax2.set_title(f'{os.path.basename(filename)} - Smooth (default values)', size=20) + + elif not options['save_default']: + fig, ax = plt.subplots(figsize=(10,5)) + data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) + xanes_smooth.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) + ax1.set_xlim([edge_pos-0.5, edge_pos+0.5]) + ax1.set_ylim([intensity_midpoint*0.98, intensity_midpoint*1.02]) + + ax1.set_title(f'{os.path.basename(filename)} - Smooth', size=20) + + + plt.savefig(dst, transparent=False) + plt.close() - return xanes_smooth, default_smooth + # FIXME See comment above about return values + return None From 4d501adb729b4d48f0a3f5417e418a33462e2547 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Wed, 22 Jun 2022 15:56:34 +0200 Subject: [PATCH 7/9] Complete smooth and get determine_edge_position going --- nafuma/xanes/calib.py | 351 ++++++++++++++++++++++-------------------- 1 file changed, 187 insertions(+), 164 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index d81ac0f..dfe5b07 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -1,3 +1,5 @@ +from logging import raiseExceptions +from jinja2 import TemplateRuntimeError import pandas as pd import numpy as np import os @@ -160,31 +162,7 @@ def pre_edge_subtraction(data: dict, options={}): return xanes_data_bkgd_subtracted -def estimate_edge_position(data: dict, options={}, index=0): - #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. - required_options = ['log','logfile', 'periods'] - default_options = { - 'log': False, - 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_edge_position_estimation.log', - 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly - } - options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - #making new dataframe to keep the differentiated data - df_diff = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) - df_diff[data['path'][index]]=data['xanes_data_original'][data['path'][index]].diff(periods=options['periods']) - - #shifting column values up so that average differential fits right between the points used in the calculation - df_diff[data['path'][index]]=df_diff[data['path'][index]].shift(-int(options['periods']/2)) - df_diff_max = df_diff[data['path'][index]].dropna().max() - estimated_edge_shift =df_diff.loc[df_diff[data['path'][index]] == df_diff_max,'ZapEnergy'].values[0] - - # FIXME Add logging option to see the result - - if options['log']: - aux.write_log(message=f'Estimated edge shift for determination of pre-edge area is: {estimated_edge_shift} keV', options=options) - - return estimated_edge_shift def post_edge_fit(data: dict, options={}): @@ -274,22 +252,20 @@ def smoothing(data: dict, options={}): } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + df_smooth = pd.DataFrame(data['xanes_data']['ZapEnergy']) + if options['save_default']: - data['xanes_data_smooth_default'] = data['xanes_data']['ZapEnergy'] + df_smooth_default = pd.DataFrame(data['xanes_data']['ZapEnergy']) # FIXME Add other types of filters # FIXME Instead of assigning values directly to the data dictionary, these should be made into an own DataFrame that you can decide later what to do with - these variables should # then be returned for filename in data['path']: - xanes_smooth = savgol_filter(data['xanes_data'][filename], options['window_length'], options['polyorder']) - if options['save_default']: - default_smooth = savgol_filter(data['xanes_data'][filename], default_options['window_length'], default_options['polyorder']) - - data['xanes_data'][filename] = xanes_smooth + df_smooth.insert(1, filename, savgol_filter(data['xanes_data'][filename], options['window_length'], options['polyorder'])) if options['save_default']: - data['xanes_data_smooth_default'][filename] = default_smooth - + df_smooth_default.insert(1, filename, savgol_filter(data['xanes_data'][filename], default_options['window_length'], default_options['polyorder'])) + if options['save_plots']: if not os.path.isdir(options['save_folder']): @@ -298,39 +274,35 @@ def smoothing(data: dict, options={}): dst = os.path.join(options['save_folder'], os.path.basename(filename)) + '_smooth.png' edge_pos = estimate_edge_position(data=data, options=options) - intensity_midpoint = data['xanes_data'][filename].max() - data['xanes_data'][filename].min() - - + intensity_midpoint = df_smooth[filename].iloc[np.where(df_smooth['ZapEnergy'] == find_nearest(df_smooth['ZapEnergy'], edge_pos))].values[0] + if options['save_default']: fig, (ax1, ax2) = plt.subplots(1,2,figsize=(20,5)) - data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) - xanes_smooth.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) - ax1.set_xlim([edge_pos-0.5, edge_pos+0.5]) - ax1.set_ylim([intensity_midpoint*0.98, intensity_midpoint*1.02]) - + data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-0.0015) & (data['xanes_data']['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='black', ax=ax1, kind='scatter') + df_smooth.loc[(df_smooth['ZapEnergy'] > edge_pos-0.0015) & (df_smooth['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='red', ax=ax1) ax1.set_title(f'{os.path.basename(filename)} - Smooth', size=20) - data['xanes_data_original'].plot(x='ZapEnergy', y=filename, color='black', ax=ax2) - data['xanes_data_smooth_default'].plot(x='ZapEnergy', y=filename, color='green', ax=ax2) - ax2.set_xlim([edge_pos-0.5, edge_pos+0.5]) - ax2.set_ylim([intensity_midpoint*0.98, intensity_midpoint*1.02]) + data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-0.0015) & (data['xanes_data']['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='black', ax=ax2, kind='scatter') + df_smooth_default.loc[(df_smooth_default['ZapEnergy'] > edge_pos-0.0015) & (df_smooth_default['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='red', ax=ax2) ax2.set_title(f'{os.path.basename(filename)} - Smooth (default values)', size=20) elif not options['save_default']: fig, ax = plt.subplots(figsize=(10,5)) - data['xanes_data'].plot(x='ZapEnergy', y=filename, color='black', ax=ax1) - xanes_smooth.plot(x='ZapEnergy', y=filename, color='red', ax=ax1) - ax1.set_xlim([edge_pos-0.5, edge_pos+0.5]) - ax1.set_ylim([intensity_midpoint*0.98, intensity_midpoint*1.02]) + data['xanes_data'].loc[(data['xanes_data']['ZapEnergy'] > edge_pos-0.0015) & (data['xanes_data']['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='black', ax=ax, kind='scatter') + df_smooth.loc[(df_smooth['ZapEnergy'] > edge_pos-0.0015) & (df_smooth['ZapEnergy'] < edge_pos+0.0015)].plot(x='ZapEnergy', y=filename, color='red', ax=ax) + ax.set_xlim([edge_pos-0.0015, edge_pos+0.0015]) + ax.set_ylim([intensity_midpoint*0.9, intensity_midpoint*1.1]) - ax1.set_title(f'{os.path.basename(filename)} - Smooth', size=20) + ax.set_title(f'{os.path.basename(filename)} - Smooth', size=20) plt.savefig(dst, transparent=False) plt.close() - # FIXME See comment above about return values - return None + if not options['save_default']: + df_smooth_default = None + + return df_smooth, df_smooth_default @@ -340,133 +312,184 @@ def find_nearest(array, value): idx = (np.abs(array - value)).argmin() return array[idx] -def finding_e0(path, options={}): - required_options = ['print','periods'] + +def estimate_edge_position(data: dict, options={}, index=0): + #a dataset is differentiated to find a first estimate of the edge shift to use as starting point. + required_options = ['log','logfile', 'periods'] default_options = { - 'print': False, + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_edge_position_estimation.log', 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - df_smooth, filenames = smoothing(path, options=options) #This way the smoothing is printed as long as the "finding e0" is printed. - - if options['periods'] % 2 == 1: - print("NB!!!!!!!!!!!!!!!!! Periods needs to be an even number for the shifting of values to work properly") - ###df_diff = pd.DataFrame(df_smooth["ZapEnergy"]) # - if len(filenames) == 1: - filenames=filenames[0] - else: - print("MORE THAN ONE FILE --> generalize") - - ##### - estimated_edge_shift, df_diff, df_diff_max = estimate_edge_position(df_smooth, filenames,options=options) - print(estimated_edge_shift) - #### - ###df_diff[filenames]=df_smooth[filenames].diff(periods=options['periods']) # - df_doublediff=pd.DataFrame(df_smooth["ZapEnergy"]) - df_doublediff[filenames]=df_diff[filenames].diff(periods=options['periods']) - - if options['print'] == True: - fig, (ax1,ax2) = plt.subplots(1,2,figsize=(15,5)) - - df_diff.plot(x = "ZapEnergy",y=filenames, ax=ax1) #defining x and y - df_doublediff.plot(x = "ZapEnergy",y=filenames,ax=ax2) #defining x and y + #making new dataframe to keep the differentiated data + df_diff = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) + df_diff[data['path'][index]]=data['xanes_data_original'][data['path'][index]].diff(periods=options['periods']) #shifting column values up so that average differential fits right between the points used in the calculation - #df_diff[filenames]=df_diff[filenames].shift(-int(options['periods']/2)) # - df_doublediff[filenames]=df_doublediff[filenames].shift(-int(options['periods'])) + df_diff[data['path'][index]]=df_diff[data['path'][index]].shift(-int(options['periods']/2)) + df_diff_max = df_diff[data['path'][index]].dropna().max() + estimated_edge_shift =df_diff.loc[df_diff[data['path'][index]] == df_diff_max,'ZapEnergy'].values[0] + + # FIXME Add logging option to see the result + + if options['log']: + aux.write_log(message=f'Estimated edge shift for determination of pre-edge area is: {estimated_edge_shift} keV', options=options) + + return estimated_edge_shift + +def determine_edge_position(data: dict, options={}): - #finding maximum value to maneuver to the correct part of the data set - #df_diff_max = df_diff[filenames].dropna().max() - + required_options = ['log', 'logfile', 'save_plots', 'save_folder', 'periods', 'diff', 'double_diff', 'fit_region'] + default_options = { + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_determine_edge_position.log', + 'save_plots': False, + 'save_folder': './', + 'periods': 2, #Periods needs to be an even number for the shifting of values to work properly, + 'diff': True, + 'double_diff': False, + 'fit_region': 0.0005 - estimated_edge_shift=df_diff.loc[df_diff[filenames] == df_diff_max,'ZapEnergy'].values[0] + } - fit_region = 0.0004 - df_diff_edge=df_diff.loc[(df_diff["ZapEnergy"] < estimated_edge_shift+fit_region)]# and (df_diff["ZapEnergy"] > estimated_edge_shift-0.05)] - df_diff_edge=df_diff_edge.loc[(df_diff["ZapEnergy"] > estimated_edge_shift-fit_region)] - - - - - df_doublediff_edge=df_doublediff.loc[(df_doublediff["ZapEnergy"] < estimated_edge_shift+fit_region)]# and (df_diff["ZapEnergy"] > estimated_edge_shift-0.05)] - df_doublediff_edge=df_doublediff_edge.loc[(df_doublediff["ZapEnergy"] > estimated_edge_shift-fit_region)] - #df_diff_edge=df_diff.loc[(df_diff["ZapEnergy"] > estimated_edge_shift-0.15) and (df_diff["ZapEnergy"] < estimated_edge_shift+0.15)] + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - #df_diff_edge=df_diff.loc[df_diff["ZapEnergy"] > estimated_edge_shift-0.15] - #print(df_diff_edge) - if options['print'] == True: - fig, (ax3,ax4) = plt.subplots(1,2,figsize=(15,5)) + if options['periods'] % 2 == 1: + raise Exception("NB! Periods needs to be an even number for the shifting of values to work properly") - df_diff_edge.plot(x = "ZapEnergy",y=filenames,ax=ax3) #defining x and y - ax3.set_title("Zoomed into edge region (derivative))") - ax3.axvline(x = estimated_edge_shift) - - df_doublediff_edge.plot(x = "ZapEnergy",y=filenames,ax=ax4,kind="scatter") #defining x and y - ax4.set_title("Zoomed into edge region (double derivative)") - ax4.axvline(x = estimated_edge_shift) - ax4.axhline(0) - - - - #ax1.set_xlim([estimated_edge_shift-fit_region,estimated_edge_shift+fit_region]) - #ax1.set_title("not sure what this is tbh") - - #ax2.set_xlim([estimated_edge_shift-fit_region,estimated_edge_shift+fit_region]) - #ax2.set_title("not sure what this is either tbh") - - #============== - #df_smooth=df_smooth2 - #================= - - - - - #========================== fitting first differential ========== - df_diff = df_diff[df_diff[filenames].notna()] - - #fitting a function to the chosen interval - d = np.polyfit(df_diff_edge["ZapEnergy"],df_diff_edge[filenames],2) - function_diff = np.poly1d(d) - - x_diff=np.linspace(df_diff_edge["ZapEnergy"].iloc[0],df_diff_edge["ZapEnergy"].iloc[-1],num=1000) - y_diff=function_diff(x_diff) - #print(df_diff_edge["ZapEnergy"].iloc[-1]) - if options['print'] == True: - ax3.plot(x_diff,y_diff,color='Green') - - #y_diff_max=np.amax(y_diff,0) - y_diff_max_index = np.where(y_diff == np.amax(y_diff)) - #print(y_diff_max_index[0]) - edge_shift_diff=float(x_diff[y_diff_max_index]) - print("Edge shift estimated by the differential maximum is "+str(round(edge_shift_diff,5))) - if options['print'] == True: - ax3.axvline(x=edge_shift_diff,color="green") - #print(df_doublediff_edge["ZapEnergy"].iloc[0]) - #ax4.plot(x_doublediff,y_doublediff,color='Green')) - - - #fitting double differentiate - df_doublediff = df_doublediff[df_doublediff[filenames].notna()] - d = np.polyfit(df_doublediff_edge["ZapEnergy"],df_doublediff_edge[filenames],2) - function_doublediff = np.poly1d(d) - - x_doublediff=np.linspace(df_doublediff_edge["ZapEnergy"].iloc[0],df_doublediff_edge["ZapEnergy"].iloc[-1],num=10000) - y_doublediff=function_doublediff(x_doublediff) - - if options['print'] == True: - ax4.plot(x_doublediff,y_doublediff,color='Green') - - y_doublediff_zero=find_nearest(y_doublediff,0) - y_doublediff_zero_index = np.where(y_doublediff == y_doublediff_zero) - - edge_shift_doublediff=float(x_doublediff[y_doublediff_zero_index]) - print("Edge shift estimated by the double differential zero-point is "+str(round(edge_shift_doublediff,5))) - if options['print'] == True: - ax4.axvline(x=edge_shift_doublediff,color="green") + ##### - return df_smooth, filenames, edge_shift_diff + if options['diff']: + df_diff = pd.DataFrame(data['xanes_data']['ZapEnergy']) + if options['double_diff']: + df_double_diff = pd.DataFrame(data['xanes_data']['ZapEnergy']) + + for i, filename in enumerate(data['path']): + estimated_edge_pos = estimate_edge_position(data, options=options, index=i) + + + #========================== fitting first differential ========== + + if options['diff']: + df_diff[filename] = data['xanes_data'][filename].diff(periods=options['periods']) + df_diff[filename]=df_diff[filename].shift(-int(options['periods']/2)) + + df_diff_edge = df_diff.loc[(df_diff["ZapEnergy"] < estimated_edge_pos+options['fit_region']) & ((df_diff["ZapEnergy"] > estimated_edge_pos-options['fit_region']))] + + + # Fitting a function to the chosen interval + params = np.polyfit(df_diff_edge["ZapEnergy"], df_diff_edge[filename], 2) + diff_function = np.poly1d(params) + + x_diff=np.linspace(df_diff_edge["ZapEnergy"].iloc[0],df_diff_edge["ZapEnergy"].iloc[-1],num=10000) + y_diff=diff_function(x_diff) + + df_diff_fit_function = pd.DataFrame(x_diff) + df_diff_fit_function['y_diff'] = y_diff + df_diff_fit_function.columns = ['x_diff', 'y_diff'] + + # Picks out the x-value where the y-value is at a maximum + edge_pos_diff=x_diff[np.where(y_diff == np.amax(y_diff))][0] + + if options['log']: + aux.write_log(message=f"Edge shift estimated by the differential maximum is: {str(round(edge_pos_diff,5))}", options=options) + + + if options['double_diff']: + df_double_diff[filename] = data['xanes_data'][filename].diff(periods=options['periods']).diff(periods=options['periods']) + df_double_diff[filename]=df_double_diff[filename].shift(-int(options['periods'])) + + # Pick out region of interest + df_double_diff_edge = df_double_diff.loc[(df_double_diff["ZapEnergy"] < estimated_edge_pos+options['fit_region']) & ((df_double_diff["ZapEnergy"] > estimated_edge_pos-options['fit_region']))] + + # Fitting a function to the chosen interval + params = np.polyfit(df_double_diff_edge["ZapEnergy"], df_double_diff_edge[filename], 2) + double_diff_function = np.poly1d(params) + + x_double_diff=np.linspace(df_double_diff_edge["ZapEnergy"].iloc[0], df_double_diff_edge["ZapEnergy"].iloc[-1],num=10000) + y_double_diff=double_diff_function(x_double_diff) + + df_double_diff_fit_function = pd.DataFrame(x_double_diff) + df_double_diff_fit_function['y_diff'] = y_double_diff + df_double_diff_fit_function.columns = ['x_diff', 'y_diff'] + + + # Picks out the x-value where the y-value is closest to 0 + edge_pos_double_diff=x_double_diff[np.where(y_double_diff == find_nearest(y_double_diff,0))][0] + + if options['log']: + aux.write_log(message=f"Edge shift estimated by the double differential zero-point is {str(round(edge_pos_double_diff,5))}", options=options) + + if options['save_plots']: + + if options['diff'] and options['double_diff']: + + fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(ncols=2, nrows=2, figsize=(20,20)) + df_diff.plot(x='ZapEnergy', y=filename, ax=ax1, kind='scatter') + df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax1) + ax1.set_xlim([edge_pos_diff-0.0015, edge_pos_diff+0.0015]) + ax1.axvline(x=edge_pos_diff-options['fit_region'], ls='--', c='black') + ax1.axvline(x=edge_pos_diff, ls='--', c='green') + ax1.axvline(x=edge_pos_diff+options['fit_region'], ls='--', c='black') + ax1.set_title('Fit region of differentiated data') + + df_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax2, kind='scatter') + df_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax2) + ax2.axvline(x=edge_pos_diff, ls='--', c='green') + ax2.axvline(x=estimated_edge_pos, ls='--', c='red') + ax2.set_title('Fit of differentiated data') + + + df_double_diff.plot(x='ZapEnergy', y=filename, ax=ax3, kind='scatter') + df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax3) + ax3.set_xlim([edge_pos_double_diff-0.0015, edge_pos_double_diff+0.0015]) + ax3.axvline(x=edge_pos_double_diff-options['fit_region'], ls='--', c='black') + ax3.axvline(x=edge_pos_double_diff, ls='--', c='green') + ax3.axvline(x=edge_pos_double_diff+options['fit_region'], ls='--', c='black') + + df_double_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax4, kind='scatter') + df_double_diff_fit_function.plot(x='x_diff', y='y_diff', ax=ax4) + ax4.axvline(x=edge_pos_double_diff, ls='--', c='green') + ax4.axvline(x=estimated_edge_pos, ls='--', c='red') + + + + + elif options['diff']: + fig, (ax1, ax2) = plt.subplots(ncols=2,nrows=1, figsize=(20, 10)) + df_diff.plot(x='ZapEnergy', y=filename, ax=ax1, kind='scatter') + ax1.set_xlim([edge_pos_diff-0.5, edge_pos_diff+0.5]) + ax1.axvline(x=edge_pos_diff-options['fit_region'], ls='--', c='black') + ax1.axvline(x=edge_pos_diff, ls='--', c='green') + ax1.axvline(x=edge_pos_diff+options['fit_region'], ls='--', c='black') + + df_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax2) + ax2.axvline(x=edge_pos_diff, ls='--', c='green') + ax2.axvline(x=estimated_edge_pos, ls='--', c='red') + + + elif options['double_diff']: + fig, (ax1, ax2) = plt.subplots(ncols=2,nrows=1, figsize=(20, 10)) + df_double_diff.plot(x='ZapEnergy', y=filename, ax=ax1, kind='scatter') + ax1.set_xlim([edge_pos_double_diff-0.5, edge_pos_double_diff+0.5]) + ax1.axvline(x=edge_pos_double_diff-options['fit_region'], ls='--', c='black') + ax1.axvline(x=edge_pos_double_diff, ls='--', c='green') + ax1.axvline(x=edge_pos_double_diff+options['fit_region'], ls='--', c='black') + + df_double_diff_edge.plot(x='ZapEnergy', y=filename, ax=ax2) + ax2.axvline(x=edge_pos_double_diff, ls='--', c='green') + ax2.axvline(x=estimated_edge_pos, ls='--', c='red') + + + if not options['diff']: + edge_pos_diff = None + if not options['double_diff']: + edge_pos_double_diff = None + + return edge_pos_diff, edge_pos_double_diff def normalization(data,options={}): required_options = ['print'] From ec1fba1c829a478d4e3ac3364617660dd9c37009 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 23 Jun 2022 11:46:06 +0200 Subject: [PATCH 8/9] Refactor normalisation and flattening functions --- nafuma/xanes/calib.py | 45 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index dfe5b07..5c111f5 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -491,25 +491,52 @@ def determine_edge_position(data: dict, options={}): return edge_pos_diff, edge_pos_double_diff -def normalization(data,options={}): - required_options = ['print'] +def normalise(data: dict, options={}): + required_options = ['log', 'logfile', 'save_values'] default_options = { - 'print': False, + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_normalisation.log', + 'save_values': True } options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - #Finding the normalization constant µ_0(E_0), by subtracting the value of the pre-edge-line from the value of the post-edge line at e0 - normalization_constant=post_edge_fit_function(e0) - pre_edge_fit_function(e0) - - #subtracting background (as in pre_edge_subtraction) + normalised_df = pd.DataFrame(data['xanes_data']['ZapEnergy']) - #dividing the background-subtracted data with the normalization constant + #Finding the normalisation constant µ_0(E_0), by subtracting the value of the pre-edge-line from the value of the post-edge line at e0 + for filename in data['path']: + normalisation_constant = data['post_edge_fit_function'][filename].loc[data['post_edge_fit_function']['ZapEnergy'] == data['e0'][filename]] - data['pre_edge_fit_function'].loc[data['pre_edge_fit_function']['ZapEnergy'] == data['e0'][filename]] + + normalised_df.insert(1, filename, data['xanes_data'] / normalisation_constant) + + if options['save_values']: + data['xanes_data'] = normalised_df + + + return normalised_df -def flattening(data,options={}): +def flatten(data:dict, options={}): #only picking out zapenergy-values higher than edge position (edge pos and below remains untouched) + + required_options = ['log', 'logfile', 'save_values'] + default_options = { + 'log': False, + 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_flattening.log', + 'save_values': True + } + options = aux.update_options(options=options, required_options=required_options, default_options=default_options) + + df_e0_and_above=df.loc[df['ZapEnergy'] > edge_shift_diff] + flattened_df = pd.DataFrame(data['xanes_data']['ZapEnergy']) + + for filename in data['path']: + above_e0 = data['xanes_data'][filename].loc(data['xanes_data']['ZapEnergy'] > data['e0'][filename]) + flattened_data = data['post_edge_fit_function'][filename] - + + + flattened_data = post_edge_fit_function(df_e0_and_above['ZapEnergy']) - pre_edge_fit_function(df_e0_and_above['ZapEnergy']) #make a new dataframe with flattened values From 2b14a64c4bbab1590e6028a0f75ed5d699372ff6 Mon Sep 17 00:00:00 2001 From: rasmusvt Date: Thu, 23 Jun 2022 15:32:29 +0200 Subject: [PATCH 9/9] Attempt to get flattening and normalisation to behave properly --- nafuma/xanes/calib.py | 45 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 9 deletions(-) diff --git a/nafuma/xanes/calib.py b/nafuma/xanes/calib.py index 5c111f5..2b12c78 100644 --- a/nafuma/xanes/calib.py +++ b/nafuma/xanes/calib.py @@ -78,6 +78,8 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data pre_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) + data['pre_edge_params'] = {} + for i, filename in enumerate(data['path']): if options['log']: aux.write_log(message=f'Fitting background on {os.path.basename(filename)} ({i+1} / {len(data["path"])})', options=options) @@ -85,6 +87,8 @@ def pre_edge_fit(data: dict, options={}) -> pd.DataFrame: #Fitting linear function to the background params = np.polyfit(pre_edge_data["ZapEnergy"],pre_edge_data[filename],1) fit_function = np.poly1d(params) + + data['pre_edge_params'][filename] = params #making a list, y_pre,so the background will be applied to all ZapEnergy-values background=fit_function(pre_edge_fit_data["ZapEnergy"]) @@ -194,6 +198,8 @@ def post_edge_fit(data: dict, options={}): # Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data post_edge_fit_data = pd.DataFrame(data['xanes_data_original']["ZapEnergy"]) + + data['post_edge_params'] = {} for i, filename in enumerate(data['path']): if options['log']: @@ -202,6 +208,8 @@ def post_edge_fit(data: dict, options={}): #Fitting linear function to the background params = np.polyfit(post_edge_data["ZapEnergy"], post_edge_data[filename], 2) fit_function = np.poly1d(params) + + data['post_edge_params'][filename] = params #making a list, y_pre,so the background will be applied to all ZapEnergy-values background=fit_function(post_edge_fit_data["ZapEnergy"]) @@ -341,8 +349,9 @@ def estimate_edge_position(data: dict, options={}, index=0): def determine_edge_position(data: dict, options={}): - required_options = ['log', 'logfile', 'save_plots', 'save_folder', 'periods', 'diff', 'double_diff', 'fit_region'] + required_options = ['save_values', 'log', 'logfile', 'save_plots', 'save_folder', 'periods', 'diff', 'double_diff', 'fit_region'] default_options = { + 'save_values': True, 'log': False, 'logfile': f'{datetime.now().strftime("%Y-%m-%d-%H-%M-%S")}_determine_edge_position.log', 'save_plots': False, @@ -366,6 +375,9 @@ def determine_edge_position(data: dict, options={}): df_diff = pd.DataFrame(data['xanes_data']['ZapEnergy']) if options['double_diff']: df_double_diff = pd.DataFrame(data['xanes_data']['ZapEnergy']) + if options['save_values']: + data['e0'] = {} + for i, filename in enumerate(data['path']): estimated_edge_pos = estimate_edge_position(data, options=options, index=i) @@ -395,7 +407,10 @@ def determine_edge_position(data: dict, options={}): edge_pos_diff=x_diff[np.where(y_diff == np.amax(y_diff))][0] if options['log']: - aux.write_log(message=f"Edge shift estimated by the differential maximum is: {str(round(edge_pos_diff,5))}", options=options) + aux.write_log(message=f"Edge position estimated by the differential maximum is: {str(round(edge_pos_diff,5))}", options=options) + + if options['save_values']: + data['e0'][filename] = edge_pos_diff if options['double_diff']: @@ -501,12 +516,21 @@ def normalise(data: dict, options={}): options = aux.update_options(options=options, required_options=required_options, default_options=default_options) normalised_df = pd.DataFrame(data['xanes_data']['ZapEnergy']) + data['normalisation_constants'] = {} #Finding the normalisation constant µ_0(E_0), by subtracting the value of the pre-edge-line from the value of the post-edge line at e0 for filename in data['path']: - normalisation_constant = data['post_edge_fit_function'][filename].loc[data['post_edge_fit_function']['ZapEnergy'] == data['e0'][filename]] - data['pre_edge_fit_function'].loc[data['pre_edge_fit_function']['ZapEnergy'] == data['e0'][filename]] + e0_ind = data['post_edge_fit_data'].loc[data['post_edge_fit_data']['ZapEnergy'] == find_nearest(data['post_edge_fit_data']['ZapEnergy'], data['e0'][filename])].index.values[0] + #norm = data['post_edge_fit_data'][filename].iloc[find_nearest(data['post_edge_fit_data'][filename], data['e0'][filename])] + normalisation_constant = data['post_edge_fit_data'][filename].iloc[e0_ind] - data['pre_edge_fit_data'][filename].iloc[e0_ind] + normalised_df.insert(1, filename, data['xanes_data'][filename] / normalisation_constant) - normalised_df.insert(1, filename, data['xanes_data'] / normalisation_constant) + + # Normalise the pre-edge and post-edge fit function data + data['pre_edge_fit_data'][filename] = data['pre_edge_fit_data'][filename] / normalisation_constant + data['post_edge_fit_data'][filename] = data['post_edge_fit_data'][filename] / normalisation_constant + + data['normalisation_constants'][filename] = normalisation_constant if options['save_values']: data['xanes_data'] = normalised_df @@ -527,17 +551,20 @@ def flatten(data:dict, options={}): options = aux.update_options(options=options, required_options=required_options, default_options=default_options) - df_e0_and_above=df.loc[df['ZapEnergy'] > edge_shift_diff] - flattened_df = pd.DataFrame(data['xanes_data']['ZapEnergy']) for filename in data['path']: - above_e0 = data['xanes_data'][filename].loc(data['xanes_data']['ZapEnergy'] > data['e0'][filename]) - flattened_data = data['post_edge_fit_function'][filename] - + fit_function_diff = -data['post_edge_fit_data'][filename] + data['pre_edge_params'][filename][0] + fit_function_diff.loc[flattened_df['ZapEnergy'] <= data['e0'][filename]] = 0 + + flattened_df[filename] = data['xanes_data'][filename] - fit_function_diff + if options['save_values']: + data['xanes_data'] = flattened_df + - flattened_data = post_edge_fit_function(df_e0_and_above['ZapEnergy']) - pre_edge_fit_function(df_e0_and_above['ZapEnergy']) + return flattened_df, fit_function_diff #make a new dataframe with flattened values