Merge pull request #1 from rasmusthog/halvor_xanes

Halvor xanes
2022-04-08 13:36:30 +02:00 · 2022-04-08 13:36:30 +02:00 · 092ecfa380
commit 092ecfa380
parent de2616067d b242602eba
3 changed files with 281 additions and 71 deletions
--- a/nafuma/xanes/calib.py
+++ b/nafuma/xanes/calib.py
@ -1,7 +1,10 @@
 import pandas as pd
 import numpy as np
 import os
-
+import matplotlib.pyplot as plt
 import beamtime.auxillary as aux
 import beamtime.xanes as xas
 import beamtime.xanes.io as io
 def rbkerbest():
     """Print the fixed greeting ``ROSENBORG!<3`` to standard output."""
     greeting = "ROSENBORG!<3"
     print(greeting)
@ -12,83 +15,143 @@ def rbkerbest():
 ##Better to make a new function that loops through the files, and performing the split_xanes_scan on
 #Trying to make a function that decides which edge it is based on the first ZapEnergy-value
 def finding_edge(df):
     """Identify the absorption edge from the first ZapEnergy value of *df*.

     Parameters
     ----------
     df : pandas.DataFrame
         Frame holding a ``"ZapEnergy"`` column.

     Returns
     -------
     str or None
         ``'Mn'`` when the first value lies strictly between 5.9 and 6.5,
         ``'Ni'`` when it lies strictly between 8.0 and 8.6, and ``None``
         when neither range matches or the column is empty.
     """
     energies = df["ZapEnergy"]
     if len(energies) == 0:
         return None

     # Positional access: the first row is wanted regardless of how the
     # frame is indexed (a filtered frame may no longer have a label 0).
     first_energy = energies.iloc[0]

     if 5.9 < first_energy < 6.5:
         return 'Mn'
     if 8.0 < first_energy < 8.6:
         return 'Ni'
     return None
 <<<<<<< HEAD:beamtime/xanes/calib.py
 #def pre_edge_subtraction(df,filenames, options={}):
 def test(innmat):
     """Pass *innmat* to ``xas.io.put_in_dataframe`` and print what it returns."""
     print(xas.io.put_in_dataframe(innmat))
 def pre_edge_subtraction(path, options={}):
    # Purpose (new side of this diff): fit a straight line to the pre-edge region
    # (ZapEnergy < edge_start) of every column listed in filenames and subtract it
    # from the data. Returns (df_bkgd_sub, filenames, edge).
    # options: 'print' plots the data, the fitted background and the subtracted result;
    # 'troubleshoot' plots each column on its own inside the fitting loop.
    # NOTE(review): this span is diff output, not runnable code - it still holds
    # merge-conflict markers and removed ('-') lines of the old split_xanes_scan parser
    # interleaved with the added ('+') lines. Resolve the conflict before relying on it.
    # NOTE(review): options={} is a mutable default argument; presumably
    # aux.update_options does not mutate it - confirm.
    required_options = ['print','troubleshoot']
    default_options = {
        'print': False,
        'troubleshoot': False
    }
    options = aux.update_options(options=options, required_options=required_options, default_options=default_options)
    filenames = xas.io.get_filenames(path)
    df= xas.io.put_in_dataframe(path)
    edge=finding_edge(df)
 =======
 def split_xanes_scan(filename, destination=None, replace=False):
    #root is the path to the beamtime-folder
    #destination should be the path to the processed data
 >>>>>>> master:nafuma/xanes/calib.py
-    #insert a for-loop to go through all the folders.dat-files in the folder root\xanes\raw
+    #Defining the end of the region used to define the background, thus start of the edge
    #implement widget
    # edge_start is only assigned for 'Mn' and 'Ni'; any other edge value leaves it undefined
    if edge == 'Mn':
        edge_start = 6.42
    if edge == 'Ni':
        edge_start = 8.3
-    with open(filename, 'r') as f:
+    #making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined)
-        lines = f.readlines()
+    df_start=df.loc[df["ZapEnergy"] < edge_start]
-    datas = []
+    #Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data
-    data = []
+    df_bkgd = pd.DataFrame(df["ZapEnergy"])
    headers = []
    header = ''
    start = False
-    for line in lines:
+    for files in filenames:
        if line[0:2] == "#L":
            start = True
            header = line[2:].split()
            continue
-        elif line[0:2] == "#C":
+    #Fitting linear function to the background
-            start = False
+        d = np.polyfit(df_start["ZapEnergy"],df_start[files],1)
        function_bkgd = np.poly1d(d)
-            if data:
+    #making a list, y_pre,so the background will be applied to all ZapEnergy-values
-                datas.append(data)
+        y_bkgd=function_bkgd(df["ZapEnergy"])
                data = []
-            if header:
+    #adding a new column in df_background with the y-values of the background
    # insert(1, ...) always places the new column directly after ZapEnergy, so the
    # background columns end up in reverse order of filenames
-                headers.append(header)
+        df_bkgd.insert(1,files,y_bkgd) 
                header = ''
        if options['troubleshoot'] == True:
        ###     FOR FIGURING OUT WHERE IT GOES WRONG/WHICH FILE IS CORRUPT
            ax = df.plot(x = "ZapEnergy",y=files)  
    #Plotting the calculated pre-edge background with the region used for the regression   
    if options['print'] == True:
    #Plotting an example of the edge_start region and the fitted background that will later be subtracted
        fig, (ax1,ax2,ax3) = plt.subplots(1,3,figsize=(15,5))
        df.plot(x="ZapEnergy", y=filenames,color="Black",ax=ax1)
        df_bkgd.plot(x="ZapEnergy", y=filenames,color="Red",ax=ax1)
        plt.axvline(x = max(df_start["ZapEnergy"])) 
        #fig = plt.figure(figsize=(15,15))
        df_bkgd.plot(x="ZapEnergy", y=filenames,color="Red",ax=ax2)
        ax1.set_title('Data and fitted background')
        #Zooming into background region to confirm fit and limits looks reasonable
        df.plot(x = "ZapEnergy",y=filenames,ax=ax2) #defining x and y)
        ax2.set_xlim([min(df_start["ZapEnergy"]),max(df_start["ZapEnergy"])+0.01])
        #finding maximum and minimum values in the backgrounds
        min_values=[]
        max_values=[]
        for file in filenames:
            min_values.append(min(df_start[file]))
            max_values.append(max(df_start[file]))
        ax2.set_ylim([min(min_values),max(max_values)])
        plt.axvline(x = max(df_start["ZapEnergy"]))
        #ax2.set_xlim([25, 50])
    ###################### Subtracting the pre edge from xmap_roi00   ################
-        if start == False:
+    #making a new dataframe to insert the background subtracted intensities
-            continue
+    df_bkgd_sub = pd.DataFrame(df["ZapEnergy"])
    #inserting the background subtracted original xmap_roi00 data
-        else:
+    for files in filenames:
-            data.append(line.split())
+        newintensity_calc=df[files]-df_bkgd[files]
        df_bkgd_sub.insert(1,files,newintensity_calc) 
    if options['print'] == True:
        df.plot(x = "ZapEnergy",y=filenames, color="Black", ax=ax3, legend=False)
        #plt.axvline(x = max(df_start["ZapEnergy"])) 
        df_bkgd_sub.plot(x="ZapEnergy", y=filenames,color="Red",ax=ax3, legend=False)
        ax3.set_title('Data and background-subtracted data')
    return df_bkgd_sub,filenames,edge
 def post_edge_normalization(path, options={}):
    # Purpose (new side of this diff): run pre_edge_subtraction(path), then fit a straight
    # line to the post-edge region (ZapEnergy > edge_stop) of every background-subtracted
    # column. Returns (df_bkgd_sub, df_postedge).
    # options: 'print' plots the background-subtracted data together with the fitted lines.
    # NOTE(review): this span is diff output - removed ('-') lines of the old
    # split_xanes_scan are interleaved with the added ('+') lines.
    # NOTE(review): options={} is a mutable default argument; presumably
    # aux.update_options does not mutate it - confirm.
-    edges = {'Mn': [6.0, 6.1, 6.2, 6.3, 6.4, 6.5], 'Fe': [6.8, 6.9, 7.0, 7.1, 7.2], 'Co': [7.6, 7.7, 7.8, 7.9], 'Ni': [8.1, 8.2, 8.3, 8.4, 8.5]}
+    required_options = ['print']
-    edge_count = {'Mn': 0, 'Fe': 0, 'Co': 0, 'Ni': 0}
+    default_options = {
        'print': False
    }
    options = aux.update_options(options=options, required_options=required_options, default_options=default_options)
    # called without options, so pre_edge_subtraction runs with its own defaults
    df_bkgd_sub,filenames,edge = pre_edge_subtraction(path)
    #Defining the end of the pre-edge-region for Mn/Ni, thus start of the edge
    #Implement widget
    # edge_stop is only assigned for 'Mn' and 'Ni'; any other edge value leaves it undefined
    if edge == 'Mn':
        edge_stop = 6.565
    if edge == 'Ni':
        edge_stop = 8.361
-    for ind, data in enumerate(datas):
+    df_end= df_bkgd_sub.loc[df_bkgd_sub["ZapEnergy"] > edge_stop] # new dataframe only containing the post edge, where a regression line will be calculated in the for-loop below
    # NOTE(review): in-place dropna on the result of a .loc filter may trigger pandas'
    # SettingWithCopyWarning - confirm
-        df = pd.DataFrame(data)
+    df_end.dropna(inplace=True) #Removing all rows with missing values, as some of the data sets lack the last few data points, which breaks the fit
-        df.columns = headers[ind]
+    df_postedge = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) #making a new dataframe 
-        edge_start = np.round((float(df["ZapEnergy"].min())), 1)
+    function_post_list=[]
    for files in filenames: 
        d = np.polyfit(df_end["ZapEnergy"],df_end[files],1)
        function_post = np.poly1d(d)
        y_post=function_post(df_bkgd_sub["ZapEnergy"]) 
        # function_post_list is filled here but neither returned nor read in the visible lines
        function_post_list.append(function_post)
        df_postedge.insert(1,files,y_post) #adding a new column with the y-values of the fitted post edge
-        for edge, energies in edges.items():
+    #Plotting the background subtracted signal with the post-edge regression line and the start point for the linear regression line
-            if edge_start in energies:
+    if options['print'] == True:
-                edge_actual = edge
+        ax = df_bkgd_sub.plot(x = "ZapEnergy",y=filenames) #defining x and y
-                edge_count[edge] += 1
+        plt.axvline(x = min(df_end["ZapEnergy"])) 
        fig = plt.figure(figsize=(15,15))
        df_postedge.plot(x="ZapEnergy", y=filenames,color="Green",ax=ax, legend=False)  
        ax = df_bkgd_sub.plot(x = "ZapEnergy",y=filenames, legend=False) #defining x and y
        df_postedge.plot(x="ZapEnergy", y=filenames,color="Green",ax=ax, legend=False)  
        plt.axvline(x = min(df_end["ZapEnergy"])) 
-        
+    return df_bkgd_sub, df_postedge
    # NOTE(review): everything below follows the return statement and uses names
    # (filename, edge_count, edge_actual, destination, df) that this function never
    # defines - it looks like leftover context from the old split_xanes_scan
        filename = filename.split('/')[-1]
        count = str(edge_count[edge_actual]).zfill(4)
        # Save 
        if destination:
            cwd = os.getcwd()
            if not os.path.isdir(destination):
                os.mkdir(destination)
            os.chdir(destination)
            df.to_csv('{}_{}_{}.dat'.format(filename.split('.')[0], edge_actual, count))
            os.chdir(cwd)
        else:
            df.to_csv('{}_{}_{}.dat'.format(filename.split('.')[0], edge_actual, count))
--- a/nafuma/xanes/io.py
+++ b/nafuma/xanes/io.py
@ -1,2 +1,148 @@
-#hello
+import pandas as pd
-#yeah
+import matplotlib.pyplot as plt
 import os
 import numpy as np
 def split_xanes_scan(root, destination=None, replace=False):
     """Split a multi-scan XANES data file into one CSV file per scan.

     The file is read line by line. A line starting with ``#L`` opens a scan
     and supplies its column names; a line starting with ``#C`` closes the
     scan. Each closed scan is written with ``DataFrame.to_csv`` to
     ``<stem>_<edge>_<count>.dat``, where ``<stem>`` is the input file name up
     to its first dot, ``<edge>`` is chosen from the lowest ``ZapEnergy`` of
     the scan rounded to one decimal, and ``<count>`` is a zero-padded
     per-edge counter. A trailing scan with no closing ``#C`` line is not
     written.

     Parameters
     ----------
     root : str
         Path of the data file to split.
         NOTE(review): older comments describe ``root`` as the beamtime
         folder, but the body opens a single file (and previously referenced
         an undefined name ``filename``), so the argument is used as the file
         path. Looping over ``root/xanes/raw`` is still to be implemented.
     destination : str, optional
         Folder for the output files; created when missing. When omitted the
         files are written to the current working directory.
     replace : bool, optional
         Accepted for interface compatibility; not used by this function.

     Raises
     ------
     ValueError
         If the start energy of a scan matches none of the known edges.
     """
     edges = {'Mn': [6.0, 6.1, 6.2, 6.3, 6.4, 6.5], 'Fe': [6.8, 6.9, 7.0, 7.1, 7.2], 'Co': [7.6, 7.7, 7.8, 7.9], 'Ni': [8.1, 8.2, 8.3, 8.4, 8.5]}
     edge_count = {edge: 0 for edge in edges}

     with open(root, 'r') as f:
         lines = f.readlines()

     # Header and rows are stored as a pair so they cannot drift out of step
     # when a header is followed by no data rows.
     scans = []
     header = ''
     data = []
     start = False
     for line in lines:
         marker = line[0:2]
         if marker == "#L":
             start = True
             header = line[2:].split()
             continue
         if marker == "#C":
             start = False
             if data:
                 scans.append((header, data))
             header = ''
             data = []
         if start:
             data.append(line.split())

     stem = os.path.basename(root).split('.')[0]

     for header, data in scans:
         df = pd.DataFrame(data)
         df.columns = header

         # The values are still strings at this point; convert before taking
         # the minimum so the comparison is numeric rather than lexicographic.
         edge_start = np.round(pd.to_numeric(df["ZapEnergy"]).min(), 1)

         edge_actual = None
         for edge, energies in edges.items():
             if edge_start in energies:
                 edge_actual = edge
         if edge_actual is None:
             # Previously this reused the edge of the preceding scan (silently
             # overwriting its file) or failed with an unbound-name error.
             raise ValueError(
                 'Could not determine the edge for a scan in {!r} starting at ZapEnergy {}'.format(root, edge_start)
             )
         edge_count[edge_actual] += 1

         count = str(edge_count[edge_actual]).zfill(4)
         out_name = '{}_{}_{}.dat'.format(stem, edge_actual, count)

         # Save
         if destination:
             # Build the full path instead of changing the working directory,
             # so a failed write cannot leave the process in another folder.
             os.makedirs(destination, exist_ok=True)
             out_name = os.path.join(destination, out_name)
         df.to_csv(out_name)
 #Function that "collects" all the files in a folder, only accepting .dat-files from xanes-measurements
 def get_filenames(path):
     """Return the paths of all ``.dat`` files located directly in *path*.

     Parameters
     ----------
     path : str
         Folder to search. Sub-folders are not descended into, and directory
         entries whose name ends in ``.dat`` are skipped.

     Returns
     -------
     list of str
         ``os.path.join(path, name)`` for every matching file, sorted so the
         order does not depend on the operating system's directory listing.
     """
     # Entries are checked through their joined path, so the working directory
     # of the process is never changed (and cannot be left changed on error).
     return sorted(
         os.path.join(path, name)
         for name in os.listdir(path)
         if name.endswith('.dat') and os.path.isfile(os.path.join(path, name))
     )
 def put_in_dataframe(path):
     """Collect one intensity column per ``.dat`` file in *path* into a frame.

     Parameters
     ----------
     path : str
         Folder handed to ``get_filenames``.

     Returns
     -------
     pandas.DataFrame
         First column ``"ZapEnergy"`` (taken from the first file), followed by
         one column per file, named by the file path. Each column holds that
         file's ``xmap_roi00`` values, or its ``xmap_roi01`` values when that
         column exists and its max-min span is at least as large as the span
         of ``xmap_roi00``.

     Raises
     ------
     ValueError
         If no ``.dat`` file is found in *path*.
     """
     filenames = get_filenames(path)
     if not filenames:
         raise ValueError('No .dat files found in {!r}'.format(path))

     df = None
     for filename in filenames:
         scan = pd.read_csv(filename)

         if df is None:
             # The energy axis comes from the first file only; the intensity
             # columns added below are aligned to it by row index.
             df = pd.DataFrame({"ZapEnergy": scan["ZapEnergy"]})

         ##############     THIS PART PICKS OUT WHICH ROI IS OF INTEREST, BUT MUST BE FIXED IF LOOKING AT THREE EDGES (roi00,roi01,roi02)    #####################
         roi = "xmap_roi00"
         if 'xmap_roi01' in scan.columns:
             # Pick the roi with the largest difference between maximum and
             # minimum intensity --> biggest edge step. Series.max()/min()
             # skip missing values, unlike the builtin max()/min().
             span_roi00 = scan["xmap_roi00"].max() - scan["xmap_roi00"].min()
             span_roi01 = scan["xmap_roi01"].max() - scan["xmap_roi01"].min()
             if span_roi00 > span_roi01:
                 roi = "xmap_roi00"  # for Mn
             else:
                 roi = "xmap_roi01"  # for Ni
         df[filename] = scan[roi]

     return df
--- a/test.txt
+++ b/test.txt
@ -0,0 +1 @@
 hei på dej