Merge pull request #1 from rasmusthog/halvor_xanes

Halvor xanes
2022-04-08 13:36:30 +02:00 · 2022-04-08 13:36:30 +02:00 · 092ecfa380
commit 092ecfa380
parent de2616067d b242602eba
3 changed files with 281 additions and 71 deletions
--- a/nafuma/xanes/calib.py
+++ b/nafuma/xanes/calib.py
@ -1,7 +1,10 @@
 import pandas as pd
 import numpy as np
 import os
-
+import matplotlib.pyplot as plt
+import beamtime.auxillary as aux
+import beamtime.xanes as xas
+import beamtime.xanes.io as io
 def rbkerbest():
+    """Print a fixed greeting string to stdout."""
     print("ROSENBORG!<3")

@ -12,83 +15,143 @@ def rbkerbest():

 ##Better to make a new function that loops through the files and performs split_xanes_scan on each of them

+#Trying to make a function that can decide which edge it is based on the first ZapEnergy-value
+def finding_edge(df):
+    """Decide which edge a data set covers from its first ZapEnergy value.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        Must contain a "ZapEnergy" column with at least one row.
+
+    Returns
+    -------
+    str or None
+        'Mn' if the first ZapEnergy value lies strictly between 5.9 and 6.5,
+        'Ni' if it lies strictly between 8.0 and 8.6, otherwise None.
+    """
+    #Positional access, so this also works when the index does not start at label 0
+    first_energy = df["ZapEnergy"].iloc[0]
+
+    if 5.9 < first_energy < 6.5:
+        return 'Mn'
+    if 8.0 < first_energy < 8.6:
+        return 'Ni'
+
+    #No known edge matched; callers have to handle None
+    return None

+def test(innmat):
+    """Debug helper: load the scans found at innmat into a dataframe and print it."""
+    df_test= xas.io.put_in_dataframe(innmat)
+    print(df_test)
+
+def pre_edge_subtraction(path, options=None):
+    """Fit a straight line to the region below the edge of every scan and subtract it.
+
+    Parameters
+    ----------
+    path : str
+        Passed on to xas.io.get_filenames and xas.io.put_in_dataframe.
+    options : dict, optional
+        'print' (default False) plots the data, the fitted background and the
+        background-subtracted data. 'troubleshoot' (default False) plots every
+        scan on its own, to find out which file makes the fit go wrong.
+
+    Returns
+    -------
+    tuple
+        (df_bkgd_sub, filenames, edge): the background-subtracted dataframe,
+        the list of scan files and the edge reported by finding_edge.
+
+    Raises
+    ------
+    ValueError
+        If finding_edge returns anything other than 'Mn' or 'Ni'.
+    """
+    required_options = ['print','troubleshoot']
+    default_options = {
+        'print': False,
+        'troubleshoot': False
+    }
+    #Create the dict per call instead of sharing one mutable default between calls
+    if options is None:
+        options = {}
+    options = aux.update_options(options=options, required_options=required_options, default_options=default_options)
+
+    filenames = xas.io.get_filenames(path)
+    df= xas.io.put_in_dataframe(path)
+    edge=finding_edge(df)
-def split_xanes_scan(filename, destination=None, replace=False):
-    #root is the path to the beamtime-folder
-    #destination should be the path to the processed data
     
-    #insert a for-loop to go through all the folders.dat-files in the folder root\xanes\raw
+    #Defining the end of the region used to define the background, thus start of the edge
+    #implement widget
+    if edge == 'Mn':
+        edge_start = 6.42
+    elif edge == 'Ni':
+        edge_start = 8.3
+    else:
+        #Without this, edge_start would be unbound further down
+        raise ValueError("Could not recognise the edge (expected 'Mn' or 'Ni'), got: {}".format(edge))
+
+    #making a dataframe only containing the rows that are included in the background subtraction (points lower than where the edge start is defined)
+    df_start=df.loc[df["ZapEnergy"] < edge_start]
+
+    #Making a new dataframe, with only the ZapEnergies as the first column -> will be filled to include the background data
+    df_bkgd = pd.DataFrame(df["ZapEnergy"])
+
+    for files in filenames:
+
+        #Fitting linear function to the background
+        d = np.polyfit(df_start["ZapEnergy"],df_start[files],1)
+        function_bkgd = np.poly1d(d)
+
+        #evaluating the fitted line at all ZapEnergy-values, not only the ones used in the fit
+        y_bkgd=function_bkgd(df["ZapEnergy"])
+
+        #adding a new column in df_bkgd with the y-values of the background
+        df_bkgd.insert(1,files,y_bkgd)
     
-    with open(filename, 'r') as f:
-        lines = f.readlines()
         
-    datas = []
-    data = []
-    headers = []
-    header = ''
-    start = False
+        if options['troubleshoot']:
+            #FOR FIGURING OUT WHERE IT GOES WRONG/WHICH FILE IS CORRUPT
+            df.plot(x = "ZapEnergy",y=files)
+
+    #Plotting the calculated pre-edge background with the region used for the regression
+    if options['print']:
+        fig, (ax1,ax2,ax3) = plt.subplots(1,3,figsize=(15,5))
+
+        #Left panel: the data and the fitted background that will later be subtracted
+        df.plot(x="ZapEnergy", y=filenames,color="Black",ax=ax1)
+        df_bkgd.plot(x="ZapEnergy", y=filenames,color="Red",ax=ax1)
+        #Drawing on the axes explicitly; plt.axvline would use whichever axes is current
+        ax1.axvline(x = max(df_start["ZapEnergy"]))
+        ax1.set_title('Data and fitted background')
+
+        #Middle panel: zooming into the background region to confirm that fit and limits look reasonable
+        df_bkgd.plot(x="ZapEnergy", y=filenames,color="Red",ax=ax2)
+        df.plot(x = "ZapEnergy",y=filenames,ax=ax2)
+        ax2.set_xlim([min(df_start["ZapEnergy"]),max(df_start["ZapEnergy"])+0.01])
+        #finding maximum and minimum values in the background region of all scans
+        min_values=[min(df_start[file]) for file in filenames]
+        max_values=[max(df_start[file]) for file in filenames]
+        ax2.set_ylim([min(min_values),max(max_values)])
+        ax2.axvline(x = max(df_start["ZapEnergy"]))
+
+    ###################### Subtracting the pre edge from the data   ################
+
+    #making a new dataframe to insert the background subtracted intensities
+    df_bkgd_sub = pd.DataFrame(df["ZapEnergy"])
+
+    for files in filenames:
+        newintensity_calc=df[files]-df_bkgd[files]
+        df_bkgd_sub.insert(1,files,newintensity_calc)
+
+    if options['print']:
+        #Right panel: the data before and after the subtraction
+        df.plot(x = "ZapEnergy",y=filenames, color="Black", ax=ax3, legend=False)
+        df_bkgd_sub.plot(x="ZapEnergy", y=filenames,color="Red",ax=ax3, legend=False)
+        ax3.set_title('Data and background-subtracted data')
+
+    return df_bkgd_sub,filenames,edge
+
+def post_edge_normalization(path, options=None):
+    """Fit a straight line to the post-edge region of every background-subtracted scan.
+
+    Parameters
+    ----------
+    path : str
+        Passed on to pre_edge_subtraction.
+    options : dict, optional
+        'print' (default False) plots the background-subtracted data together
+        with the fitted post-edge lines and the start of the fitting region.
+
+    Returns
+    -------
+    tuple
+        (df_bkgd_sub, df_postedge): the background-subtracted data and the
+        fitted post-edge lines evaluated at every ZapEnergy value.
+
+    Raises
+    ------
+    ValueError
+        If the edge reported by pre_edge_subtraction is neither 'Mn' nor 'Ni'.
+    """
+
+    required_options = ['print']
+    default_options = {
+        'print': False
+    }
+    #Create the dict per call instead of sharing one mutable default between calls
+    if options is None:
+        options = {}
+    options = aux.update_options(options=options, required_options=required_options, default_options=default_options)
     
-    for line in lines:
-        if line[0:2] == "#L":
-            start = True
-            header = line[2:].split()
-            continue
-            
-        elif line[0:2] == "#C":
-            start = False
-            
-            if data:
-                datas.append(data)
-                data = []
-                
-            if header:
-                headers.append(header)
-                header = ''
-                
-                
+    df_bkgd_sub,filenames,edge = pre_edge_subtraction(path)
+    #Defining the start of the post-edge region for Mn/Ni
+    #Implement widget
+    if edge == 'Mn':
+        edge_stop = 6.565
+    elif edge == 'Ni':
+        edge_stop = 8.361
+    else:
+        #Without this, edge_stop would be unbound further down
+        raise ValueError("Could not recognise the edge (expected 'Mn' or 'Ni'), got: {}".format(edge))

-        if start == False:
-            continue
-            
-        else:
-            data.append(line.split())
-            
-            
-            
-            
-    edges = {'Mn': [6.0, 6.1, 6.2, 6.3, 6.4, 6.5], 'Fe': [6.8, 6.9, 7.0, 7.1, 7.2], 'Co': [7.6, 7.7, 7.8, 7.9], 'Ni': [8.1, 8.2, 8.3, 8.4, 8.5]}
-    edge_count = {'Mn': 0, 'Fe': 0, 'Co': 0, 'Ni': 0}
-    
+    #new dataframe only containing the post edge, where a regression line will be calculated in the for-loop below
+    #Rows with missing values are dropped, as some of the data sets miss the last few data points, which breaks the fit.
+    #dropna() returns a new dataframe, so no in-place change is made on a slice of df_bkgd_sub
+    df_end = df_bkgd_sub.loc[df_bkgd_sub["ZapEnergy"] > edge_stop].dropna()
+    df_postedge = pd.DataFrame(df_bkgd_sub["ZapEnergy"]) #making a new dataframe

-    for ind, data in enumerate(datas):
-        df = pd.DataFrame(data)
-        df.columns = headers[ind]
+    for files in filenames:
+        d = np.polyfit(df_end["ZapEnergy"],df_end[files],1)
+        function_post = np.poly1d(d)
+        y_post=function_post(df_bkgd_sub["ZapEnergy"])
+        df_postedge.insert(1,files,y_post) #adding a new column with the y-values of the fitted post edge

-        edge_start = np.round((float(df["ZapEnergy"].min())), 1)
+    #Plotting the background subtracted signal with the post-edge regression line and the start point for the linear regression line
+    if options['print']:
+        #One figure holding everything; the size is set when the figure is created
+        fig, ax = plt.subplots(figsize=(15,15))
+        df_bkgd_sub.plot(x = "ZapEnergy",y=filenames, ax=ax, legend=False)
+        df_postedge.plot(x="ZapEnergy", y=filenames,color="Green",ax=ax, legend=False)
+        ax.axvline(x = min(df_end["ZapEnergy"]))

-        for edge, energies in edges.items():
-            if edge_start in energies:
-                edge_actual = edge
-                edge_count[edge] += 1
-
-        
-        
-        filename = filename.split('/')[-1]
-        count = str(edge_count[edge_actual]).zfill(4)
-
-        
-        # Save 
-        if destination:
-            cwd = os.getcwd()
-
-            if not os.path.isdir(destination):
-                os.mkdir(destination)
-                
-            os.chdir(destination)
-
-            df.to_csv('{}_{}_{}.dat'.format(filename.split('.')[0], edge_actual, count))
-
-            os.chdir(cwd)
-
-        else:
-            df.to_csv('{}_{}_{}.dat'.format(filename.split('.')[0], edge_actual, count))
+    return df_bkgd_sub, df_postedge
--- a/nafuma/xanes/io.py
+++ b/nafuma/xanes/io.py
@ -1,2 +1,148 @@
-#hello
-#yeah
+import pandas as pd
+import matplotlib.pyplot as plt
+import os
+import numpy as np
+
+def split_xanes_scan(root, destination=None, replace=False):
+    """Split a file holding several scans into one .dat file per scan.
+
+    A scan starts at a line beginning with "#L" (the column names) and ends at
+    the next line beginning with "#C" or at the end of the file. Each scan is
+    written as '<name>_<edge>_<count>.dat', where <name> is the input file name
+    up to its first '.', <edge> is found from the lowest ZapEnergy value and
+    <count> is a zero-padded running number per edge.
+
+    Parameters
+    ----------
+    root : str
+        Path of the raw file to split.
+    destination : str, optional
+        Folder to write the split files to (created if missing). The current
+        working directory is used when not given.
+    replace : bool, optional
+        Currently unused.
+    """
+    with open(root, 'r') as f:
+        lines = f.readlines()
+
+    #Each scan is stored as a (header, rows) pair so headers and data can never get out of step
+    scans = []
+    header = []
+    rows = []
+    in_data = False
+
+    for line in lines:
+        if line[0:2] == "#L":
+            in_data = True
+            header = line[2:].split()
+
+        elif line[0:2] == "#C":
+            in_data = False
+            if header and rows:
+                scans.append((header, rows))
+            header, rows = [], []
+
+        elif in_data:
+            rows.append(line.split())
+
+    #Keeping the last scan also when the file does not end with a "#C" line
+    if header and rows:
+        scans.append((header, rows))
+
+    edges = {'Mn': [6.0, 6.1, 6.2, 6.3, 6.4, 6.5], 'Fe': [6.8, 6.9, 7.0, 7.1, 7.2], 'Co': [7.6, 7.7, 7.8, 7.9], 'Ni': [8.1, 8.2, 8.3, 8.4, 8.5]}
+    edge_count = {'Mn': 0, 'Fe': 0, 'Co': 0, 'Ni': 0}
+
+    basename = os.path.basename(root).split('.')[0]
+
+    for header, rows in scans:
+        df = pd.DataFrame(rows)
+        df.columns = header
+
+        #The values are read as text, so convert before taking the minimum
+        edge_start = np.round(df["ZapEnergy"].astype(float).min(), 1)
+
+        edge_actual = None
+        for edge, energies in edges.items():
+            if edge_start in energies:
+                edge_actual = edge
+
+        if edge_actual is None:
+            #Skipping instead of reusing the edge (and file name) of the previous scan
+            print('Skipping scan starting at {}: no known edge'.format(edge_start))
+            continue
+
+        edge_count[edge_actual] += 1
+        count = str(edge_count[edge_actual]).zfill(4)
+        out_name = '{}_{}_{}.dat'.format(basename, edge_actual, count)
+
+        # Save
+        if destination:
+            #Writing to a joined path, so the working directory is never changed
+            os.makedirs(destination, exist_ok=True)
+            out_name = os.path.join(destination, out_name)
+
+        df.to_csv(out_name)
+
+
+#Function that "collects" all the files in a folder, only accepting .dat-files from xanes-measurements
+def get_filenames(path):
+    """Return the paths of all .dat files directly inside path, sorted by name.
+
+    Parameters
+    ----------
+    path : str
+        Folder to look in. Subfolders are not searched.
+
+    Returns
+    -------
+    list of str
+        Each entry is os.path.join(path, <file name>).
+    """
+    #Listing the folder directly, so the working directory is never changed.
+    #Sorted because os.listdir returns the entries in arbitrary order.
+    return sorted(
+        os.path.join(path, filename)
+        for filename in os.listdir(path)
+        if filename[-4:] == '.dat' and os.path.isfile(os.path.join(path, filename))
+    )
+
+def put_in_dataframe(path):
+    """Collect the intensities of all .dat files in path in one dataframe.
+
+    The first column, "ZapEnergy", is taken from the first file. After that
+    comes one column per file, named by the file path, holding the xmap_roi00
+    or xmap_roi01 values of that file.
+
+    Parameters
+    ----------
+    path : str
+        Passed on to get_filenames.
+
+    Returns
+    -------
+    pandas.DataFrame
+
+    Raises
+    ------
+    IndexError
+        If get_filenames finds no files.
+    """
+    filenames = get_filenames(path)
+
+    if not filenames:
+        raise IndexError('No .dat files found in {}'.format(path))
+
+    df = None
+
+    for filename in filenames:
+        scan = pd.read_csv(filename)
+
+        if df is None:
+            #The ZapEnergy values of the first file are used for all files
+            df = pd.DataFrame({"ZapEnergy": scan["ZapEnergy"]})
+
+    ##############     THIS PART PICKS OUT WHICH ROI IS OF INTEREST, BUT MUST BE FIXED IF LOOKING AT THREE EDGES (roi00,roi01,roi02)    #####################
+        roi = "xmap_roi00"
+        if 'xmap_roi01' in scan.columns:
+            #Picking the roi with the highest difference between maximum and minimum intensity --> biggest edge shift
+            span_roi00 = scan["xmap_roi00"].max() - scan["xmap_roi00"].min()
+            span_roi01 = scan["xmap_roi01"].max() - scan["xmap_roi01"].min()
+            if not span_roi00 > span_roi01:
+                roi = "xmap_roi01" #forNi, otherwise roi00 forMn
+    ###############################################################################################
+
+        #Only the chosen column is read, so no other columns have to be present or dropped
+        df[filename] = scan[roi]
+
+    return df
--- a/test.txt
+++ b/test.txt
@ -0,0 +1 @@
+hei på dej