From 7214746af18475257e041e54483f456424aca8dc Mon Sep 17 00:00:00 2001
From: rasmusvt <r.v.thogersen@kjemi.uio.no>
Date: Mon, 20 Jun 2022 16:08:36 +0200
Subject: [PATCH] Refactor split_scans

---
 nafuma/xanes/io.py | 159 ++++++++++++++++++++++++++-------------------
 1 file changed, 93 insertions(+), 66 deletions(-)

diff --git a/nafuma/xanes/io.py b/nafuma/xanes/io.py
index 816b8f5..53ad1ca 100644
--- a/nafuma/xanes/io.py
+++ b/nafuma/xanes/io.py
@@ -2,94 +2,120 @@ import pandas as pd
 import matplotlib.pyplot as plt
 import os
 import numpy as np
-import nafuma.auxillary as aux 
+import nafuma.auxillary as aux
+from nafuma.xanes.calib import find_element
 
 
-def split_xanes_scan(root, destination=None, replace=False):
+def split_scan_data(data: dict, options={}):
+    
+
+    required_options = ['save', 'save_folder', 'replace', 'add_rois']
+
+    default_options = {
+        'save': False,
+        'save_folder': '.',
+        'replace': False,
+        'add_rois': False
+    }
+
+    options = aux.update_options(options=options, required_options=required_options, default_options=default_options)
     #root is the path to the beamtime-folder
     #destination should be the path to the processed data
     
     #insert a for-loop to go through all the folders.dat-files in the folder root\xanes\raw
 
     # FIXME Only adding this variable to pass the Linting-tests - will refactor this later
-    filename = 'dummy'
+
+    if not isinstance(data['path'], list):
+        data['path'] = [data['path']]
+
+    all_scans = []
     
-    with open(filename, 'r') as f:
-        lines = f.readlines()
+    for filename in data['path']:
+
+        with open(filename, 'r') as f:
+            lines = f.readlines()
+            
+        scan_datas, scan_data = [], []
+        headers, header = [], ''
+        read_data = False
         
-    datas = []
-    data = []
-    headers = []
-    header = ''
-    start = False
-    
-    for line in lines:
-        if line[0:2] == "#L":
-            start = True
-            header = line[2:].split()
-            continue
-            
-        elif line[0:2] == "#C":
-            start = False
-            
-            if data:
-                datas.append(data)
-                data = []
+        for line in lines:
+            # Header line starts with #L - reads headers, and toggles data read-in on
+            if line[0:2] == "#L":
+                header, read_data = line[2:].split(), True
+                continue
+
+            # First line after data started with #C - stops data read-in
+            elif line[0:2] == "#C":
+                read_data = False
                 
-            if header:
-                headers.append(header)
-                header = ''
+                if scan_data:
+                    scan_datas.append(scan_data); scan_data = []
+                    
+                if header:
+                    headers.append(header); header = ''
+                    
+            # Ignore line if read-in not toggled       
+            if read_data == False:
+                continue
+            
+            # Read in data if it is
+            else:
+                scan_data.append(line.split())
                 
                 
-
-        if start == False:
-            continue
-            
-        else:
-            data.append(line.split())
-            
-            
-            
-            
-    edges = {'Mn': [6.0, 6.1, 6.2, 6.3, 6.4, 6.5], 'Fe': [6.8, 6.9, 7.0, 7.1, 7.2], 'Co': [7.6, 7.7, 7.8, 7.9], 'Ni': [8.1, 8.2, 8.3, 8.4, 8.5]}
-    edge_count = {'Mn': 0, 'Fe': 0, 'Co': 0, 'Ni': 0}
-    
-
-    for ind, data in enumerate(datas):
-        df = pd.DataFrame(data)
-        df.columns = headers[ind]
-
-        edge_start = np.round((float(df["ZapEnergy"].min())), 1)
-
-        for edge, energies in edges.items():
-            if edge_start in energies:
-                edge_actual = edge
-                edge_count[edge] += 1
-
+        edges = {'Mn': [], 'Fe': [], 'Co': [], 'Ni': []}
         
+
+        for i, scan_data in enumerate(scan_datas):
+            xanes_df = pd.DataFrame(scan_data).apply(pd.to_numeric)
+            xanes_df.columns = headers[i]
+
+            if not ('xmap_roi00' in headers[i]) and (not 'xmap_roi01' in headers[i]):
+                continue 
+
+            
+            edge = find_element({'xanes_data_original': xanes_df})
+            edges[edge].append(xanes_df)
+            
         
-        filename = filename.split('/')[-1]
-        count = str(edge_count[edge_actual]).zfill(4)
+        if options['add']:
+   
+            added_edges = {'Mn': [], 'Fe': [], 'Co': [], 'Ni': []}
+            for edge, scans in edges.items():
+                if scans:
+                    xanes_df = scans[0]
 
-        
-        # Save 
-        if destination:
-            cwd = os.getcwd()
+                    for i, scan in enumerate(scans):
+                        if i > 0:
 
-            if not os.path.isdir(destination):
-                os.mkdir(destination)
-                
-            os.chdir(destination)
+                            if 'xmap_roi00' in xanes_df.columns:
+                                xanes_df['xmap_roi00'] += scan['xmap_roi00']
+                            if 'xmap_roi01' in xanes_df.columns:
+                                xanes_df['xmap_roi01'] += scan['xmap_roi01']
 
-            df.to_csv('{}_{}_{}.dat'.format(filename.split('.')[0], edge_actual, count))
+                    added_edges[edge].append(xanes_df)
 
-            os.chdir(cwd)
-
-        else:
-            df.to_csv('{}_{}_{}.dat'.format(filename.split('.')[0], edge_actual, count))
+            edges = added_edges
+            
+        if options['save']:
+            if not os.path.isdir(options['save_folder']):
+                os.makedirs(options['save_folder'])
 
 
+            filename = os.path.basename(filename).split('.')[0]
 
+            for edge, scans in edges.items():
+                for i, scan in enumerate(scans):
+                    count = '' if options['add'] else '_'+str(i).zfill(4)
+                    path = os.path.join(options['save_folder'], f'{filename}_{edge}{count}.dat')
+                    scan.to_csv(path)
+
+        all_scans.append(edges)
+
+
+    return all_scans
 
 
 
@@ -117,6 +143,7 @@ def read_data(data: dict, options={}) -> pd.DataFrame:
         columns.append(filename)
 
         scan_data = pd.read_csv(filename)
+
         scan_data = scan_data[[determine_active_roi(scan_data)]]
         xanes_data = pd.concat([xanes_data, scan_data], axis=1)