Rewrite read_pdos

2022-10-09 18:38:25 +02:00 · 2022-10-09 18:38:25 +02:00 · 0e7a566f01
commit 0e7a566f01
parent f72bd4e77f
1 changed files with 276 additions and 17 deletions
--- a/nafuma/dft/io.py
+++ b/nafuma/dft/io.py
@ -1,6 +1,7 @@
 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
+from scipy.signal import savgol_filter

 import warnings

@ -42,15 +43,18 @@ def get_atoms(poscar):
        atoms = lines[5].split()
        atom_num = lines[6].split()

-
        atom_num = [int(num) for num in atom_num]

-    atoms_dict = {}

-    for ind, atom in enumerate(atoms):
-        atoms_dict[atom] = atom_num[ind]
+    atoms_list = make_atoms_list(atoms, atom_num)

-    return atoms, atom_num, atoms_dict
+    atoms_info = {
+        'specie': atoms, # list of specie in sequential order
+        'number': atom_num, # list of number of per specie in sequential order
+        'list': atoms_list # list of every individual atom in sequential order
+    }
+
+    return atoms_info
    
    
 def get_fermi_level(doscar='DOSCAR', headerlines=6):
@ -276,6 +280,269 @@ def read_dos(path, flip_down=True):


 def read_pdos(data: dict, options={}):
+    ''' data-dictionary should be structured like this:
+    data["path"] - dictionary with path to POSCAR/CONTCAR mapped to key 'poscar' and path to DOSCAR in key 'doscar'. '''
+
+    default_options = {
+        'sum_atoms': False,
+        'sum_orbitals': False,
+        'adjust': False,        # Manually adjust the energy scale if the automatic Fermi-level detection didn't work.
+        'normalise': False,
+    }
+
+    options = aux.update_options(options=options, default_options=default_options)
+
+    # Grab some metadata
+    data['atoms'] = get_atoms(data['path']['poscar'])
+    data['info'] = get_doscar_information(data['path']['doscar'])
+
+    
+    # Read the data
+    with open(data['path']['doscar'], 'r') as f:
+        doscar_raw = f.readlines()
+
+    # Read basis functions - this will only yield a non-empty list if DOSCAR is generated by LOBSTER. It will only contain columns for the used basis set, so to be able to make proper column headers, this information is needed.
+    data['basis_functions'] = get_basis_functions(doscar_raw)
+
+    # If DOSCAR is from VASP and not LOBSTER, the basis functions will not be listed in DOSCAR (but all columns will be present)
+    if not data['basis_functions']:
+        data['basis_functions'] = [None for _ in data['atoms']['list']]
+
+    
+    data['pdos'] = []
+    for i, (atom, basis_functions) in enumerate(zip(data['atoms']['list'], data['basis_functions'])):
+
+        # Define line to start read-in: (1+i) to skip total DOS in the start, plus every previous PDOS, including the headerline. Then the initial 6 header lines in the start of the file.
+        start = int( (1 + i) * ( data['info']['NEDOS'] + 1 ) + 6 )
+
+        pdos_atom = []
+        for j in range(0,data['info']['NEDOS']):
+            pdos_atom.append(doscar_raw[start+j].split())
+
+        # Convert the data to a DataFrame and convert to float
+        pdos_atom = pd.DataFrame(pdos_atom)
+        pdos_atom = pdos_atom.astype(float)
+
+
+        # If DOSCAR was written by VASP, set the standard columns to the basis_functions variable
+        # FIXME This does not allow for f-orbitals. Could do a check vs. shape of pdos_atom to determine
+        if not basis_functions:
+            basis_functions = ['s', 'p1', 'p2', 'p3', '2p1', '2p2', '2p3', 'd1', 'd2', 'd3', 'd4', 'd5']
+
+    
+        # Split the basis functions into up and down spin channels
+        if data['info']['spin_polarised']:
+            columns = ['Energy']
+            for function in basis_functions:
+                columns.append(function+'_up')
+                columns.append(function+'_down')
+
+        else:
+            columns = ['Energy'] + basis_functions
+        
+        # Set the new column names
+        pdos_atom.columns = columns
+
+        if options['adjust']:
+            pdos_atom['Energy'] -= options['adjust']
+
+        if options['smooth']:
+            pdos_atom = smoothing(pdos=pdos_atom, options=options)
+
+        if options['normalise']:
+            orbitals = [orbital for orbital in columns if 'Energy' not in orbital]
+            for k, specie in enumerate(data['atoms']['specie']):
+                if specie == atom:
+                    index = k
+
+            for orbital in orbitals:
+                pdos_atom[orbital] = pdos_atom[orbital] / data['atoms']['number'][index]
+
+        # Make total columns
+        if data['info']['spin_polarised']:
+            up_functions = [orbital for orbital in columns if '_up' in orbital]
+            down_functions = [orbital for orbital in columns if '_down' in orbital]
+
+            pdos_atom['total_up'] = pdos_atom[up_functions].sum(axis=1)
+            pdos_atom['total_down'] = pdos_atom[down_functions].sum(axis=1)
+            pdos_atom['total'] = pdos_atom[['total_up', 'total_down']].sum(axis=1)
+
+            # Flip all the values for spin down
+            down_functions.append('total_down')
+            for orbital in down_functions:
+                pdos_atom[orbital] = (-1)*pdos_atom[orbital]
+
+        data['pdos'].append(pdos_atom)
+
+    if options['sum_atoms']:
+        data['pdos'] = sum_atoms(data, options)
+    if options['sum_orbitals']:
+        data['pdos'] = sum_orbitals(data, options)
+
+    return data
+
+
+def sum_atoms(data: dict, options={}):
+    ''' Needs to have read data through read_pdos() '''
+
+    default_options = {
+
+    }
+
+    options = aux.update_options(options=options, default_options=default_options)
+
+
+    total_atoms = 0
+
+    # Sort all the DataFrames into a dictionary
+    sorted_dfs = {}
+    for i, (specie, number) in enumerate(zip(data['atoms']['specie'], data['atoms']['number'])):
+        
+        sorted_dfs[specie] = []
+
+        for j in range(number):
+            index = j if i == 0 else j + total_atoms
+            
+            sorted_dfs[specie].append(data['pdos'][index])
+
+        total_atoms += number
+
+    # Sum all the DataFrames for each specie
+    summed_dfs = {}
+    for i, (specie, dfs) in enumerate(sorted_dfs.items()):
+        for j, df in enumerate(dfs):
+            if j > 0:
+                dfs[0] += df
+
+        dfs[0]['Energy'] = dfs[0]['Energy'] / data['atoms']['number'][i]
+        summed_dfs[specie] = dfs[0]
+
+    return summed_dfs
+
+
+def sum_orbitals(data: dict, options={}):
+    
+    shells = [
+        '1s', '2s', '3s', '4s', '5s', '6s',
+        '2p', '3p', '4p', '5p', '6p',
+        '3d', '4d', '5d', '6d',
+        '4f', '5f', '6f'
+    ]
+
+    summed_dfs = []
+
+    # If sum_atoms() is already called
+    if isinstance(data['pdos'], dict):
+        summed_dfs = {}
+
+        for specie, specie_pdos in data['pdos'].items():
+            orbitals = [orbital for orbital in specie_pdos.columns if 'Energy' not in orbital]
+            new_pdos = pd.DataFrame(specie_pdos['Energy'])
+
+            for shell in shells:
+                if data['info']['spin_polarised']:
+                    sub_columns_up = [orbital for orbital in orbitals if orbital.startswith(shell) and orbital.endswith('_up')]
+                    sub_columns_down = [orbital for orbital in orbitals if orbital.startswith(shell) and orbital.endswith('_down')]
+                    
+                    new_pdos[shell+'_up'] = data['pdos'][specie][sub_columns_up].sum(axis=1)
+                    new_pdos[shell+'_down'] = data['pdos'][specie][sub_columns_down].sum(axis=1)
+                
+                else:
+                    sub_columns = [orbital for orbital in orbitals if orbital.startswith(shell)]
+                    
+                    new_pdos[shell] = data['pdos'][specie][sub_columns].sum(axis=1)
+            new_pdos = new_pdos.loc[:, (new_pdos != 0).any(axis=0)]
+            summed_dfs[specie] = new_pdos
+                
+
+    else:
+        for pdos in data['pdos']:
+            orbitals = [orbital for orbital in pdos.columns if 'Energy' not in orbital]
+            new_pdos = pd.DataFrame(pdos['Energy'])
+            for shell in shells:
+                if data['info']['spin_polarised']:
+                    sub_columns_up = [orbital for orbital in orbitals if orbital.startswith(shell) and orbital.endswith('_up')]
+                    sub_columns_down = [orbital for orbital in orbitals if orbital.startswith(shell) and orbital.endswith('_down')]
+                    new_pdos[shell+'_up'] = pdos[sub_columns_up].sum(axis=1)
+                    new_pdos[shell+'_down'] = pdos[sub_columns_down].sum(axis=1)
+                else:
+                    sub_columns = [orbital for orbital in orbitals if orbital.startswith(shell)]
+                    new_pdos[shell] = pdos[sub_columns].sum(axis=1)
+            new_pdos = new_pdos.loc[:, (new_pdos != 0).any(axis=0)]
+            summed_dfs.append(new_pdos)
+    
+    
+    return summed_dfs
+
+
+def smoothing(pdos: pd.DataFrame, options={}):
+    ' Smoothes the data using the Savitzky-Golay filter. This is the only algorithm at this moment.  '
+
+    default_options = {
+        'smooth_window_length': 3, # Determines the window length of smoothing that the savgol-filter uses for smoothing
+        'smooth_polyorder': 2, # Determines the order of the polynomial used in the smoothing algorithm
+        'smooth_algorithm': 'savgol', # At the present, only Savitzky-Golay filter is implemented. Add Gaussian and Boxcar later.
+    }
+
+    options = aux.update_options(options=options, default_options=default_options)
+
+    # Initialise new DataFrame with correct x-values
+    pdos_smooth = pd.DataFrame(pdos['Energy'])
+
+    orbitals = [orbital for orbital in pdos.columns if 'Energy' not in orbital]
+
+    if options['smooth_algorithm'] == 'savgol':
+        
+        for orbital in orbitals:
+            pdos_smooth.insert(1, orbital, savgol_filter(pdos[orbital], options['smooth_window_length'], options['smooth_polyorder']))
+
+    return pdos_smooth
+
+
+
+def get_basis_functions(doscar):
+
+    basis_functions = []
+    for line in doscar:
+        if 'Z=' in line:
+            basis_functions.append(line.split(';')[-1].split())
+
+
+    return basis_functions
+
+
+
+def get_doscar_information(path):
+    ''' Reads information from the DOSCAR'''
+
+    kind = 'LOBSTER' if 'LOBSTER' in linecache.getline(path, 5) else 'VASP'
+    dos_info_raw = linecache.getline(path, 6).split()
+    spin_polarised = True if len(linecache.getline(path, 7).split()) == 5 else False
+
+
+    dos_info = {
+        'ENMIN': float(dos_info_raw[0]), 
+        'ENMAX': float(dos_info_raw[1]), 
+        'NEDOS': int(dos_info_raw[2]), 
+        'EFERMI': float(dos_info_raw[3]), 
+        'spin_polarised': spin_polarised, 
+        'kind': kind
+    }
+
+    return dos_info
+
+
+def make_atoms_list(atoms, number_of_atoms):
+
+    atoms_list = []
+    for atom, num in zip(atoms, number_of_atoms):
+        atoms_list = atoms_list + [atom for _ in range(num)]
+
+    return atoms_list
+        
+
+
+def read_pdos_legacy(data: dict, options={}):

    required_options = ['flip_down', 'sum_atoms', 'sum_orbitals', 'collapse_spin', 'adjust', 'manual_adjust', 'normalise', 'normalise_unit_cell', 'normalisation_factor']

@ -319,10 +586,12 @@ def read_pdos(data: dict, options={}):
        # Define line to start reading
        start = int((1 + ind) * dos_info["NEDOS"] + 6 + (ind * 1))
        
-
-
        add_d_orbitals = False
        add_p_orbitals = False
+
+
+        
+        
        # Check if d-orbitals are included (if DOSCAR comes from LOBSTER), otherwise they will have to be added to make the DataFrames the same size    
        if dos_info['kind'] == 'LOBSTER':
            # Extract basis sets from each atom
@ -532,17 +801,7 @@ def read_pdos(data: dict, options={}):
    return pdos_full, dos_info


-def get_doscar_information(path):
-    ''' Reads information from the DOSCAR'''

-    kind = 'LOBSTER' if 'LOBSTER' in linecache.getline(path, 5) else 'VASP'
-    dos_info_raw = linecache.getline(path, 6).split()
-    spin_polarised = True if len(linecache.getline(path, 7).split()) == 5 else False
-
-
-    dos_info = {'ENMIN': float(dos_info_raw[0]), 'ENMAX': float(dos_info_raw[1]), 'NEDOS': int(dos_info_raw[2]), 'EFERMI': float(dos_info_raw[3]), 'spin_polarised': spin_polarised, 'kind': kind}
-
-    return dos_info


 #def get_bader_charges(poscar='POSCAR', acf='ACF.dat'):