Rewrite read_pdos

2022-10-09 18:38:25 +02:00 · 2022-10-09 18:38:25 +02:00 · 0e7a566f01
commit 0e7a566f01
parent f72bd4e77f
1 changed files with 276 additions and 17 deletions
--- a/nafuma/dft/io.py
+++ b/nafuma/dft/io.py
@ -1,6 +1,7 @@
 import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
 from scipy.signal import savgol_filter
 import warnings
@ -42,15 +43,18 @@ def get_atoms(poscar):
        atoms = lines[5].split()
        atom_num = lines[6].split()
        atom_num = [int(num) for num in atom_num]
    atoms_dict = {}
-    for ind, atom in enumerate(atoms):
+    atoms_list = make_atoms_list(atoms, atom_num)
        atoms_dict[atom] = atom_num[ind]
-    return atoms, atom_num, atoms_dict
+    atoms_info = {
        'specie': atoms, # list of specie in sequential order
        'number': atom_num, # list of number of per specie in sequential order
        'list': atoms_list # list of every individual atom in sequential order
    }
    return atoms_info
 def get_fermi_level(doscar='DOSCAR', headerlines=6):
@ -276,6 +280,269 @@ def read_dos(path, flip_down=True):
 def read_pdos(data: dict, options={}):
    ''' data-dictionary should be structured like this:
    data["path"] - dictionary with path to POSCAR/CONTCAR mapped to key 'poscar' and path to DOSCAR in key 'doscar'. '''
    default_options = {
        'sum_atoms': False,
        'sum_orbitals': False,
        'adjust': False,        # Manually adjust the energy scale if the automatic Fermi-level detection didn't work.
        'normalise': False,
    }
    options = aux.update_options(options=options, default_options=default_options)
    # Grab some metadata
    data['atoms'] = get_atoms(data['path']['poscar'])
    data['info'] = get_doscar_information(data['path']['doscar'])
    # Read the data
    with open(data['path']['doscar'], 'r') as f:
        doscar_raw = f.readlines()
    # Read basis functions - this will only yield a non-empty list if DOSCAR is generated by LOBSTER. It will only contain columns for the used basis set, so to be able to make proper column headers, this information is needed.
    data['basis_functions'] = get_basis_functions(doscar_raw)
    # If DOSCAR is from VASP and not LOBSTER, the basis functions will not be listed in DOSCAR (but all columns will be present)
    if not data['basis_functions']:
        data['basis_functions'] = [None for _ in data['atoms']['list']]
    data['pdos'] = []
    for i, (atom, basis_functions) in enumerate(zip(data['atoms']['list'], data['basis_functions'])):
        # Define line to start read-in: (1+i) to skip total DOS in the start, plus every previous PDOS, including the headerline. Then the initial 6 header lines in the start of the file.
        start = int( (1 + i) * ( data['info']['NEDOS'] + 1 ) + 6 )
        pdos_atom = []
        for j in range(0,data['info']['NEDOS']):
            pdos_atom.append(doscar_raw[start+j].split())
        # Convert the data to a DataFrame and convert to float
        pdos_atom = pd.DataFrame(pdos_atom)
        pdos_atom = pdos_atom.astype(float)
        # If DOSCAR was written by VASP, set the standard columns to the basis_functions variable
        # FIXME This does not allow for f-orbitals. Could do a check vs. shape of pdos_atom to determine
        if not basis_functions:
            basis_functions = ['s', 'p1', 'p2', 'p3', '2p1', '2p2', '2p3', 'd1', 'd2', 'd3', 'd4', 'd5']
        # Split the basis functions into up and down spin channels
        if data['info']['spin_polarised']:
            columns = ['Energy']
            for function in basis_functions:
                columns.append(function+'_up')
                columns.append(function+'_down')
        else:
            columns = ['Energy'] + basis_functions
        # Set the new column names
        pdos_atom.columns = columns
        if options['adjust']:
            pdos_atom['Energy'] -= options['adjust']
        if options['smooth']:
            pdos_atom = smoothing(pdos=pdos_atom, options=options)
        if options['normalise']:
            orbitals = [orbital for orbital in columns if 'Energy' not in orbital]
            for k, specie in enumerate(data['atoms']['specie']):
                if specie == atom:
                    index = k
            for orbital in orbitals:
                pdos_atom[orbital] = pdos_atom[orbital] / data['atoms']['number'][index]
        # Make total columns
        if data['info']['spin_polarised']:
            up_functions = [orbital for orbital in columns if '_up' in orbital]
            down_functions = [orbital for orbital in columns if '_down' in orbital]
            pdos_atom['total_up'] = pdos_atom[up_functions].sum(axis=1)
            pdos_atom['total_down'] = pdos_atom[down_functions].sum(axis=1)
            pdos_atom['total'] = pdos_atom[['total_up', 'total_down']].sum(axis=1)
            # Flip all the values for spin down
            down_functions.append('total_down')
            for orbital in down_functions:
                pdos_atom[orbital] = (-1)*pdos_atom[orbital]
        data['pdos'].append(pdos_atom)
    if options['sum_atoms']:
        data['pdos'] = sum_atoms(data, options)
    if options['sum_orbitals']:
        data['pdos'] = sum_orbitals(data, options)
    return data
 def sum_atoms(data: dict, options={}):
    ''' Needs to have read data through read_pdos() '''
    default_options = {
    }
    options = aux.update_options(options=options, default_options=default_options)
    total_atoms = 0
    # Sort all the DataFrames into a dictionary
    sorted_dfs = {}
    for i, (specie, number) in enumerate(zip(data['atoms']['specie'], data['atoms']['number'])):
        sorted_dfs[specie] = []
        for j in range(number):
            index = j if i == 0 else j + total_atoms
            sorted_dfs[specie].append(data['pdos'][index])
        total_atoms += number
    # Sum all the DataFrames for each specie
    summed_dfs = {}
    for i, (specie, dfs) in enumerate(sorted_dfs.items()):
        for j, df in enumerate(dfs):
            if j > 0:
                dfs[0] += df
        dfs[0]['Energy'] = dfs[0]['Energy'] / data['atoms']['number'][i]
        summed_dfs[specie] = dfs[0]
    return summed_dfs
 def sum_orbitals(data: dict, options={}):
    shells = [
        '1s', '2s', '3s', '4s', '5s', '6s',
        '2p', '3p', '4p', '5p', '6p',
        '3d', '4d', '5d', '6d',
        '4f', '5f', '6f'
    ]
    summed_dfs = []
    # If sum_atoms() is already called
    if isinstance(data['pdos'], dict):
        summed_dfs = {}
        for specie, specie_pdos in data['pdos'].items():
            orbitals = [orbital for orbital in specie_pdos.columns if 'Energy' not in orbital]
            new_pdos = pd.DataFrame(specie_pdos['Energy'])
            for shell in shells:
                if data['info']['spin_polarised']:
                    sub_columns_up = [orbital for orbital in orbitals if orbital.startswith(shell) and orbital.endswith('_up')]
                    sub_columns_down = [orbital for orbital in orbitals if orbital.startswith(shell) and orbital.endswith('_down')]
                    new_pdos[shell+'_up'] = data['pdos'][specie][sub_columns_up].sum(axis=1)
                    new_pdos[shell+'_down'] = data['pdos'][specie][sub_columns_down].sum(axis=1)
                else:
                    sub_columns = [orbital for orbital in orbitals if orbital.startswith(shell)]
                    new_pdos[shell] = data['pdos'][specie][sub_columns].sum(axis=1)
            new_pdos = new_pdos.loc[:, (new_pdos != 0).any(axis=0)]
            summed_dfs[specie] = new_pdos
    else:
        for pdos in data['pdos']:
            orbitals = [orbital for orbital in pdos.columns if 'Energy' not in orbital]
            new_pdos = pd.DataFrame(pdos['Energy'])
            for shell in shells:
                if data['info']['spin_polarised']:
                    sub_columns_up = [orbital for orbital in orbitals if orbital.startswith(shell) and orbital.endswith('_up')]
                    sub_columns_down = [orbital for orbital in orbitals if orbital.startswith(shell) and orbital.endswith('_down')]
                    new_pdos[shell+'_up'] = pdos[sub_columns_up].sum(axis=1)
                    new_pdos[shell+'_down'] = pdos[sub_columns_down].sum(axis=1)
                else:
                    sub_columns = [orbital for orbital in orbitals if orbital.startswith(shell)]
                    new_pdos[shell] = pdos[sub_columns].sum(axis=1)
            new_pdos = new_pdos.loc[:, (new_pdos != 0).any(axis=0)]
            summed_dfs.append(new_pdos)
    return summed_dfs
 def smoothing(pdos: pd.DataFrame, options={}):
    ' Smoothes the data using the Savitzky-Golay filter. This is the only algorithm at this moment.  '
    default_options = {
        'smooth_window_length': 3, # Determines the window length of smoothing that the savgol-filter uses for smoothing
        'smooth_polyorder': 2, # Determines the order of the polynomial used in the smoothing algorithm
        'smooth_algorithm': 'savgol', # At the present, only Savitzky-Golay filter is implemented. Add Gaussian and Boxcar later.
    }
    options = aux.update_options(options=options, default_options=default_options)
    # Initialise new DataFrame with correct x-values
    pdos_smooth = pd.DataFrame(pdos['Energy'])
    orbitals = [orbital for orbital in pdos.columns if 'Energy' not in orbital]
    if options['smooth_algorithm'] == 'savgol':
        for orbital in orbitals:
            pdos_smooth.insert(1, orbital, savgol_filter(pdos[orbital], options['smooth_window_length'], options['smooth_polyorder']))
    return pdos_smooth
 def get_basis_functions(doscar):
    basis_functions = []
    for line in doscar:
        if 'Z=' in line:
            basis_functions.append(line.split(';')[-1].split())
    return basis_functions
 def get_doscar_information(path):
    ''' Reads information from the DOSCAR'''
    kind = 'LOBSTER' if 'LOBSTER' in linecache.getline(path, 5) else 'VASP'
    dos_info_raw = linecache.getline(path, 6).split()
    spin_polarised = True if len(linecache.getline(path, 7).split()) == 5 else False
    dos_info = {
        'ENMIN': float(dos_info_raw[0]), 
        'ENMAX': float(dos_info_raw[1]), 
        'NEDOS': int(dos_info_raw[2]), 
        'EFERMI': float(dos_info_raw[3]), 
        'spin_polarised': spin_polarised, 
        'kind': kind
    }
    return dos_info
 def make_atoms_list(atoms, number_of_atoms):
    atoms_list = []
    for atom, num in zip(atoms, number_of_atoms):
        atoms_list = atoms_list + [atom for _ in range(num)]
    return atoms_list
 def read_pdos_legacy(data: dict, options={}):
    required_options = ['flip_down', 'sum_atoms', 'sum_orbitals', 'collapse_spin', 'adjust', 'manual_adjust', 'normalise', 'normalise_unit_cell', 'normalisation_factor']
@ -319,10 +586,12 @@ def read_pdos(data: dict, options={}):
        # Define line to start reading
        start = int((1 + ind) * dos_info["NEDOS"] + 6 + (ind * 1))
        add_d_orbitals = False
        add_p_orbitals = False
        # Check if d-orbitals are included (if DOSCAR comes from LOBSTER), otherwise they will have to be added to make the DataFrames the same size    
        if dos_info['kind'] == 'LOBSTER':
            # Extract basis sets from each atom
@ -532,17 +801,7 @@ def read_pdos(data: dict, options={}):
    return pdos_full, dos_info
 def get_doscar_information(path):
    ''' Reads information from the DOSCAR'''
    kind = 'LOBSTER' if 'LOBSTER' in linecache.getline(path, 5) else 'VASP'
    dos_info_raw = linecache.getline(path, 6).split()
    spin_polarised = True if len(linecache.getline(path, 7).split()) == 5 else False
    dos_info = {'ENMIN': float(dos_info_raw[0]), 'ENMAX': float(dos_info_raw[1]), 'NEDOS': int(dos_info_raw[2]), 'EFERMI': float(dos_info_raw[3]), 'spin_polarised': spin_polarised, 'kind': kind}
    return dos_info
 #def get_bader_charges(poscar='POSCAR', acf='ACF.dat'):