nafuma/beamtime/xrd/io.py

502 lines
14 KiB
Python
Raw Normal View History

from sympy import re
2021-10-21 14:41:10 +02:00
import fabio, pyFAI
2021-10-23 18:37:21 +02:00
import pandas as pd
2021-10-21 14:41:10 +02:00
import numpy as np
import os
2021-10-27 15:33:56 +02:00
import shutil
2021-10-21 14:41:10 +02:00
2021-10-25 08:33:58 +02:00
import zipfile
import xml.etree.ElementTree as ET
2021-10-21 14:41:10 +02:00
2022-03-16 14:16:41 +01:00
import beamtime.auxillary as aux
2021-10-21 14:41:10 +02:00
def get_image_array(path):
    ''' Opens the image file at path with fabio and returns its data attribute.

    Input:
    path: path to the image file, passed straight to fabio.open()

    Output:
    The .data attribute of the opened fabio image'''

    return fabio.open(path).data
def get_image_headers(path):
    ''' Opens the image file at path with fabio and returns its header attribute.

    Input:
    path: path to the image file, passed straight to fabio.open()

    Output:
    The .header attribute of the opened fabio image'''

    opened = fabio.open(path)
    headers = opened.header

    return headers
def integrate_1d(data, options=None, index=0):
    ''' Integrates an image to a 1D diffractogram with pyFAI and reads the result back in.

    Required content of data:
    calibrant (str): path to .poni-file
    path (str or list of str) (optional, dependent on image): path(s) to image file(s) - either this or image must be specified. If both are passed, image is prioritised
    image (NumPy 2D Array) (optional, dependent on path): image array as extracted from get_image_array

    Options (entries that are missing are handed to aux.update_options together with these defaults):
    unit (str): radial unit passed to pyFAI. Default '2th_deg'
    nbins (int): number of bins to divide the image into. Default 3000
    extract_folder (str): folder holding the temporary output when save is False. Default 'tmp'
    save (bool): whether the integrated file is kept. Default False
    save_filename, save_extension, save_folder, overwrite: used by make_filename to build the output name

    Input:
    index (int): which entry of data['path'] to integrate

    Output:
    diffractogram, wavelength: what read_xy returns for the integrated file

    Side effects:
    data['path'] is turned into a list if it was a single path. When save is True,
    data['path'][index] is replaced by the path of the saved diffractogram. When save
    is False the temporary file and extract_folder are removed again and data['path']
    keeps pointing at the image. options['save_folder'] is set to None before returning.
    '''

    # Use a fresh dict per call - the options object is handed to aux.update_options,
    # so a shared default dict could carry settings over from one call to the next
    if options is None:
        options = {}

    # 'extract_folder' is read below, so it has to be among the required options
    required_options = ['unit', 'nbins', 'extract_folder', 'save', 'save_filename', 'save_extension', 'save_folder', 'overwrite']

    default_options = {
        'unit': '2th_deg',
        'nbins': 3000,
        'extract_folder': 'tmp',
        'save': False,
        'save_filename': None,
        'save_extension': '_integrated.xy',
        'save_folder': '.',
        'overwrite': False}

    options = aux.update_options(options=options, required_options=required_options, default_options=default_options)

    # 'path' is optional when an image array is passed, so only touch it if it is there
    has_path = 'path' in data
    if has_path and not isinstance(data['path'], list):
        data['path'] = [data['path']]

    source_path = data['path'][index] if has_path else None

    # Get image array from filename if not passed. The array is kept local rather than
    # stored in data, otherwise a later call with another index would reuse this image
    if 'image' in data:
        image = data['image']
    elif source_path is not None:
        image = get_image_array(source_path)
    else:
        raise KeyError("data must contain either 'path' or 'image'")

    # Instantiate the azimuthal integrator from pyFAI from the calibrant (.poni-file)
    ai = pyFAI.load(data['calibrant'])

    # Determine filename
    filename = make_filename(options=options, path=source_path)

    # Make the folder the output is written to if it does not exist already
    # (extract_folder for temporary output, save_folder when saving)
    output_folder = os.path.dirname(filename)
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    ai.integrate1d(image, options['nbins'], unit=options['unit'], filename=filename)

    # read_xy only looks up data['path'][index], so a throwaway dict is enough and the
    # caller's data is not pointed at a file that may be deleted below
    diffractogram, wavelength = read_xy(data={'path': [filename]}, options=options, index=0)

    if options['save']:
        # Point the caller at the saved diffractogram, as before
        if has_path:
            data['path'][index] = filename
    else:
        os.remove(filename)
        shutil.rmtree(options['extract_folder'])

    # Reset this option
    # NOTE(review): kept unconditional; confirm whether this was meant to happen only when save is False
    options['save_folder'] = None

    return diffractogram, wavelength
2022-03-16 14:16:41 +01:00
2021-10-23 18:37:21 +02:00
def make_filename(options, path=None):
    ''' Builds the filename the integrated diffractogram is written to.

    Input:
    options (dict): must contain 'save'. If 'save' is falsy 'extract_folder' is needed, otherwise
    'save_folder', 'save_filename', 'save_extension' and 'overwrite' are needed
    path (str) (optional): path to the image file, used as the trunk of the filename if no 'save_filename' is given

    Output:
    filename (str): where to write the integrated data

    Side effects:
    If 'save' is set, 'overwrite' is falsy and the filename is already taken, the existing file
    is renamed to the first free <trunk>_NNNN<extension> and the next free number is returned.
    '''

    # Nothing is kept: the data goes to a fixed temporary file in the extract folder
    if not options['save']:
        return os.path.join(options['extract_folder'], 'tmp_diff.dat')

    if options['save_filename']:
        filename = os.path.join(options['save_folder'], options['save_filename'])

    # No filename is given, but a path is: take the image filename (with extension deleted) and add the save_extension
    elif path:
        stem = os.path.splitext(os.path.basename(path))[0]
        filename = os.path.join(options['save_folder'], stem + options['save_extension'])

    # Neither is given: make filename just "integrated.xy" in the save_folder
    else:
        filename = os.path.join(options['save_folder'], 'integrated.xy')

    if options['overwrite'] or not os.path.isfile(filename):
        return filename

    # splitext only looks at the last path component, so dots in folder names
    # (including the default save_folder '.') do not end up cutting the trunk short
    trunk, extension = os.path.splitext(filename)

    def numbered(counter):
        return f'{trunk}_{counter:04d}{extension}'

    # Move the existing file into the numbering scheme, using the first free number
    # so that an already numbered file is never replaced
    counter = 0
    while os.path.isfile(numbered(counter)):
        counter += 1

    os.rename(filename, numbered(counter))

    # The new file gets the next free number after that
    counter += 1
    while os.path.isfile(numbered(counter)):
        counter += 1

    return numbered(counter)
2021-10-23 18:37:21 +02:00
2021-10-28 16:47:07 +02:00
def generate_image_list(path, options=None):
    ''' Generates a list of paths to pass to the average_images() function

    NOTE: not implemented yet - the function only sets up its option tables and returns None.
    Neither path nor options is used so far.'''

    default_options = dict(scans_per_image=5)
    required_options = list(default_options)
2021-10-23 18:37:21 +02:00
def average_images(images):
    ''' Takes a list of paths to image files, reads them and averages them before returning the average image

    Input:
    images: iterable of paths to image files, each read with get_image_array()

    Output:
    image_average: the mean of the image arrays along the first axis (i.e. across images)

    Raises:
    ValueError if no images are passed'''

    images = list(images)

    if not images:
        raise ValueError('average_images() needs at least one image path')

    # get_image_array lives in this module, so it is called directly
    image_arrays = np.array([get_image_array(image) for image in images])

    return image_arrays.mean(axis=0)
def subtract_dark(image, dark):
    ''' Subtracts dark from image and returns the difference (image - dark)'''

    corrected = image - dark

    return corrected
2021-10-21 14:41:10 +02:00
def view_integrator(calibrant):
    ''' Prints the pyFAI version and the azimuthal integrator loaded from a calibrant file

    Input:
    calibrant: Path to the azimuthal integrator file (.PONI)

    Output:
    None'''

    integrator = pyFAI.load(calibrant)

    # Label / value pairs, printed in this order
    report = (
        ("pyFAI version:", pyFAI.version),
        ("\nIntegrator: \n", integrator),
    )

    for label, value in report:
        print(label, value)
2021-10-27 15:33:56 +02:00
def read_brml(data, options=None, index=0):
    ''' Reads the measured diffractogram and the wavelength from a .brml-file.

    Required content of data:
    path (str or list of str): path(s) to the .brml-file(s)

    Options (entries that are missing are handed to aux.update_options together with these defaults):
    extract_folder (str): kept for backward compatibility. The XML is read straight from the archive, so nothing is extracted to disk. Default 'tmp'
    save_folder (str): if set, the diffractogram is written as <name of the brml-file>.csv in this folder. Default None

    Input:
    index (int): which entry of data['path'] to read

    Output:
    diffractogram (DataFrame): columns '2th' and 'I'
    wavelength (float): value of the WaveLengthAlpha1 entry in the file

    Raises:
    ValueError if the file has no WaveLengthAlpha1 entry
    '''

    # FIXME: Can't read RECX1-data, apparently must be formatted differently from RECX2. Check the RawData-files and compare between the two files.

    # Use a fresh dict per call - the options object is handed to aux.update_options
    if options is None:
        options = {}

    required_options = ['extract_folder', 'save_folder']

    default_options = {
        'extract_folder': 'tmp',
        'save_folder': None
    }

    options = aux.update_options(options=options, required_options=required_options, default_options=default_options)

    # Accept a single path as well as a list of paths
    paths = data['path'] if isinstance(data['path'], list) else [data['path']]
    brml_path = paths[index]

    # Parse the RawData0.xml file directly from the brml-file (a zip archive). Nothing is
    # written to disk, so no folder has to be created and removed afterwards
    with zipfile.ZipFile(brml_path, 'r') as brml:
        with brml.open('Experiment0/RawData0.xml') as raw_data:
            root = ET.parse(raw_data).getroot()

    # Position of the intensity in each comma-separated Datum for the point-by-point scan types
    intensity_column = {'TwoTheta': 3, 'Coupled TwoTheta/Theta': 4}

    diffractogram = []

    for chain in root.findall('./DataRoutes/DataRoute'):

        # Check if the chain is the right one to extract the data from
        if chain.get('Description') != 'Originally measured data.':
            continue

        # Get the scan type to be able to handle different data formats
        scantype = chain.findall('ScanInformation')[0].get('VisibleName')

        if scantype in intensity_column:
            column = intensity_column[scantype]

            for datum in chain.findall('Datum'):
                fields = datum.text.split(',')
                twotheta, intensity = float(fields[2]), float(fields[column])

                if twotheta > 0:
                    diffractogram.append({'2th': twotheta, 'I': intensity})

        elif scantype == 'Still (Eiger2R_500K (1D mode))':
            axis_info = 'ScanInformation/ScaleAxes/ScaleAxisInfo/'
            start = float(chain.findall(axis_info + 'Start')[0].text)
            stop = float(chain.findall(axis_info + 'Stop')[0].text)

            for datum in chain.findall('Datum'):
                raw = [float(value) for value in datum.text.split(',')]

                # NOTE(review): only values above 600 are kept as intensities - the reason for this threshold is not visible here
                intensity = np.array([value for value in raw if value > 600])

                # The 2th-axis is spread evenly between start and stop over the intensities that were kept
                twotheta = np.linspace(start, stop, len(intensity))

                diffractogram = {'2th': twotheta, 'I': intensity}

    # Find wavelength (if there are several entries, the last one is used)
    wavelength = None
    for tube in root.findall('./FixedInformation/Instrument/PrimaryTracks/TrackInfoData/MountedOptics/InfoData/Tube/WaveLengthAlpha1'):
        wavelength = float(tube.attrib['Value'])

    if wavelength is None:
        raise ValueError(f'No WaveLengthAlpha1 entry found in {brml_path}')

    diffractogram = pd.DataFrame(diffractogram)

    if options['save_folder']:
        os.makedirs(options['save_folder'], exist_ok=True)

        # to_csv needs a file to write to, not the folder itself
        stem = os.path.splitext(os.path.basename(brml_path))[0]
        diffractogram.to_csv(os.path.join(options['save_folder'], stem + '.csv'))

    return diffractogram, wavelength
2021-10-27 15:33:56 +02:00
def read_xy(data, options=None, index=0):
    ''' Reads a whitespace-separated .xy / .xye-file into a DataFrame and looks up the wavelength in its header.

    Required content of data:
    path (str or list of str): path(s) to the file(s)

    Input:
    options: not used here, accepted so that all readers can be called the same way
    index (int): which entry of data['path'] to read

    Output:
    diffractogram (DataFrame): columns '2th', 'I' for two-column files and '2th', 'I', 'sigma' for three-column files. Other column counts keep the default integer column labels
    wavelength: as returned by find_wavelength_from_xy
    '''

    # Accept a single path as well as a list of paths
    paths = data['path'] if isinstance(data['path'], list) else [data['path']]
    path = paths[index]

    #if 'wavelength' not in data.keys():
    # Get wavelength from scan
    wavelength = find_wavelength_from_xy(path=path)

    with open(path, 'r') as f:
        position = 0

        # Skip the header lines (starting with # or '). startswith() is also safe at the
        # end of the file, where readline() returns an empty string
        current_line = f.readline()
        while current_line.startswith(('#', '\'')):
            position = f.tell()
            current_line = f.readline()

        # Go back to the start of the first line that is not part of the header
        f.seek(position)

        diffractogram = pd.read_csv(f, header=None, sep=r'\s+')

    column_names = {
        2: ['2th', 'I'],
        3: ['2th', 'I', 'sigma']
    }

    if diffractogram.shape[1] in column_names:
        diffractogram.columns = column_names[diffractogram.shape[1]]

    return diffractogram, wavelength
def read_data(data, options=None, index=0):
    ''' Reads one diffractogram, picking the reader from the file extension, and adds the translated x-axes.

    Required content of data:
    path (str or list of str): path(s) to the file(s). A single path is turned into a list
    (integrate_1d and read_brml read further entries of data, e.g. calibrant)

    Options read here (both are treated as off if missing):
    normalise: if truthy, the intensities are divided by the maximum intensity
    offset: if truthy, apply_offset() is called (which reads 'offset_x' and 'offset_y')
    The options are also passed on to the reader that is used.

    Input:
    index (int): which entry of data['path'] to read

    Output:
    diffractogram (DataFrame), wavelength

    Raises:
    ValueError if the file extension is not one of the supported ones
    '''

    # Use a fresh dict per call - the options object is passed on to the readers
    if options is None:
        options = {}

    if not isinstance(data['path'], list):
        data['path'] = [data['path']]

    beamline_extensions = ['mar3450', 'edf', 'cbf']
    file_extension = os.path.splitext(data['path'][index])[1].lstrip('.').lower()

    if file_extension in beamline_extensions:
        reader = integrate_1d

    elif file_extension == 'brml':
        reader = read_brml

    elif file_extension in ['xy', 'xye']:
        reader = read_xy

    else:
        supported = beamline_extensions + ['brml', 'xy', 'xye']
        raise ValueError(f"Cannot read '{data['path'][index]}': unsupported file extension '{file_extension}' (supported: {', '.join(supported)})")

    diffractogram, wavelength = reader(data=data, options=options, index=index)

    if options.get('normalise'):
        diffractogram['I'] = diffractogram['I'] / diffractogram['I'].max()

    if options.get('offset'):
        diffractogram = apply_offset(diffractogram, wavelength, index, options)

    diffractogram = translate_wavelengths(data=diffractogram, wavelength=wavelength)

    return diffractogram, wavelength
2021-10-27 15:33:56 +02:00
def apply_offset(diffractogram, wavelength, index, options):
    ''' Shifts a diffractogram along both axes, in place, keeping the original values in '<column>_org'.

    Input:
    diffractogram (DataFrame): must have columns 'I' and '2th'
    wavelength: wavelength of the data, used to scale the x-offset
    index: the offsets are multiplied by this number
    options: must contain 'offset_y' and 'offset_x'

    Output:
    The same DataFrame, with 'I' and '2th' shifted and 'I_org' and '2th_org' added'''

    cuka = 1.54059

    # y-axis: keep a copy of the original intensities, then shift
    original_intensity = diffractogram['I']
    diffractogram['I_org'] = original_intensity
    diffractogram['I'] = original_intensity + index*options['offset_y']

    # x-axis: the offset is scaled by wavelength / CuKa, so that offset_x is given in 2th_cuka-units
    relative_shift = (wavelength / cuka)*options['offset_x']
    original_twotheta = diffractogram['2th']
    diffractogram['2th_org'] = original_twotheta
    diffractogram['2th'] = original_twotheta + index*relative_shift

    return diffractogram
def revert_offset(diffractogram,which=None):
    ''' Copies the '<column>_org' values back into '2th' and/or 'I', in place.

    Input:
    diffractogram (DataFrame): must have the '_org'-columns for the axes that are reverted
    which: 'x' reverts '2th', 'y' reverts 'I', 'both' reverts both. Anything else leaves the data untouched

    Output:
    The same DataFrame'''

    if which in ('both', 'x'):
        diffractogram['2th'] = diffractogram['2th_org']

    if which in ('both', 'y'):
        diffractogram['I'] = diffractogram['I_org']

    return diffractogram
2021-10-27 15:33:56 +02:00
def load_reflection_table(data, options=None):
    ''' Loads a reflection table (as output by VESTA) and translates its 2th-values.

    Required content of data:
    path (str): path to the reflection table

    Options (entries that are missing are handed to aux.update_options together with these defaults):
    ref_wavelength: wavelength the table was calculated for. Default 1.54059
    to_wavelength: wavelength to translate the 2th-column to, passed on to translate_wavelengths(). Default None

    Output:
    reflections (DataFrame): the table with the columns added by translate_wavelengths()
    '''

    # Use a fresh dict per call - the options object is handed to aux.update_options
    if options is None:
        options = {}

    required_options = ['ref_wavelength', 'to_wavelength']

    default_options = {
        'ref_wavelength': 1.54059,
        'to_wavelength': None
    }

    options = aux.update_options(options=options, required_options=required_options, default_options=default_options)

    # VESTA outputs the file with a header that has a space between the parameter and units - so there is some extra code to rectify the issue
    # that ensues from this formatting
    reflections = pd.read_csv(data['path'], sep=r'\s+')

    # Remove the extra column that appears from the headers issue
    reflections = reflections.drop(columns=reflections.columns[-1])

    # Read the header line again to build the proper column names
    with open(data['path'], 'r') as f:
        headers = f.readline().split()

    # Delete the fifth element (index 4), which is the stray unit '(Å)'
    del headers[4]

    # Change name of column to avoid using greek letters
    headers[7] = '2th'

    # Set the new modified headers as the headers of the table
    reflections.columns = headers

    reflections = translate_wavelengths(data=reflections, wavelength=options['ref_wavelength'], to_wavelength=options['to_wavelength'])

    return reflections
def _translate_two_theta(two_theta, from_wavelength, to_wavelength):
    ''' Translates 2th-values (degrees) from one wavelength to another, returning a NumPy array with NaN where no translation is possible.'''

    ratio = to_wavelength / from_wavelength

    # Going to a longer wavelength, the largest angles end up beyond what can be reached
    # (sine above 1). Those are left as NaN
    if ratio > 1:
        max_two_theta = 2*np.arcsin(from_wavelength/to_wavelength) * 180/np.pi
    else:
        max_two_theta = two_theta.max()

    angles = two_theta.to_numpy(dtype=float)
    translated = np.full(angles.shape, np.nan)

    reachable = angles <= max_two_theta

    # Only evaluate arcsin where it is defined. The clip takes care of rounding right at the limit
    sine = np.clip(ratio * np.sin((angles[reachable]/2) * np.pi/180), -1.0, 1.0)
    translated[reachable] = 2*np.arcsin(sine) * 180/np.pi

    return translated


def translate_wavelengths(data, wavelength, to_wavelength=None):
    ''' Adds alternative x-axes to a DataFrame with a '2th'-column (degrees), in place.

    Input:
    data (DataFrame): must have a '2th'-column
    wavelength: wavelength (Å) the 2th-values were measured / calculated with
    to_wavelength (optional): if given, the '2th'-column is replaced by the values translated to this wavelength

    Output:
    The same DataFrame with these columns added:
    2th_cuka, 2th_moka: 2th translated to CuKalpha / MoKalpha (NaN where the angle cannot be reached)
    d: wavelength / (2 sin(theta)), with theta = 2th / 2
    1/d, q = 4 pi sin(theta) / wavelength, q2 = q^2, q4 = q^4
    '''

    cuka = 1.54059 # Å
    moka = 0.71073 # Å

    # Translate to CuKalpha and MoKalpha
    data['2th_cuka'] = _translate_two_theta(data['2th'], wavelength, cuka)
    data['2th_moka'] = _translate_two_theta(data['2th'], wavelength, moka)

    # Convert to other parameters. theta is half of 2th, in radians
    theta = (data['2th']/2) * np.pi/180

    data['d'] = wavelength / (2*np.sin(theta))
    data['1/d'] = 1/data['d']
    data['q'] = np.abs((4*np.pi/wavelength)*np.sin(theta))
    data['q2'] = data['q']**2
    data['q4'] = data['q']**4

    # Translate the 2th-column itself, going via the CuKalpha-values
    if to_wavelength:
        data['2th'] = _translate_two_theta(data['2th_cuka'], cuka, to_wavelength)

    return data
2022-03-16 14:16:41 +01:00
2022-03-16 15:21:05 +01:00
def find_wavelength_from_xy(path):
    ''' Finds the wavelength (Å) of a .xy-file from its header.

    Two kinds of lines are recognised:
    - lines containing 'Anode' (.xy-files output from EVA): the ninth whitespace-separated item is taken as the anode material and looked up ('Cu' or 'Mo')
    - lines containing 'Wavelength' (.xy-files output from pyFAI integration): the third whitespace-separated item is multiplied by 10^10
    If several lines match, the last one is used.

    Input:
    path (str): path to the .xy-file

    Output:
    wavelength (float)

    Raises:
    ValueError if no line with wavelength information is found
    KeyError if the anode material is not known
    '''

    wavelength_dict = {'Cu': 1.54059, 'Mo': 0.71073}

    wavelength = None

    with open(path, 'r') as f:
        for line in f:

            # For .xy-files output from EVA
            if 'Anode' in line:
                anode = line.split()[8].strip('"')
                wavelength = wavelength_dict[anode]

            # For .xy-files output from pyFAI integration
            elif 'Wavelength' in line:
                wavelength = float(line.split()[2])*10**10

    if wavelength is None:
        raise ValueError(f'No wavelength information found in {path}')

    return wavelength