import xml.etree.ElementTree as ET
import numpy as np
[docs]
class QE_XML:
    """Quantum ESPRESSO XML reader compatible with the qes:qes-1.0 schema.

    The file is parsed once in the constructor and every accessor walks the
    in-memory tree.  Element lookups are namespace-agnostic (they compare
    local tag names), so files whose root element is namespaced while the
    children are not are handled the same way as uniformly (un)qualified ones.

    Attributes:
        filename: Path that was passed to the constructor.
        tree: The parsed ``ElementTree``.
        root: Root element of ``tree``.
        ns: Namespace URI of the root tag, or ``""`` if it has none.
    """

    def __init__(self, filename):
        """Parse *filename* and cache the tree, its root and the root namespace.

        Any exception raised by ``xml.etree.ElementTree.parse`` (missing file,
        malformed XML) propagates unchanged.
        """
        self.filename = filename
        self.tree = ET.parse(filename)
        self.root = self.tree.getroot()
        self.ns = self._get_namespace()

    def _get_namespace(self):
        """Return the namespace URI embedded in the root tag ("" if none)."""
        root_tag = self.root.tag
        if root_tag.startswith("{"):
            return root_tag[1:].split("}")[0]
        return ""

    def _tag(self, name):
        """Return *name* qualified with the root namespace in ``{uri}name`` form."""
        return f"{{{self.ns}}}{name}" if self.ns else name

    # -------------------------------------------------
    # Namespace-agnostic lookup helpers
    # -------------------------------------------------
    def _local_name(self, tag):
        """Return the local-name of an Element tag (strip namespace)."""
        if tag is None:
            return None
        return tag.split("}", 1)[-1] if "}" in tag else tag

    def _iter_named(self, parent, name):
        """Yield *parent* and its descendants whose local name equals *name*."""
        return (el for el in parent.iter() if self._local_name(el.tag) == name)

    def _child_named(self, parent, name):
        """Return the first direct child of *parent* named *name*, or None."""
        for child in parent:
            if self._local_name(child.tag) == name:
                return child
        return None

    @staticmethod
    def _floats(elem):
        """Parse an element's whitespace-separated text into a list of floats.

        Returns an empty list when *elem* is None or carries no text, so
        callers can treat "absent" and "empty" uniformly.
        """
        if elem is None or not elem.text:
            return []
        return [float(token) for token in elem.text.split()]

    # -------------------------------------------------
    # Basic structure information
    # -------------------------------------------------
    def get_lattice(self):
        """Return the 3x3 array of lattice vectors a1, a2, a3 (one per row).

        Values are returned as stored in the file (Bohr according to this
        reader's conventions); no unit conversion is applied.

        Raises:
            ValueError: if no ``<cell>`` element exists, or if one of the
                ``a1``/``a2``/``a3`` vectors is missing or empty.
        """
        # Preferred locations first: the input structure, then any structure.
        preferred_paths = (
            f".//{self._tag('input')}/{self._tag('atomic_structure')}/{self._tag('cell')}",
            f".//{self._tag('atomic_structure')}/{self._tag('cell')}",
        )
        cell = None
        for path in preferred_paths:
            cell = self.root.find(path)
            if cell is not None:
                break
        # Fall back to the first <cell> anywhere, ignoring namespaces.
        if cell is None:
            cell = next(self._iter_named(self.root, "cell"), None)
        if cell is None:
            top_children = [self._local_name(c.tag) for c in self.root][:20]
            raise ValueError(
                "No <cell> element found in XML (tried namespaced and local-name searches). "
                f"Top-level child tags: {top_children}"
            )
        vecs = []
        for desired in ("a1", "a2", "a3"):
            # Direct children win; descendants are only a last resort.
            found = self._child_named(cell, desired)
            if found is None:
                found = next(self._iter_named(cell, desired), None)
            components = self._floats(found)
            if not components:
                raise ValueError(f"Missing {desired} vector in <cell> (searched namespaced and local-name).")
            vecs.append(components)
        return np.array(vecs)

    def get_atomic_positions(self):
        """Return a list of ``(name, position)`` tuples, one per ``<atom>``.

        ``name`` comes from the ``name`` attribute ("X" when absent) and
        ``position`` is a NumPy array of the coordinates as stored in the file
        (Bohr according to this reader's conventions).

        Raises:
            ValueError: if no ``<atomic_positions>`` element exists, if it
                contains no ``<atom>`` entries, or if an atom has no
                coordinates.
        """
        preferred_paths = (
            f".//{self._tag('input')}/{self._tag('atomic_structure')}/{self._tag('atomic_positions')}",
            f".//{self._tag('atomic_structure')}/{self._tag('atomic_positions')}",
        )
        atoms_parent = None
        for path in preferred_paths:
            atoms_parent = self.root.find(path)
            if atoms_parent is not None:
                break
        # Fall back to the first <atomic_positions> anywhere, ignoring namespaces.
        if atoms_parent is None:
            atoms_parent = next(self._iter_named(self.root, "atomic_positions"), None)
        if atoms_parent is None:
            top_children = [self._local_name(c.tag) for c in self.root][:20]
            raise ValueError(
                "No <atomic_positions> found in XML (tried namespaced and local-name searches). "
                f"Top-level child tags: {top_children}"
            )
        atoms = []
        for atom in self._iter_named(atoms_parent, "atom"):
            name = atom.attrib.get("name", "X")
            coords = self._floats(atom)
            if not coords:
                # An empty <atom/> used to surface as an AttributeError on None.
                raise ValueError(f"<atom> element '{name}' has no coordinates.")
            atoms.append((name, np.array(coords)))
        if not atoms:
            raise ValueError("<atomic_positions> found, but no <atom> elements parsed.")
        return atoms

    def get_kpoints(self):
        """Return the k-points of the first k-point container that lists any.

        Containers (``starting_k_points``, ``k_points_IBZ``, ``k_points``) are
        visited in document order.  A container without ``<k_point>`` entries
        is skipped so that a later one can supply them.  Coordinates are
        returned as written in the file; no conversion is applied.

        Returns:
            An ``(N, 3)``-style array of k-points, or an empty array when no
            container holds any k-point.
        """
        container_names = ("starting_k_points", "k_points_IBZ", "k_points")
        for elem in self.root.iter():
            if self._local_name(elem.tag) not in container_names:
                continue
            kpts = []
            for kp in self._iter_named(elem, "k_point"):
                coords = self._floats(kp)
                if coords:  # ignore empty <k_point/> entries
                    kpts.append(coords)
            if kpts:
                return np.array(kpts)
        return np.array([])

    def get_eigenvalues(self):
        """Return eigenvalues as a NumPy array (N x Nbnd).

        N is the number of ``<ks_energies>`` blocks that carry a non-empty
        ``<eigenvalues>`` child, Nbnd the number of values per block.

        Raises:
            ValueError: if no eigenvalues are found.
        """
        eigenvalues = []
        for ks in self._iter_named(self.root, "ks_energies"):
            values = self._floats(self._child_named(ks, "eigenvalues"))
            if values:
                eigenvalues.append(values)
        if not eigenvalues:
            raise ValueError("No eigenvalues found in the XML file.")
        return np.array(eigenvalues)

    @staticmethod
    def get_qe_eigenvalues(xml_file):
        """Parse *xml_file* and return its eigenvalues as an N x Nbnd array.

        Convenience entry point that needs no existing reader; it is a static
        method so it can be called on the class or on an instance.  It
        delegates to :meth:`get_eigenvalues`, so tag matching is
        namespace-agnostic.

        Raises:
            ValueError: if no eigenvalues are found.
        """
        return QE_XML(xml_file).get_eigenvalues()

    def get_kpoints_and_eigenvalues(self):
        """Return the tuple ``(kpoints, eigenvalues, occupations)``.

        One row is produced per ``<ks_energies>`` block that has a non-empty
        ``<k_point>`` child; blocks without one are skipped.  A block lacking
        ``<eigenvalues>`` or ``<occupations>`` contributes an empty row to the
        corresponding output.

        Returns:
            kpoints: N x 3 array
            eigenvalues: N x Nbnd array
            occupations: N x Nbnd array
        """
        kpts, eigs, occs = [], [], []
        for ks in self._iter_named(self.root, "ks_energies"):
            # Children are matched by local name: qualifying them with the
            # root namespace misses un-namespaced children entirely.
            kpoint = self._floats(self._child_named(ks, "k_point"))
            if not kpoint:
                continue
            kpts.append(kpoint)
            eigs.append(self._floats(self._child_named(ks, "eigenvalues")))
            occs.append(self._floats(self._child_named(ks, "occupations")))
        return np.array(kpts), np.array(eigs), np.array(occs)

    def summary(self):
        """Print the lattice vectors and the list of atoms to stdout."""
        lattice = self.get_lattice()
        atoms = self.get_atomic_positions()
        print("== Quantum ESPRESSO XML summary ==")
        print(f"Lattice vectors (Bohr):\n{lattice}")
        print(f"Number of atoms: {len(atoms)}")
        for name, pos in atoms:
            print(f" {name:2s} {pos}")
# ------------------------
# Example usage
# ------------------------
if __name__ == "__main__":
    # Demo run: load one file, print its structure summary, then report how
    # many k-points and bands were read.
    reader = QE_XML("si/bs/si.xml")  # or your file name
    reader.summary()
    kpoints = reader.get_kpoints()
    band_energies = reader.get_eigenvalues()
    print(band_energies)
    n_kpoints = len(kpoints)
    n_bands = band_energies.shape[1]
    print(f"\nFound {n_kpoints} k-points, {n_bands} bands per k-point")