A series of Python3 script to lower the barrier of computing and simulating molecular and material systems.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

180 lines
5.5 KiB

2 years ago
from typing import List, Sequence, Set, Dict, Union, Iterator
import warnings
import collections.abc
import numpy as np
from ase.data import atomic_numbers, chemical_symbols
from ase.formula import Formula
def string2symbols(s: str) -> List[str]:
"""Convert string to list of chemical symbols."""
return list(Formula(s))
def symbols2numbers(symbols) -> List[int]:
if isinstance(symbols, str):
symbols = string2symbols(symbols)
numbers = []
for s in symbols:
if isinstance(s, str):
numbers.append(atomic_numbers[s])
else:
numbers.append(int(s))
return numbers
class Symbols(collections.abc.Sequence):
"""A sequence of chemical symbols.
``atoms.symbols`` is a :class:`ase.symbols.Symbols` object. This
object works like an editable view of ``atoms.numbers``, except
its elements are manipulated as strings.
Examples:
>>> from ase.build import molecule
>>> atoms = molecule('CH3CH2OH')
>>> atoms.symbols
Symbols('C2OH6')
>>> atoms.symbols[:3]
Symbols('C2O')
>>> atoms.symbols == 'H'
array([False, False, False, True, True, True, True, True, True], dtype=bool)
>>> atoms.symbols[-3:] = 'Pu'
>>> atoms.symbols
Symbols('C2OH3Pu3')
>>> atoms.symbols[3:6] = 'Mo2U'
>>> atoms.symbols
Symbols('C2OMo2UPu3')
>>> atoms.symbols.formula
Formula('C2OMo2UPu3')
The :class:`ase.formula.Formula` object is useful for extended
formatting options and analysis.
"""
def __init__(self, numbers) -> None:
self.numbers = np.asarray(numbers, int)
@classmethod
def fromsymbols(cls, symbols) -> 'Symbols':
numbers = symbols2numbers(symbols)
return cls(np.array(numbers))
@property
def formula(self) -> Formula:
"""Formula object."""
string = Formula.from_list(self).format('reduce')
return Formula(string)
def __getitem__(self, key) -> Union['Symbols', str]:
num = self.numbers[key]
if np.isscalar(num):
return chemical_symbols[num]
return Symbols(num)
def __iter__(self) -> Iterator[str]:
for num in self.numbers:
yield chemical_symbols[num]
def __setitem__(self, key, value) -> None:
numbers = symbols2numbers(value)
if len(numbers) == 1:
self.numbers[key] = numbers[0]
else:
self.numbers[key] = numbers
def __len__(self) -> int:
return len(self.numbers)
def __str__(self) -> str:
return self.get_chemical_formula('reduce')
def __repr__(self) -> str:
return 'Symbols(\'{}\')'.format(self)
def __eq__(self, obj) -> bool:
if not hasattr(obj, '__len__'):
return False
try:
symbols = Symbols.fromsymbols(obj)
except Exception:
# Typically this would happen if obj cannot be converged to
# atomic numbers.
return False
return self.numbers == symbols.numbers
def get_chemical_formula(
self,
mode: str = 'hill',
empirical: bool = False,
) -> str:
"""Get chemical formula.
See documentation of ase.atoms.Atoms.get_chemical_formula()."""
# XXX Delegate the work to the Formula object!
if mode in ('reduce', 'all') and empirical:
warnings.warn("Empirical chemical formula not available "
"for mode '{}'".format(mode))
if len(self) == 0:
return ''
numbers = self.numbers
if mode == 'reduce':
n = len(numbers)
changes = np.concatenate(([0], np.arange(1, n)[numbers[1:] !=
numbers[:-1]]))
symbols = [chemical_symbols[e] for e in numbers[changes]]
counts = np.append(changes[1:], n) - changes
tokens = []
for s, c in zip(symbols, counts):
tokens.append(s)
if c > 1:
tokens.append(str(c))
formula = ''.join(tokens)
elif mode == 'all':
formula = ''.join([chemical_symbols[n] for n in numbers])
else:
symbols = [chemical_symbols[Z] for Z in numbers]
f = Formula('', _tree=[(symbols, 1)])
if empirical:
f, _ = f.reduce()
if mode in {'hill', 'metal'}:
formula = f.format(mode)
else:
raise ValueError(
"Use mode = 'all', 'reduce', 'hill' or 'metal'.")
return formula
def search(self, symbols) -> Sequence[int]:
"""Return the indices of elements with given symbol or symbols."""
numbers = set(symbols2numbers(symbols))
indices = [i for i, number in enumerate(self.numbers)
if number in numbers]
return np.array(indices, int)
def species(self) -> Set[str]:
"""Return unique symbols as a set."""
return set(self)
def indices(self) -> Dict[str, Sequence[int]]:
"""Return dictionary mapping each unique symbol to indices.
>>> from ase.build import molecule
>>> atoms = molecule('CH3CH2OH')
>>> atoms.symbols.indices()
{'C': array([0, 1]), 'O': array([2]), 'H': array([3, 4, 5, 6, 7, 8])}
"""
dct: Dict[str, List[int]] = {}
for i, symbol in enumerate(self):
dct.setdefault(symbol, []).append(i)
return {key: np.array(value, int) for key, value in dct.items()}