from typing import List, Sequence, Set, Dict, Union, Iterator import warnings import collections.abc import numpy as np from ase.data import atomic_numbers, chemical_symbols from ase.formula import Formula def string2symbols(s: str) -> List[str]: """Convert string to list of chemical symbols.""" return list(Formula(s)) def symbols2numbers(symbols) -> List[int]: if isinstance(symbols, str): symbols = string2symbols(symbols) numbers = [] for s in symbols: if isinstance(s, str): numbers.append(atomic_numbers[s]) else: numbers.append(int(s)) return numbers class Symbols(collections.abc.Sequence): """A sequence of chemical symbols. ``atoms.symbols`` is a :class:`ase.symbols.Symbols` object. This object works like an editable view of ``atoms.numbers``, except its elements are manipulated as strings. Examples: >>> from ase.build import molecule >>> atoms = molecule('CH3CH2OH') >>> atoms.symbols Symbols('C2OH6') >>> atoms.symbols[:3] Symbols('C2O') >>> atoms.symbols == 'H' array([False, False, False, True, True, True, True, True, True], dtype=bool) >>> atoms.symbols[-3:] = 'Pu' >>> atoms.symbols Symbols('C2OH3Pu3') >>> atoms.symbols[3:6] = 'Mo2U' >>> atoms.symbols Symbols('C2OMo2UPu3') >>> atoms.symbols.formula Formula('C2OMo2UPu3') The :class:`ase.formula.Formula` object is useful for extended formatting options and analysis. """ def __init__(self, numbers) -> None: self.numbers = np.asarray(numbers, int) @classmethod def fromsymbols(cls, symbols) -> 'Symbols': numbers = symbols2numbers(symbols) return cls(np.array(numbers)) @property def formula(self) -> Formula: """Formula object.""" string = Formula.from_list(self).format('reduce') return Formula(string) def __getitem__(self, key) -> Union['Symbols', str]: num = self.numbers[key] if np.isscalar(num): return chemical_symbols[num] return Symbols(num) def __iter__(self) -> Iterator[str]: for num in self.numbers: yield chemical_symbols[num] def __setitem__(self, key, value) -> None: numbers = symbols2numbers(value) if len(numbers) == 1: self.numbers[key] = numbers[0] else: self.numbers[key] = numbers def __len__(self) -> int: return len(self.numbers) def __str__(self) -> str: return self.get_chemical_formula('reduce') def __repr__(self) -> str: return 'Symbols(\'{}\')'.format(self) def __eq__(self, obj) -> bool: if not hasattr(obj, '__len__'): return False try: symbols = Symbols.fromsymbols(obj) except Exception: # Typically this would happen if obj cannot be converged to # atomic numbers. return False return self.numbers == symbols.numbers def get_chemical_formula( self, mode: str = 'hill', empirical: bool = False, ) -> str: """Get chemical formula. See documentation of ase.atoms.Atoms.get_chemical_formula().""" # XXX Delegate the work to the Formula object! if mode in ('reduce', 'all') and empirical: warnings.warn("Empirical chemical formula not available " "for mode '{}'".format(mode)) if len(self) == 0: return '' numbers = self.numbers if mode == 'reduce': n = len(numbers) changes = np.concatenate(([0], np.arange(1, n)[numbers[1:] != numbers[:-1]])) symbols = [chemical_symbols[e] for e in numbers[changes]] counts = np.append(changes[1:], n) - changes tokens = [] for s, c in zip(symbols, counts): tokens.append(s) if c > 1: tokens.append(str(c)) formula = ''.join(tokens) elif mode == 'all': formula = ''.join([chemical_symbols[n] for n in numbers]) else: symbols = [chemical_symbols[Z] for Z in numbers] f = Formula('', _tree=[(symbols, 1)]) if empirical: f, _ = f.reduce() if mode in {'hill', 'metal'}: formula = f.format(mode) else: raise ValueError( "Use mode = 'all', 'reduce', 'hill' or 'metal'.") return formula def search(self, symbols) -> Sequence[int]: """Return the indices of elements with given symbol or symbols.""" numbers = set(symbols2numbers(symbols)) indices = [i for i, number in enumerate(self.numbers) if number in numbers] return np.array(indices, int) def species(self) -> Set[str]: """Return unique symbols as a set.""" return set(self) def indices(self) -> Dict[str, Sequence[int]]: """Return dictionary mapping each unique symbol to indices. >>> from ase.build import molecule >>> atoms = molecule('CH3CH2OH') >>> atoms.symbols.indices() {'C': array([0, 1]), 'O': array([2]), 'H': array([3, 4, 5, 6, 7, 8])} """ dct: Dict[str, List[int]] = {} for i, symbol in enumerate(self): dct.setdefault(symbol, []).append(i) return {key: np.array(value, int) for key, value in dct.items()}