diff --git a/src/openfermion/chem/molecular_data.py b/src/openfermion/chem/molecular_data.py index 9229f80ea..bea97e1df 100644 --- a/src/openfermion/chem/molecular_data.py +++ b/src/openfermion/chem/molecular_data.py @@ -12,7 +12,7 @@ """Class and functions to store quantum chemistry data.""" import os -import uuid +import tempfile import shutil import numpy import h5py @@ -699,157 +699,174 @@ def ccsd_double_amps(self): def ccsd_double_amps(self, value): self._ccsd_double_amps = value - def save(self): - """Method to save the class under a systematic name.""" - # Create a temporary file and swap it to the original name in case - # data needs to be loaded while saving - tmp_name = uuid.uuid4() - with h5py.File("{}.hdf5".format(tmp_name), "w") as f: - # Save geometry (atoms and positions need to be separate): - d_geom = f.create_group("geometry") - if not isinstance(self.geometry, basestring): - atoms = [numpy.bytes_(item[0]) for item in self.geometry] - positions = numpy.array([list(item[1]) for item in self.geometry]) - else: - atoms = numpy.bytes_(self.geometry) - positions = None - d_geom.create_dataset("atoms", data=(atoms if atoms is not None else False)) - d_geom.create_dataset("positions", data=(positions if positions is not None else False)) - # Save basis: - f.create_dataset("basis", data=numpy.bytes_(self.basis)) - # Save multiplicity: - f.create_dataset("multiplicity", data=self.multiplicity) - # Save charge: - f.create_dataset("charge", data=self.charge) - # Save description: - f.create_dataset("description", data=numpy.bytes_(self.description)) - # Save name: - f.create_dataset("name", data=numpy.bytes_(self.name)) - # Save n_atoms: - f.create_dataset("n_atoms", data=self.n_atoms) - # Save atoms: - f.create_dataset("atoms", data=numpy.bytes_(self.atoms)) - # Save protons: - f.create_dataset("protons", data=self.protons) - # Save n_electrons: - f.create_dataset("n_electrons", data=self.n_electrons) - # Save generic attributes from calculations: - f.create_dataset( - "n_orbitals", data=(self.n_orbitals if self.n_orbitals is not None else False) - ) - f.create_dataset( - "n_qubits", data=(self.n_qubits if self.n_qubits is not None else False) - ) - f.create_dataset( - "nuclear_repulsion", - data=(self.nuclear_repulsion if self.nuclear_repulsion is not None else False), - ) - # Save attributes generated from SCF calculation. - f.create_dataset( - "hf_energy", data=(self.hf_energy if self.hf_energy is not None else False) - ) - f.create_dataset( - "canonical_orbitals", - data=(self.canonical_orbitals if self.canonical_orbitals is not None else False), - compression=("gzip" if self.canonical_orbitals is not None else None), - ) - f.create_dataset( - "overlap_integrals", - data=(self.overlap_integrals if self.overlap_integrals is not None else False), - compression=("gzip" if self.overlap_integrals is not None else None), - ) - f.create_dataset( - "orbital_energies", - data=(self.orbital_energies if self.orbital_energies is not None else False), - ) - # Save attributes generated from integrals. - f.create_dataset( - "one_body_integrals", - data=(self.one_body_integrals if self.one_body_integrals is not None else False), - compression=("gzip" if self.one_body_integrals is not None else None), - ) - f.create_dataset( - "two_body_integrals", - data=(self.two_body_integrals if self.two_body_integrals is not None else False), - compression=("gzip" if self.two_body_integrals is not None else None), - ) - # Save attributes generated from MP2 calculation. - f.create_dataset( - "mp2_energy", data=(self.mp2_energy if self.mp2_energy is not None else False) - ) - # Save attributes generated from CISD calculation. - f.create_dataset( - "cisd_energy", data=(self.cisd_energy if self.cisd_energy is not None else False) - ) - f.create_dataset( - "cisd_one_rdm", - data=(self.cisd_one_rdm if self.cisd_one_rdm is not None else False), - compression=("gzip" if self.cisd_one_rdm is not None else None), - ) - f.create_dataset( - "cisd_two_rdm", - data=(self.cisd_two_rdm if self.cisd_two_rdm is not None else False), - compression=("gzip" if self.cisd_two_rdm is not None else None), - ) - # Save attributes generated from exact diagonalization. - f.create_dataset( - "fci_energy", data=(self.fci_energy if self.fci_energy is not None else False) - ) - f.create_dataset( - "fci_one_rdm", - data=(self.fci_one_rdm if self.fci_one_rdm is not None else False), - compression=("gzip" if self.fci_one_rdm is not None else None), - ) - f.create_dataset( - "fci_two_rdm", - data=(self.fci_two_rdm if self.fci_two_rdm is not None else False), - compression=("gzip" if self.fci_two_rdm is not None else None), - ) - # Save attributes generated from CCSD calculation. - f.create_dataset( - "ccsd_energy", data=(self.ccsd_energy if self.ccsd_energy is not None else False) - ) - f.create_dataset( - "ccsd_single_amps", - data=(self.ccsd_single_amps if self.ccsd_single_amps is not None else False), - compression=("gzip" if self.ccsd_single_amps is not None else None), - ) - f.create_dataset( - "ccsd_double_amps", - data=(self.ccsd_double_amps if self.ccsd_double_amps is not None else False), - compression=("gzip" if self.ccsd_double_amps is not None else None), - ) + def _write_hdf5_data(self, hdf5_file): + """Method to write the class data to an HDF5 file. - # Save general calculation data - key_list = list(self.general_calculations.keys()) - f.create_dataset( - "general_calculations_keys", - data=([numpy.bytes_(key) for key in key_list] if len(key_list) > 0 else False), - ) - f.create_dataset( - "general_calculations_values", - data=( - [self.general_calculations[key] for key in key_list] - if len(key_list) > 0 - else False - ), - ) + The hdf5_file parameter is expected to be an already-open h5py.File handle. + """ + # Save geometry (atoms and positions need to be separate): + d_geom = hdf5_file.create_group("geometry") + if not isinstance(self.geometry, basestring): + atoms = [numpy.bytes_(item[0]) for item in self.geometry] + positions = numpy.array([list(item[1]) for item in self.geometry]) + else: + atoms = numpy.bytes_(self.geometry) + positions = None + d_geom.create_dataset("atoms", data=(atoms if atoms is not None else False)) + d_geom.create_dataset("positions", data=(positions if positions is not None else False)) + # Save basis: + hdf5_file.create_dataset("basis", data=numpy.bytes_(self.basis)) + # Save multiplicity: + hdf5_file.create_dataset("multiplicity", data=self.multiplicity) + # Save charge: + hdf5_file.create_dataset("charge", data=self.charge) + # Save description: + hdf5_file.create_dataset("description", data=numpy.bytes_(self.description)) + # Save name: + hdf5_file.create_dataset("name", data=numpy.bytes_(self.name)) + # Save n_atoms: + hdf5_file.create_dataset("n_atoms", data=self.n_atoms) + # Save atoms: + hdf5_file.create_dataset("atoms", data=numpy.bytes_(self.atoms)) + # Save protons: + hdf5_file.create_dataset("protons", data=self.protons) + # Save n_electrons: + hdf5_file.create_dataset("n_electrons", data=self.n_electrons) + # Save generic attributes from calculations: + hdf5_file.create_dataset( + "n_orbitals", data=(self.n_orbitals if self.n_orbitals is not None else False) + ) + hdf5_file.create_dataset( + "n_qubits", data=(self.n_qubits if self.n_qubits is not None else False) + ) + hdf5_file.create_dataset( + "nuclear_repulsion", + data=(self.nuclear_repulsion if self.nuclear_repulsion is not None else False), + ) + # Save attributes generated from SCF calculation. + hdf5_file.create_dataset( + "hf_energy", data=(self.hf_energy if self.hf_energy is not None else False) + ) + hdf5_file.create_dataset( + "canonical_orbitals", + data=(self.canonical_orbitals if self.canonical_orbitals is not None else False), + compression=("gzip" if self.canonical_orbitals is not None else None), + ) + hdf5_file.create_dataset( + "overlap_integrals", + data=(self.overlap_integrals if self.overlap_integrals is not None else False), + compression=("gzip" if self.overlap_integrals is not None else None), + ) + hdf5_file.create_dataset( + "orbital_energies", + data=(self.orbital_energies if self.orbital_energies is not None else False), + ) + # Save attributes generated from integrals. + hdf5_file.create_dataset( + "one_body_integrals", + data=(self.one_body_integrals if self.one_body_integrals is not None else False), + compression=("gzip" if self.one_body_integrals is not None else None), + ) + hdf5_file.create_dataset( + "two_body_integrals", + data=(self.two_body_integrals if self.two_body_integrals is not None else False), + compression=("gzip" if self.two_body_integrals is not None else None), + ) + # Save attributes generated from MP2 calculation. + hdf5_file.create_dataset( + "mp2_energy", data=(self.mp2_energy if self.mp2_energy is not None else False) + ) + # Save attributes generated from CISD calculation. + hdf5_file.create_dataset( + "cisd_energy", data=(self.cisd_energy if self.cisd_energy is not None else False) + ) + hdf5_file.create_dataset( + "cisd_one_rdm", + data=(self.cisd_one_rdm if self.cisd_one_rdm is not None else False), + compression=("gzip" if self.cisd_one_rdm is not None else None), + ) + hdf5_file.create_dataset( + "cisd_two_rdm", + data=(self.cisd_two_rdm if self.cisd_two_rdm is not None else False), + compression=("gzip" if self.cisd_two_rdm is not None else None), + ) + # Save attributes generated from exact diagonalization. + hdf5_file.create_dataset( + "fci_energy", data=(self.fci_energy if self.fci_energy is not None else False) + ) + hdf5_file.create_dataset( + "fci_one_rdm", + data=(self.fci_one_rdm if self.fci_one_rdm is not None else False), + compression=("gzip" if self.fci_one_rdm is not None else None), + ) + hdf5_file.create_dataset( + "fci_two_rdm", + data=(self.fci_two_rdm if self.fci_two_rdm is not None else False), + compression=("gzip" if self.fci_two_rdm is not None else None), + ) + # Save attributes generated from CCSD calculation. + hdf5_file.create_dataset( + "ccsd_energy", data=(self.ccsd_energy if self.ccsd_energy is not None else False) + ) + hdf5_file.create_dataset( + "ccsd_single_amps", + data=(self.ccsd_single_amps if self.ccsd_single_amps is not None else False), + compression=("gzip" if self.ccsd_single_amps is not None else None), + ) + hdf5_file.create_dataset( + "ccsd_double_amps", + data=(self.ccsd_double_amps if self.ccsd_double_amps is not None else False), + compression=("gzip" if self.ccsd_double_amps is not None else None), + ) - # Remove old file first for compatibility with systems that don't allow - # rename replacement. Catching OSError for when file does not exist - # yet - try: - os.remove("{}.hdf5".format(self.filename)) - except OSError: - # Nothing to do but carry on. - pass + # Save general calculation data + key_list = list(self.general_calculations.keys()) + hdf5_file.create_dataset( + "general_calculations_keys", + data=([numpy.bytes_(key) for key in key_list] if len(key_list) > 0 else False), + ) + hdf5_file.create_dataset( + "general_calculations_values", + data=( + [self.general_calculations[key] for key in key_list] if len(key_list) > 0 else False + ), + ) - shutil.move("{}.hdf5".format(tmp_name), "{}.hdf5".format(self.filename)) + def save(self): + """Method to save the class under a systematic name.""" + # Create a temporary file in the same directory as self.filename, write data to it, then + # finally replace self.filename with the temp file. + output_dir = os.path.dirname(os.path.abspath(self.filename)) + final_filename = f"{self.filename}.hdf5" + tmp_name = None + try: + # Use delete=False for Windows compatibility (allows h5py to open the path). + with tempfile.NamedTemporaryFile( + delete=False, suffix='.hdf5', dir=output_dir + ) as tmp_file: + tmp_name = tmp_file.name + with h5py.File(tmp_file, "w") as hdf5_file: + self._write_hdf5_data(hdf5_file) + + # Determine the umask by setting it to an arbitrary value & immediately restoring it. + # Note: trying to set umask(0) can be problematic, so we use something else. + current_umask = os.umask(0o600) + os.umask(current_umask) + # Now apply the umask to the file and move it. + os.chmod(tmp_name, 0o666 & ~current_umask) + + # os.replace is atomic and works across platforms. + os.replace(tmp_name, final_filename) + tmp_name = None + finally: + # Clean up if something went wrong. + if tmp_name and os.path.exists(tmp_name): + os.remove(tmp_name) # pragma: no cover def load(self): geometry = [] - with h5py.File("{}.hdf5".format(self.filename), "r") as f: + with h5py.File(f"{self.filename}.hdf5", "r") as f: # Load geometry: data = f["geometry/atoms"] if data.shape != (()):