hoomd_gsd/
file_layer.rs

1// Copyright (c) 2024-2026 The Regents of the University of Michigan.
2// Part of hoomd-rs, released under the BSD 3-Clause License.
3
4//! Read and write data chunks in GSD files.
5//!
6//! Use [`GsdFile`] to interact with GSD files on the filesystem.
7
8use itertools::Itertools;
9use memmap2::Mmap;
10use std::{
11    cmp::{Ord, Ordering, PartialOrd},
12    collections::{HashMap, HashSet},
13    fs::File,
14    io::{self, SeekFrom, prelude::*},
15    num::TryFromIntError,
16    path::{Path, PathBuf},
17    string::FromUtf8Error,
18};
19use thiserror::Error;
20
/// Granularity of the name buffer: its length is a multiple of `NAME_SIZE` bytes.
const NAME_SIZE: u64 = 64;

/// Size of a single index entry, in bytes.
const INDEX_ENTRY_SIZE: u64 = 32;

/// Size of a single index entry, in bytes, as a `usize`.
const INDEX_ENTRY_USIZE: usize = 32;

/// Size of the file header, in bytes.
const HEADER_SIZE: u64 = 256;

/// Size of the file header, in bytes, as a `usize`.
const HEADER_USIZE: usize = 256;

/// Magic value that marks a file as a GSD file.
const MAGIC_ID: u64 = 0x65df_65df_65df_65df;

/// The current GSD file version as `(major, minor)`.
const CURRENT_FILE_VERSION: (u16, u16) = (2, 1);

/// Size of the file index in newly created GSD files.
const INITIAL_INDEX_SIZE: u64 = 128;

/// Size of the name list when it is first created.
const INITIAL_NAME_LIST_SIZE: u64 = 1024;

/// Starting value of the maximum write buffer size (1 MiB).
const INITIAL_MAXIMUM_WRITE_BUFFER_SIZE: usize = 1 << 20;
50
51/// Errors that can occur during while decoding file content.
52#[non_exhaustive]
53#[derive(Error, Debug)]
54pub enum DecodeError {
55    /// Encountered an IO error.
56    #[error("I/O error")]
57    IO(#[from] io::Error),
58
59    /// Invalid application string.
60    #[error("invalid `application`")]
61    InvalidApplication(#[source] FromUtf8Error),
62
63    /// Invalid schema string.
64    #[error("invalid `schema`")]
65    InvalidSchema(#[source] FromUtf8Error),
66
67    /// Invalid file identifier.
68    #[error("invalid file identifier `{0}`")]
69    InvalidFileIdentifier(u64),
70
71    /// Index outside the file.
72    #[error("index out of bounds (location={0}, length={1})")]
73    IndexOutOfBounds(u64, u64),
74
75    /// Name list outside the file.
76    #[error("name list out of bounds (location={0}, length={1})")]
77    NameListOutOfBounds(u64, u64),
78
79    /// Name list outside the file.
80    #[error("name list not terminated")]
81    NameListNotTerminated,
82
83    /// Cannot add any more chunk names.
84    #[error("too many chunk names")]
85    NameListOverflow,
86
87    /// Unsupported version.
88    #[error("unsupported GSD file version ({0}, {1})")]
89    UnsupportedVersion(u16, u16),
90
91    /// An index is not addressable.
92    #[error("file index not addressable")]
93    UnaddressableIndex(#[source] TryFromIntError),
94
95    /// File content is not addressable.
96    #[error("file content not addressable")]
97    UnaddressableContent(#[source] TryFromIntError),
98
99    /// Invalid chunk name string.
100    #[error("invalid chunk name")]
101    InvalidChunkName(#[source] FromUtf8Error),
102
103    /// Duplicate chunk name.
104    #[error("duplicate chunk name")]
105    DuplicateChunkName,
106
107    /// Corrupt index entry.
108    #[error("corrupt index entry: `{0:?}`")]
109    CorruptIndexEntry(IndexEntry),
110
111    /// Invalid type.
112    #[error("expected type {0}, found {1}")]
113    InvalidType(u8, u8),
114
115    /// Invalid number of columns.
116    #[error("expected to read read {0} columns, found {1}")]
117    InvalidColumns(usize, u32),
118
119    /// Invalid string in chunk.
120    #[error("invalid string")]
121    InvalidString(#[source] FromUtf8Error),
122}
123
124/// Errors that can occur while creating or opening a file.
125#[non_exhaustive]
126#[derive(Error, Debug)]
127pub enum OpenError {
128    /// Encountered an IO error.
129    #[error("I/O error while creating or opening `{0}`")]
130    IO(PathBuf, #[source] io::Error),
131
132    /// Cannot decode the file contents.
133    #[error("cannot decode `{0}`")]
134    Decode(PathBuf, #[source] DecodeError),
135
136    /// Name length overflow.
137    #[error("the name `{0}` is too long")]
138    NameTooLong(String),
139}
140
141/// Errors that can occur while reading from a file.
142#[non_exhaustive]
143#[derive(Error, Debug)]
144pub enum ReadError {
145    /// Chunk not found.
146    #[error("chunk `{0}` not found in frame {1}")]
147    ChunkNotFound(String, u64),
148
149    /// Cannot decode the file contents.
150    #[error("cannot decode chunk `{0}` at frame {1}")]
151    Decode(String, u64, #[source] DecodeError),
152}
153
154/// Errors that can occur while encoding data to write.
155#[non_exhaustive]
156#[derive(Error, Debug)]
157pub enum EncodeError {
158    /// Cannot add any more chunk names.
159    #[error("too many chunk names")]
160    NameListOverflow,
161
162    /// File is not writable.
163    #[error("file opened in read-only mode")]
164    NotWritable,
165
166    /// A chunk name was duplicated in a single frame.
167    #[error("chunk `{0}` has already been written in frame {1}")]
168    DuplicateChunkName(String, u64),
169
170    /// Invalid number of columns.
171    #[error("the number of columns must be greater than zero and fit in a u32, got {0}")]
172    InvalidColumns(usize),
173}
174
175/// Errors that can occur while synchronizing data to a file.
176#[non_exhaustive]
177#[derive(Error, Debug)]
178pub enum SyncError {
179    /// Encountered an I/O error.
180    #[error("I/O error")]
181    IO(#[from] io::Error),
182
183    /// File is not writable.
184    #[error("file opened in read-only mode")]
185    NotWritable,
186
187    /// Name list outside the file.
188    #[error("name list out of bounds (location={0}, length={1})")]
189    NameListOutOfBounds(u64, u64),
190
191    /// Index outside the file.
192    #[error("index out of bounds (location={0}, length={1})")]
193    IndexOutOfBounds(u64, u64),
194}
195
196/// Errors that can occur while writing to a file.
197#[non_exhaustive]
198#[derive(Error, Debug)]
199pub enum WriteError {
200    /// Cannot encode a write to the file.
201    #[error("cannot encode chunk `{0}` at frame {1}")]
202    Encode(String, u64, #[source] EncodeError),
203
204    /// Cannot synchronize to the file.
205    #[error("cannot synchronize while writing chunk `{0}` at frame {1}")]
206    Sync(String, u64, #[source] SyncError),
207}
208
209/// Iterate over arrays of size M
210struct ArrayChunks<I, const M: usize> {
211    /// The iterator over scalars
212    iter: I,
213}
214
215impl<T, I, const M: usize> Iterator for ArrayChunks<I, M>
216where
217    I: Iterator<Item = T>,
218{
219    type Item = [T; M];
220
221    fn next(&mut self) -> Option<Self::Item> {
222        self.iter.next_array::<M>()
223    }
224}
225
226impl<T, I, const M: usize> ExactSizeIterator for ArrayChunks<I, M>
227where
228    I: ExactSizeIterator<Item = T>,
229{
230    fn len(&self) -> usize {
231        self.iter.len() / M
232    }
233}
234
/// Holds the sealed trait implemented by every data type that GSD supports.
///
/// Generic code can then operate on exactly this set of types.
mod private {
    /// Sealing marker: users cannot add new data types because they cannot
    /// implement this trait.
    pub trait Sealed {}

    /// Implement [`Sealed`] for each listed type.
    macro_rules! seal {
        ($($scalar:ty),* $(,)?) => {
            $(impl Sealed for $scalar {})*
        };
    }

    seal!(u8, u16, u32, u64, i8, i16, i32, i64, f32, f64);
}
253
254/// Data types that can be stored in chunk arrays.
255///
256/// GSD files store arrays of data of one of the following types:
257/// * [`u8`]
258/// * [`u16`]
259/// * [`u32`]
260/// * [`u64`]
261/// * [`i8`]
262/// * [`i16`]
263/// * [`i32`]
264/// * [`i64`]
265/// * [`f32`]
266/// * [`f64`]
267///
268/// The [`Type`] trait facilitates the generic methods including
269/// [`GsdFile::iter_scalars`], [`GsdFile::write_scalars`], and others. When needed,
270/// pass the type explicitly to these methods to read or write data chunks of the
271/// given type. In some cases, the Rust compiler may be able to determine the type
272/// from context.
273///
274/// # Example
275///
276/// ```
277/// use hoomd_gsd::file_layer::GsdFile;
278/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
279/// # use tempfile::tempdir;
280/// # let tmp_dir = tempdir().expect("temp dir should be created");
281/// # let path = tmp_dir.path().join("test.gsd");
282/// let mut gsd_file = GsdFile::create_new(path, "example", "hoomd", (1, 4))?;
283/// gsd_file.write_scalars(
284///     "configuration/box",
285///     [10.0_f32, 20.0, 15.0, 0.0, 0.0, 0.0],
286/// )?;
287/// gsd_file.end_frame()?;
288/// gsd_file.sync_all()?;
289///
290/// let box_iter = gsd_file.iter_scalars::<f32>(0, "configuration/box")?;
291/// itertools::assert_equal(box_iter, [10.0, 20.0, 15.0, 0.0, 0.0, 0.0]);
292/// # Ok(())
293/// # }
294/// ```
295pub trait Type: private::Sealed {
296    /// Value denoting this type in the file layer.
297    #[doc(hidden)]
298    fn gsd_data_type() -> u8;
299
300    /// Convert a native endian byte slice to this type.
301    ///
302    /// This is not the proper idiomatic way to do this, but it gets the job done
303    /// with minimal lines of code.
304    #[doc(hidden)]
305    fn from_ne_byte_slice(bytes: &[u8]) -> Self;
306
307    /// Append this type to a native endian byte array.
308    ///
309    /// This is not the proper idiomatic way to do this, but it gets the job done
310    /// with minimal lines of code.
311    #[doc(hidden)]
312    fn append_ne_bytes(&self, v: &mut Vec<u8>);
313}
314
315impl Type for u8 {
316    #[inline]
317    fn gsd_data_type() -> u8 {
318        1
319    }
320    #[inline]
321    fn from_ne_byte_slice(bytes: &[u8]) -> Self {
322        bytes[0]
323    }
324    #[inline]
325    fn append_ne_bytes(&self, v: &mut Vec<u8>) {
326        v.extend(&self.to_ne_bytes());
327    }
328}
329impl Type for u16 {
330    #[inline]
331    fn gsd_data_type() -> u8 {
332        2
333    }
334    #[inline]
335    fn from_ne_byte_slice(bytes: &[u8]) -> Self {
336        u16::from_ne_bytes(bytes.try_into().expect("byte slice should contain 2 bytes"))
337    }
338    #[inline]
339    fn append_ne_bytes(&self, v: &mut Vec<u8>) {
340        v.extend(&self.to_ne_bytes());
341    }
342}
343impl Type for u32 {
344    #[inline]
345    fn gsd_data_type() -> u8 {
346        3
347    }
348    #[inline]
349    fn from_ne_byte_slice(bytes: &[u8]) -> Self {
350        u32::from_ne_bytes(bytes.try_into().expect("byte slice should contain 4 bytes"))
351    }
352    #[inline]
353    fn append_ne_bytes(&self, v: &mut Vec<u8>) {
354        v.extend(&self.to_ne_bytes());
355    }
356}
357impl Type for u64 {
358    #[inline]
359    fn gsd_data_type() -> u8 {
360        4
361    }
362    #[inline]
363    fn from_ne_byte_slice(bytes: &[u8]) -> Self {
364        u64::from_ne_bytes(bytes.try_into().expect("byte slice should contain 8 bytes"))
365    }
366    #[inline]
367    fn append_ne_bytes(&self, v: &mut Vec<u8>) {
368        v.extend(&self.to_ne_bytes());
369    }
370}
371impl Type for i8 {
372    #[inline]
373    fn gsd_data_type() -> u8 {
374        5
375    }
376    #[inline]
377    fn from_ne_byte_slice(bytes: &[u8]) -> Self {
378        i8::from_ne_bytes(bytes.try_into().expect("byte slice should contain 1 byte"))
379    }
380    #[inline]
381    fn append_ne_bytes(&self, v: &mut Vec<u8>) {
382        v.extend(&self.to_ne_bytes());
383    }
384}
385impl Type for i16 {
386    #[inline]
387    fn gsd_data_type() -> u8 {
388        6
389    }
390    #[inline]
391    fn from_ne_byte_slice(bytes: &[u8]) -> Self {
392        i16::from_ne_bytes(bytes.try_into().expect("byte slice should contain 2 bytes"))
393    }
394    #[inline]
395    fn append_ne_bytes(&self, v: &mut Vec<u8>) {
396        v.extend(&self.to_ne_bytes());
397    }
398}
399impl Type for i32 {
400    #[inline]
401    fn gsd_data_type() -> u8 {
402        7
403    }
404    #[inline]
405    fn from_ne_byte_slice(bytes: &[u8]) -> Self {
406        i32::from_ne_bytes(bytes.try_into().expect("byte slice should contain 4 bytes"))
407    }
408    #[inline]
409    fn append_ne_bytes(&self, v: &mut Vec<u8>) {
410        v.extend(&self.to_ne_bytes());
411    }
412}
413impl Type for i64 {
414    #[inline]
415    fn gsd_data_type() -> u8 {
416        8
417    }
418    #[inline]
419    fn from_ne_byte_slice(bytes: &[u8]) -> Self {
420        i64::from_ne_bytes(bytes.try_into().expect("byte slice should contain 8 bytes"))
421    }
422    #[inline]
423    fn append_ne_bytes(&self, v: &mut Vec<u8>) {
424        v.extend(&self.to_ne_bytes());
425    }
426}
427impl Type for f32 {
428    #[inline]
429    fn gsd_data_type() -> u8 {
430        9
431    }
432    #[inline]
433    fn from_ne_byte_slice(bytes: &[u8]) -> Self {
434        f32::from_ne_bytes(bytes.try_into().expect("byte slice should contain 8 bytes"))
435    }
436    #[inline]
437    fn append_ne_bytes(&self, v: &mut Vec<u8>) {
438        v.extend(&self.to_ne_bytes());
439    }
440}
441impl Type for f64 {
442    #[inline]
443    fn gsd_data_type() -> u8 {
444        10
445    }
446    #[inline]
447    fn from_ne_byte_slice(bytes: &[u8]) -> Self {
448        f64::from_ne_bytes(bytes.try_into().expect("byte slice should contain 8 bytes"))
449    }
450    #[inline]
451    fn append_ne_bytes(&self, v: &mut Vec<u8>) {
452        v.extend(&self.to_ne_bytes());
453    }
454}
455
/// The GSD file header, held in memory in parsed form.
#[derive(PartialEq, Debug)]
pub(crate) struct GsdHeader {
    /// Magic number that marks the file as a GSD file.
    magic: u64,

    /// Where the chunk index is located in the file.
    index_location: u64,

    /// How many index entries fit in the space allocated for the index.
    index_allocated_entries: u64,

    /// Where the name list is located in the file.
    namelist_location: u64,

    /// Size of the name list in bytes, divided by `NAME_SIZE`.
    namelist_allocated_entries: u64,

    /// Version of the schema.
    schema_version: (u16, u16),

    /// Version of the GSD file format.
    gsd_version: (u16, u16),

    /// Name of the application that generated the file.
    application: String,

    /// Name of the data schema.
    schema: String,
}
486
/// State tracked for the name list.
#[derive(Debug)]
struct NameList {
    /// Maps each name to its id.
    name_id: HashMap<String, u16>,

    /// How many names the map holds.
    n_names: u16,

    /// Position in the name list where the next insert goes.
    insert_position: u64,

    /// Write buffer for names.
    buffer: Vec<u8>,
}
502
503/// Details about the index.
504///
505/// `n` counts the number of entries stored in the actual file.
506/// `buffer` stores index entries in memory that have not yet been written to the
507/// tile (as bytes).
508/// `pending` counts the number of entries that are pending in the current frame.
509///
510/// Pending entries are those where `write_*` has been called, but not yet
511/// `end_frame`. These should not be synced to the file to avoid having
512/// partial frames in the file.
513#[derive(Debug)]
514struct Index {
515    /// Number of index entries stored in the file.
516    n: u64,
517
518    /// Index entry buffer.
519    buffer: Vec<IndexEntry>,
520
521    /// Index entry byte buffer.
522    byte_buffer: Vec<u8>,
523
524    /// Pending entries.
525    pending: usize,
526
527    /// Chunk ids that have been written in this frame.
528    frame_names: HashSet<u16>,
529}
530
531/// Interact with GSD files on the filesystem.
532///
533/// # Overview
534///
535/// Open files with:
536/// * [`open`](GsdFile::open)
537/// * [`create`](GsdFile::create)
538/// * [`create_new`](GsdFile::create_new)
539///
540/// Access file metadata with:
541/// * [`n_frames`](GsdFile::n_frames)
542/// * [`schema`](GsdFile::schema)
543/// * [`schema_version`](GsdFile::schema_version)
544/// * [`name_id`](GsdFile::name_id)
545/// * [`find_chunk`](GsdFile::find_chunk)
546///
547/// Write data with:
548/// * [`write_scalars`](GsdFile::write_scalars)
549/// * [`write_arrays`](GsdFile::write_arrays)
550/// * [`write_string`](GsdFile::write_string)
551/// * [`end_frame`](GsdFile::end_frame)
552/// * [`sync_all`](GsdFile::sync_all)
553///
554/// Read data with:
555/// * [`iter_scalars`](GsdFile::iter_scalars)
556/// * [`iter_arrays`](GsdFile::iter_arrays)
557/// * [`read_string`](GsdFile::read_string)
558#[derive(Debug)]
559pub struct GsdFile {
560    /// The underlying file.
561    file: File,
562
563    /// The file's mode.
564    mode: Mode,
565
566    /// Parsed copy of the file's header.
567    header: GsdHeader,
568
569    /// Memory map of the file.
570    mmap: Mmap,
571
572    /// The name list.
573    name_list: NameList,
574
575    /// The index buffer.
576    index: Index,
577
578    /// The array data buffer.
579    data_buffer: Vec<u8>,
580
581    /// Record whether the data buffer has been flushed this frame.
582    data_buffer_flushed: bool,
583
584    /// Length of the file in bytes.
585    file_len: u64,
586
587    /// Index of the current buffered frame.
588    buffer_frame: u64,
589
590    /// Index of the current frame committed to the file.
591    file_frame: u64,
592
593    /// Write buffered data when more than `maximum_write_buffer_size` bytes are buffered.
594    maximum_write_buffer_size: usize,
595}
596
/// Describes one data chunk.
///
/// A GSD file stores a set of arrays, each uniquely identified by its *name*
/// and *frame*. The [`GsdFile::find_chunk`] method searches for a matching
/// index entry. The returned [`IndexEntry`] (if present) also carries
/// information about the dimension and type of the array.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct IndexEntry {
    /// The chunk's frame index.
    frame: u64,

    /// How many rows the chunk has.
    n: u64,

    /// Where the chunk is located in the file.
    location: u64,

    /// How many columns the chunk has.
    m: u32,

    /// Index of the chunk's name in the name list.
    id: u16,

    /// The chunk's data type.
    data_type: u8,

    /// Flags (unused)
    flags: u8,
}
626
/// The types of data that a chunk can store.
///
/// Provided by [`IndexEntry::data_type`].
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DataType {
    /// [`u8`]
    U8,
    /// [`u16`]
    U16,
    /// [`u32`]
    U32,
    /// [`u64`]
    U64,
    /// [`i8`]
    I8,
    /// [`i16`]
    I16,
    /// [`i32`]
    I32,
    /// [`i64`]
    I64,
    /// [`f32`]
    F32,
    /// [`f64`]
    F64,
    /// [`String`]
    String,
}
656
/// Selects the kind of access allowed on an opened file.
///
/// Pass a [`Mode`] value to [`GsdFile::open`].
///
/// With [`Mode::Read`], you can call methods that read the file, such as
/// [`GsdFile::find_chunk`] and [`GsdFile::iter_scalars`]. Methods that write
/// the file, such as [`GsdFile::write_scalars`] or [`GsdFile::sync_all`],
/// result in an error.
///
/// With [`Mode::Write`], you can call both read and write methods.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq)]
pub enum Mode {
    /// Read-only.
    Read,
    /// Both read and write operations are allowed.
    Write,
}
675
/// Decode the leading native endian [`u64`] of a byte slice.
///
/// Returns the decoded value together with the remainder of the slice. Testing
/// in Godbolt shows that repeated calls to this method can be optimized to a
/// simple series of mov instructions.
///
/// Panics when `bytes` holds fewer than 8 bytes.
#[inline]
fn extract_ne_u64(bytes: &[u8]) -> (u64, &[u8]) {
    let (head, tail) = bytes.split_at(size_of::<u64>());
    let head: [u8; size_of::<u64>()] = head
        .try_into()
        .expect("bytes slice should contain 8 bytes");
    (u64::from_ne_bytes(head), tail)
}
693
/// Decode the leading native endian [`u32`] of a byte slice.
///
/// Returns the decoded value together with the remainder of the slice.
/// Panics when `bytes` holds fewer than 4 bytes.
#[inline]
fn extract_ne_u32(bytes: &[u8]) -> (u32, &[u8]) {
    let (head, tail) = bytes.split_at(size_of::<u32>());
    let head: [u8; size_of::<u32>()] = head
        .try_into()
        .expect("bytes slice should contain 4 bytes");
    (u32::from_ne_bytes(head), tail)
}
707
/// Decode the leading native endian [`u16`] of a byte slice.
///
/// Returns the decoded value together with the remainder of the slice.
/// Panics when `bytes` holds fewer than 2 bytes.
#[inline]
fn extract_ne_u16(bytes: &[u8]) -> (u16, &[u8]) {
    let (head, tail) = bytes.split_at(size_of::<u16>());
    let head: [u8; size_of::<u16>()] = head
        .try_into()
        .expect("bytes slice should contain 2 bytes");
    (u16::from_ne_bytes(head), tail)
}
721
/// Decode the leading null terminated string of a byte slice.
///
/// The returned [`String`] excludes the null terminator. The returned slice is
/// what follows after one null terminator has been consumed. When `bytes`
/// holds no null byte, the whole slice is decoded and the remainder is empty.
///
/// Fails with a [`FromUtf8Error`] when the bytes before the terminator are not
/// valid UTF-8.
#[inline]
fn extract_null_terminated_utf8(bytes: &[u8]) -> Result<(String, &[u8]), FromUtf8Error> {
    let (text, remainder) = match bytes.iter().position(|&byte| byte == b'\0') {
        // Skip over exactly one terminator.
        Some(terminator) => (&bytes[..terminator], &bytes[terminator + 1..]),
        None => (bytes, &bytes[bytes.len()..]),
    };
    let decoded = String::from_utf8(text.to_vec())?;
    Ok((decoded, remainder))
}
739
740impl PartialOrd for IndexEntry {
741    #[inline]
742    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
743        Some(self.cmp(other))
744    }
745}
746impl Ord for IndexEntry {
747    #[inline]
748    fn cmp(&self, other: &Self) -> Ordering {
749        (self.frame, self.id).cmp(&(other.frame, other.id))
750    }
751}
752
753impl GsdHeader {
754    /// Parse the header.
755    fn try_from_ne_bytes(value: [u8; HEADER_USIZE]) -> Result<Self, DecodeError> {
756        // Validate the magic number first to ensure that we expect the rest
757        // of the header to be formatted appropriately. Otherwise, later
758        // error checks in this method will be examining undefined data.
759        let (magic, rest) = extract_ne_u64(&value);
760        if magic != MAGIC_ID {
761            return Err(DecodeError::InvalidFileIdentifier(magic));
762        }
763
764        let (index_location, rest) = extract_ne_u64(rest);
765        let (index_allocated_entries, rest) = extract_ne_u64(rest);
766        let (namelist_location, rest) = extract_ne_u64(rest);
767        let (namelist_allocated_entries, rest) = extract_ne_u64(rest);
768
769        // Verify that all locations are addressable in the memory map once on
770        // initialization. That way, it is safe to cast from the various byte
771        // locations to usize in the read methods.
772        usize::try_from(index_location).map_err(DecodeError::UnaddressableIndex)?;
773        usize::try_from(index_allocated_entries).map_err(DecodeError::UnaddressableIndex)?;
774        usize::try_from(namelist_location).map_err(DecodeError::UnaddressableIndex)?;
775        usize::try_from(namelist_allocated_entries).map_err(DecodeError::UnaddressableIndex)?;
776
777        let (schema_version, rest) = extract_ne_u32(rest);
778        let (gsd_version, rest) = extract_ne_u32(rest);
779        let (application, _) =
780            extract_null_terminated_utf8(rest).map_err(DecodeError::InvalidApplication)?;
781        let (schema, _) =
782            extract_null_terminated_utf8(&value[112..178]).map_err(DecodeError::InvalidSchema)?;
783
784        let schema_version = (
785            (schema_version >> 16) as u16,
786            (schema_version & 0xffff) as u16,
787        );
788        let gsd_version = ((gsd_version >> 16) as u16, (gsd_version & 0xffff) as u16);
789
790        // Cannot pretend to have a valid header unless we are also sure that
791        // the file version is one that we understand.
792        if !((2, 0)..(3, 0)).contains(&gsd_version) {
793            return Err(DecodeError::UnsupportedVersion(
794                gsd_version.0,
795                gsd_version.1,
796            ));
797        }
798
799        Ok(GsdHeader {
800            magic,
801            index_location,
802            index_allocated_entries,
803            namelist_location,
804            namelist_allocated_entries,
805            schema_version,
806            gsd_version,
807            application,
808            schema,
809        })
810    }
811
812    /// Encode the header into bytes following the GSD specification.
813    #[inline]
814    fn to_ne_bytes(&self) -> [u8; HEADER_USIZE] {
815        let mut result = [0u8; HEADER_USIZE];
816        result[0..8].copy_from_slice(&self.magic.to_ne_bytes());
817        result[8..16].copy_from_slice(&self.index_location.to_ne_bytes());
818        result[16..24].copy_from_slice(&self.index_allocated_entries.to_ne_bytes());
819        result[24..32].copy_from_slice(&self.namelist_location.to_ne_bytes());
820        result[32..40].copy_from_slice(&self.namelist_allocated_entries.to_ne_bytes());
821        let schema_version =
822            u32::from(self.schema_version.0) << 16 | u32::from(self.schema_version.1);
823        result[40..44].copy_from_slice(&schema_version.to_ne_bytes());
824        let gsd_version: u32 = u32::from(self.gsd_version.0) << 16 | u32::from(self.gsd_version.1);
825        result[44..48].copy_from_slice(&gsd_version.to_ne_bytes());
826        result[48..48 + self.application.len()].copy_from_slice(self.application.as_bytes());
827        result[112..112 + self.schema.len()].copy_from_slice(self.schema.as_bytes());
828
829        result
830    }
831}
832
833impl IndexEntry {
834    /// Trajectory frame number.
835    #[must_use]
836    #[inline]
837    pub fn frame(&self) -> u64 {
838        self.frame
839    }
840
841    /// Number of rows in the array.
842    #[must_use]
843    #[inline]
844    pub fn rows(&self) -> u64 {
845        self.n
846    }
847
848    /// Number of columns in the array.
849    #[must_use]
850    #[inline]
851    pub fn columns(&self) -> u32 {
852        self.m
853    }
854
855    /// The array's data type.
856    ///
857    /// Returns [`Some(data_type)`](Option::Some) when the type is known and
858    /// [`None`] when it is not.
859    ///
860    /// # Example
861    /// ```
862    /// use hoomd_gsd::file_layer::{DataType, IndexEntry};
863    ///
864    /// # fn do_something() { }
865    /// # fn func(index_entry: &IndexEntry) {
866    /// match index_entry.data_type() {
867    ///     Some(DataType::F32) => do_something(),
868    ///     _ => (),
869    /// }
870    /// # }
871    /// ```
872    #[must_use]
873    #[inline]
874    pub fn data_type(&self) -> Option<DataType> {
875        match self.data_type {
876            1 => Some(DataType::U8),
877            2 => Some(DataType::U16),
878            3 => Some(DataType::U32),
879            4 => Some(DataType::U64),
880            5 => Some(DataType::I8),
881            6 => Some(DataType::I16),
882            7 => Some(DataType::I32),
883            8 => Some(DataType::I64),
884            9 => Some(DataType::F32),
885            10 => Some(DataType::F64),
886            11 => Some(DataType::String),
887            _ => None,
888        }
889    }
890
891    /// Parse an index entry.
892    #[inline]
893    fn from_ne_bytes(value: [u8; 32]) -> Self {
894        let (frame, rest) = extract_ne_u64(&value);
895        let (n, rest) = extract_ne_u64(rest);
896        let (location, rest) = extract_ne_u64(rest);
897        let (m, rest) = extract_ne_u32(rest);
898        let (id, rest) = extract_ne_u16(rest);
899        let data_type = rest[0];
900        let flags = rest[1];
901        Self {
902            frame,
903            n,
904            location,
905            m,
906            id,
907            data_type,
908            flags,
909        }
910    }
911
912    /// Encode an index entry.
913    #[inline]
914    fn to_ne_bytes(&self) -> [u8; INDEX_ENTRY_USIZE] {
915        let mut result = [0u8; INDEX_ENTRY_USIZE];
916        result[0..8].copy_from_slice(&self.frame.to_ne_bytes());
917        result[8..16].copy_from_slice(&self.n.to_ne_bytes());
918        result[16..24].copy_from_slice(&self.location.to_ne_bytes());
919        result[24..28].copy_from_slice(&self.m.to_ne_bytes());
920        result[28..30].copy_from_slice(&self.id.to_ne_bytes());
921        result[30] = self.data_type;
922        result[31] = self.flags;
923
924        result
925    }
926}
927
928impl GsdFile {
929    /// Open a GSD file with the given mode.
930    ///
931    /// # Examples
932    ///
933    /// Open a file for reading:
934    /// ```
935    /// use hoomd_gsd::file_layer::{GsdFile, Mode};
936    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
937    /// # use tempfile::tempdir;
938    /// # let tmp_dir = tempdir().expect("temp dir should be created");
939    /// # let path = tmp_dir.path().join("test.gsd");
940    /// # GsdFile::create_new(path.clone(), "example", "hoomd", (1, 4))?;
941    /// let gsd_file = GsdFile::open(path, Mode::Read);
942    /// # Ok(())
943    /// # }
944    /// ```
945    ///
946    /// Open a file for both reading and writing:
947    /// ```
948    /// use hoomd_gsd::file_layer::{GsdFile, Mode};
949    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
950    /// # use tempfile::tempdir;
951    /// # let tmp_dir = tempdir().expect("temp dir should be created");
952    /// # let path = tmp_dir.path().join("test.gsd");
953    /// # GsdFile::create_new(path.clone(), "example", "hoomd", (1, 4))?;
954    /// let gsd_file = GsdFile::open(path, Mode::Write);
955    /// # Ok(())
956    /// # }
957    /// ```
958    ///
959    /// # Errors
960    ///
961    /// Returns a [`OpenError`] when any of the following occur:
962    /// * The file does not exist.
963    /// * The file is corrupt, unreadable, or there is an I/O error (see
964    ///   [`DecodeError`]).
965    #[inline]
966    pub fn open<P: AsRef<Path>>(path: P, mode: Mode) -> Result<Self, OpenError> {
967        let file = File::options()
968            .read(true)
969            .write(mode == Mode::Write)
970            .create(false)
971            .truncate(false)
972            .open(&path)
973            .map_err(|e| OpenError::IO(path.as_ref().into(), e))?;
974        GsdFile::from_file(file, mode).map_err(|e| OpenError::Decode(path.as_ref().into(), e))
975    }
976
977    /// Overwrite an existing GSD file (or create a new file).
978    ///
979    /// Creates a GSD file at the given path, overwriting any file that may already
980    /// exist. When successful, return a [`GsdFile`] opened in write mode.
981    ///
982    /// Each GSD file contains metadata describing which application created the
983    /// file, the data chunk schema, and the schema's version. `application` and
984    /// `schema` are strings (and must each be less than 80 bytes). `schema_version`
985    /// is a tuple listing the major and minor version numbers. In your code,
986    /// replace `"example"` with the name of your application.
987    ///
988    /// # Example
989    ///
990    /// ```
991    /// use hoomd_gsd::file_layer::{GsdFile, Mode};
992    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
993    /// # use tempfile::tempdir;
994    /// # let tmp_dir = tempdir().expect("temp dir should be created");
995    /// # let path = tmp_dir.path().join("test.gsd");
996    /// let gsd_file = GsdFile::create(path, "example", "hoomd", (1, 4))?;
997    /// # Ok(())
998    /// # }
999    /// ```
1000    ///
1001    /// # Errors
1002    ///
1003    /// Returns a [`OpenError`] when any of the following occur:
1004    /// * The file cannot be created.
1005    /// * The file is corrupt, unreadable, or there is an I/O error (see
1006    ///   [`DecodeError`]).
1007    #[inline]
1008    pub fn create<P: AsRef<Path>>(
1009        path: P,
1010        application: &str,
1011        schema: &str,
1012        schema_version: (u16, u16),
1013    ) -> Result<Self, OpenError> {
1014        let mut file = File::options()
1015            .read(true)
1016            .write(true)
1017            .create(true)
1018            .truncate(true)
1019            .open(&path)
1020            .map_err(|e| OpenError::IO(path.as_ref().into(), e))?;
1021        GsdFile::initialize_file(&mut file, &path, application, schema, schema_version)?;
1022        GsdFile::from_file(file, Mode::Write)
1023            .map_err(|e| OpenError::Decode(path.as_ref().into(), e))
1024    }
1025
1026    /// Create a new GSD file.
1027    ///
1028    /// Creates a new GSD file at the given path, returning an error when the
1029    /// path already exists. When successful, return a [`GsdFile`] opened in
1030    /// write mode.
1031    ///
1032    /// Each GSD file contains metadata describing which application created the
1033    /// file, the data chunk schema, and the schema's version. `application` and
1034    /// `schema` are strings (and must each be less than 80 bytes). `schema_version`
1035    /// is a tuple listing the major and minor version numbers. In your code,
1036    /// replace `"example"` with the name of your application.
1037    ///
1038    /// # Example
1039    ///
1040    /// ```
1041    /// use hoomd_gsd::file_layer::{GsdFile, Mode};
1042    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1043    /// # use tempfile::tempdir;
1044    /// # let tmp_dir = tempdir().expect("temp dir should be created");
1045    /// # let path = tmp_dir.path().join("test.gsd");
1046    /// let gsd_file = GsdFile::create_new(path, "example", "hoomd", (1, 4))?;
1047    /// # Ok(())
1048    /// # }
1049    /// ```
1050    ///
1051    /// # Errors
1052    ///
1053    /// Returns a [`OpenError`] when any of the following occur:
1054    /// * The file cannot be created.
1055    /// * The file already exists.
1056    /// * The file is corrupt, unreadable, or there is an I/O error (see
1057    ///   [`DecodeError`]).
1058    #[inline]
1059    pub fn create_new<P: AsRef<Path>>(
1060        path: P,
1061        application: &str,
1062        schema: &str,
1063        schema_version: (u16, u16),
1064    ) -> Result<Self, OpenError> {
1065        let mut file = File::options()
1066            .read(true)
1067            .write(true)
1068            .create_new(true)
1069            .open(&path)
1070            .map_err(|e| OpenError::IO(path.as_ref().into(), e))?;
1071        GsdFile::initialize_file(&mut file, &path, application, schema, schema_version)?;
1072        GsdFile::from_file(file, Mode::Write)
1073            .map_err(|e| OpenError::Decode(path.as_ref().into(), e))
1074    }
1075
1076    /// Initialize an empty file.
1077    fn initialize_file<P: AsRef<Path>>(
1078        file: &mut File,
1079        path: &P,
1080        application: &str,
1081        schema: &str,
1082        schema_version: (u16, u16),
1083    ) -> Result<(), OpenError> {
1084        let application = String::from(application);
1085        if application.len() as u64 > NAME_SIZE - 1 {
1086            return Err(OpenError::NameTooLong(application));
1087        }
1088        let schema = String::from(schema);
1089        if schema.len() as u64 > NAME_SIZE - 1 {
1090            return Err(OpenError::NameTooLong(schema));
1091        }
1092
1093        let header = GsdHeader {
1094            magic: MAGIC_ID,
1095            gsd_version: CURRENT_FILE_VERSION,
1096            application,
1097            schema,
1098            schema_version,
1099            index_location: HEADER_SIZE,
1100            index_allocated_entries: INITIAL_INDEX_SIZE,
1101            namelist_location: HEADER_SIZE + INDEX_ENTRY_SIZE * INITIAL_INDEX_SIZE,
1102            namelist_allocated_entries: INITIAL_NAME_LIST_SIZE / NAME_SIZE,
1103        };
1104
1105        file.write_all(&header.to_ne_bytes())
1106            .map_err(|e| OpenError::IO(path.as_ref().into(), e))?;
1107
1108        file.set_len(HEADER_SIZE + INDEX_ENTRY_SIZE * INITIAL_INDEX_SIZE + INITIAL_NAME_LIST_SIZE)
1109            .map_err(|e| OpenError::IO(path.as_ref().into(), e))?;
1110
1111        file.sync_all()
1112            .map_err(|e| OpenError::IO(path.as_ref().into(), e))?;
1113
1114        Ok(())
1115    }
1116
1117    /// Populate the fields in `GsdFile` given an open `File`.
1118    fn from_file(file: File, mode: Mode) -> Result<GsdFile, DecodeError> {
1119        let mut file = file;
1120        file.rewind()?;
1121
1122        let mut header_bytes = [0_u8; HEADER_USIZE];
1123        file.read_exact(&mut header_bytes)?;
1124        let header = GsdHeader::try_from_ne_bytes(header_bytes)?;
1125
1126        let file_len = file.seek(SeekFrom::End(0))?;
1127        // Verify that the entire file is addressable in the mmap. This makes
1128        // the usize::try_from checks in get_index will not fail.
1129        usize::try_from(file_len).map_err(DecodeError::UnaddressableContent)?;
1130
1131        // Provide the caller with helpful errors when the code would otherwise
1132        // access the memory map outside the contents of the file.
1133        if header.index_location > file_len
1134            || header.index_location + header.index_allocated_entries * INDEX_ENTRY_SIZE > file_len
1135            || header.index_allocated_entries == 0
1136        {
1137            return Err(DecodeError::IndexOutOfBounds(
1138                header.index_location,
1139                header.index_allocated_entries * INDEX_ENTRY_SIZE,
1140            ));
1141        }
1142        let namelist_range_end =
1143            header.namelist_location + header.namelist_allocated_entries * NAME_SIZE;
1144        if namelist_range_end > file_len || header.namelist_allocated_entries == 0 {
1145            return Err(DecodeError::NameListOutOfBounds(
1146                header.namelist_location,
1147                header.namelist_allocated_entries * NAME_SIZE,
1148            ));
1149        }
1150
1151        let mmap = unsafe { Mmap::map(&file)? };
1152        let last_namelist_offset =
1153            usize::try_from(namelist_range_end - 1).map_err(DecodeError::UnaddressableIndex)?;
1154        if mmap[last_namelist_offset] != 0 {
1155            return Err(DecodeError::NameListNotTerminated);
1156        }
1157
1158        let start =
1159            usize::try_from(header.namelist_location).map_err(DecodeError::UnaddressableIndex)?;
1160        let end = usize::try_from(namelist_range_end).map_err(DecodeError::UnaddressableIndex)?;
1161        let name_list = GsdFile::decode_name_map(&mmap[start..end])?;
1162        let index = Index {
1163            n: 0,
1164            buffer: Vec::new(),
1165            byte_buffer: Vec::new(),
1166            pending: 0,
1167            frame_names: HashSet::new(),
1168        };
1169
1170        let mut gsd_file = GsdFile {
1171            file,
1172            mode,
1173            header,
1174            mmap,
1175            file_len,
1176            name_list,
1177            index,
1178            data_buffer: Vec::new(),
1179            data_buffer_flushed: false,
1180            buffer_frame: 0,
1181            file_frame: 0,
1182            maximum_write_buffer_size: INITIAL_MAXIMUM_WRITE_BUFFER_SIZE,
1183        };
1184
1185        gsd_file.index.n = gsd_file.count_index_entries()?;
1186        if gsd_file.index.n > 0 {
1187            let last_entry = gsd_file.get_index(gsd_file.index.n - 1)?;
1188            gsd_file.file_frame = last_entry.frame + 1;
1189            gsd_file.buffer_frame = gsd_file.file_frame;
1190        }
1191
1192        // Silently upgrade writable files from a previous matching major version to the latest
1193        // minor version.
1194        if gsd_file.mode == Mode::Write
1195            && gsd_file.header.gsd_version.0 == CURRENT_FILE_VERSION.0
1196            && gsd_file.header.gsd_version.1 < CURRENT_FILE_VERSION.1
1197        {
1198            gsd_file.header.gsd_version.1 = CURRENT_FILE_VERSION.1;
1199
1200            gsd_file.file.seek(SeekFrom::Start(0))?;
1201            gsd_file.file.write_all(&gsd_file.header.to_ne_bytes())?;
1202        }
1203
1204        Ok(gsd_file)
1205    }
1206
1207    /// Read the initial name map from the file.
1208    fn decode_name_map(bytes: &[u8]) -> Result<NameList, DecodeError> {
1209        let mut name_id = HashMap::new();
1210        let mut bytes = bytes;
1211
1212        let mut current_id: u16 = 0;
1213        let mut insert_position: u64 = 0;
1214        while !bytes.is_empty() && bytes[0] != 0 {
1215            let (name, rest) =
1216                extract_null_terminated_utf8(bytes).map_err(DecodeError::InvalidChunkName)?;
1217            bytes = rest;
1218
1219            // The GSD spec ensures that all names in the map are always terminated.
1220            insert_position += (name.len() + 1) as u64;
1221
1222            let previous = name_id.insert(name, current_id);
1223            if previous.is_some() {
1224                return Err(DecodeError::DuplicateChunkName);
1225            }
1226
1227            if current_id == u16::MAX {
1228                return Err(DecodeError::NameListOverflow);
1229            }
1230
1231            current_id += 1;
1232        }
1233
1234        Ok(NameList {
1235            name_id,
1236            n_names: current_id,
1237            insert_position,
1238            buffer: Vec::new(),
1239        })
1240    }
1241
1242    /// Get the `id` of a name. Add a new `id` if needed.
1243    #[inline]
1244    fn get_id(&mut self, name: &str) -> Result<u16, EncodeError> {
1245        if let Some(id) = self.name_list.name_id.get(name) {
1246            return Ok(*id);
1247        }
1248
1249        let new_id = self.name_list.n_names;
1250        if new_id == u16::MAX {
1251            return Err(EncodeError::NameListOverflow);
1252        }
1253
1254        self.name_list.n_names += 1;
1255        self.name_list.buffer.extend(name.as_bytes());
1256        self.name_list.buffer.push(0);
1257        self.name_list.name_id.insert(String::from(name), new_id);
1258        Ok(new_id)
1259    }
1260
1261    /// Remap the file
1262    #[inline]
1263    #[cfg(target_os = "linux")]
1264    fn remap(&mut self) -> Result<(), io::Error> {
1265        unsafe {
1266            self.mmap.remap(
1267                self.file_len
1268                    .try_into()
1269                    .expect("file length should be validated elsewhere"),
1270                memmap2::RemapOptions::new().may_move(true),
1271            )?;
1272        }
1273        Ok(())
1274    }
1275
1276    /// Remap the file
1277    #[inline]
1278    #[cfg(not(target_os = "linux"))]
1279    fn remap(&mut self) -> Result<(), io::Error> {
1280        self.mmap = unsafe { Mmap::map(&self.file)? };
1281        Ok(())
1282    }
1283
1284    /// Access a single index entry from the memory map.
1285    #[inline]
1286    fn get_index(&self, i: u64) -> Result<IndexEntry, DecodeError> {
1287        // get_index is an internal method, assume that any caller has already
1288        // called remap() if needed. Verify this in debug builds.
1289        debug_assert!(self.mmap.len() as u64 == self.file_len);
1290
1291        let start = self.header.index_location + i * INDEX_ENTRY_SIZE;
1292        let end = start + INDEX_ENTRY_SIZE;
1293        debug_assert!(
1294            end <= self.header.index_location
1295                + self.header.index_allocated_entries * INDEX_ENTRY_SIZE
1296        );
1297
1298        let start = usize::try_from(start).map_err(DecodeError::UnaddressableIndex)?;
1299        let end = usize::try_from(end).map_err(DecodeError::UnaddressableIndex)?;
1300        let bytes: [u8; INDEX_ENTRY_USIZE] = self.mmap[start..end]
1301            .try_into()
1302            .expect("slice should always be the correct size");
1303        Ok(IndexEntry::from_ne_bytes(bytes))
1304    }
1305
1306    /// Get the size of a type given by its identifier.
1307    #[inline]
1308    fn size_of(data_type: u8) -> Option<usize> {
1309        match data_type {
1310            1 => Some(size_of::<u8>()),
1311            2 => Some(size_of::<u16>()),
1312            3 => Some(size_of::<u32>()),
1313            4 => Some(size_of::<u64>()),
1314            5 => Some(size_of::<i8>()),
1315            6 => Some(size_of::<i16>()),
1316            7 => Some(size_of::<i32>()),
1317            8 => Some(size_of::<i64>()),
1318            9 => Some(size_of::<f32>()),
1319            10 => Some(size_of::<f64>()),
1320            11 => Some(1),
1321            _ => None,
1322        }
1323    }
1324
1325    /// Test if an index entry is valid in the context of the file.
1326    fn is_entry_valid(&self, entry: &IndexEntry) -> bool {
1327        match GsdFile::size_of(entry.data_type) {
1328            Some(element_size) => {
1329                let total_size = entry.n * u64::from(entry.m) * element_size as u64;
1330                assert!(entry.location + total_size <= self.file_len);
1331                if entry.location + total_size > self.file_len {
1332                    return false;
1333                }
1334            }
1335            None => return false,
1336        }
1337
1338        // is_entry_valid is used before the file is fully loaded and the number
1339        // of frames is not yet known. Check that the frame is at least within
1340        // the number of allocated index entries.
1341        assert!(entry.frame < self.header.index_allocated_entries);
1342        if entry.frame >= self.header.index_allocated_entries {
1343            return false;
1344        }
1345
1346        // There is no need to include buffered names here because
1347        // is_entry_valid is only called on file open, not after any write_
1348        // methods.
1349        assert!(entry.id < self.name_list.n_names);
1350        if entry.id >= self.name_list.n_names {
1351            return false;
1352        }
1353
1354        if entry.flags != 0 {
1355            return false;
1356        }
1357
1358        true
1359    }
1360
1361    /// Determine the number of frames in the file.
1362    fn count_index_entries(&self) -> Result<u64, DecodeError> {
1363        let first_entry = self.get_index(0)?;
1364        if first_entry.location != 0 && !self.is_entry_valid(&first_entry) {
1365            return Err(DecodeError::CorruptIndexEntry(first_entry));
1366        }
1367
1368        if first_entry.location == 0 {
1369            return Ok(0);
1370        }
1371
1372        // determine the number of index entries (marked by location = 0)
1373        // binary search for the first index entry with location 0
1374        let mut l: u64 = 0;
1375        let mut r = self.header.index_allocated_entries;
1376
1377        // progressively narrow the search window by halves
1378        loop {
1379            let m = l.midpoint(r);
1380
1381            // file is corrupt if any index entry is invalid or frame does not increase
1382            // monotonically
1383            let entry_m = self.get_index(m)?;
1384            let entry_l = self.get_index(l)?;
1385
1386            if entry_m.location != 0 {
1387                if !self.is_entry_valid(&entry_m) || entry_m.frame < entry_l.frame {
1388                    return Err(DecodeError::CorruptIndexEntry(entry_m));
1389                }
1390                l = m;
1391            } else {
1392                r = m;
1393            }
1394
1395            if r - l == 1 {
1396                break;
1397            }
1398        }
1399
1400        // this finds R = the first index entry with location = 0
1401        Ok(r)
1402    }
1403
1404    /// Find a chunk in the index.
1405    ///
1406    /// Returns [`Some(index_entry)`](Option::Some) when the data chunk is present
1407    /// in the file and [`None`] when it is not.
1408    ///
1409    /// # Example
1410    ///
1411    /// ```
1412    /// use hoomd_gsd::file_layer::{DataType, GsdFile};
1413    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1414    /// # use tempfile::tempdir;
1415    /// # let tmp_dir = tempdir().expect("temp dir should be created");
1416    /// # let path = tmp_dir.path().join("test.gsd");
1417    /// let mut gsd_file = GsdFile::create_new(path, "example", "hoomd", (1, 4))?;
1418    /// gsd_file.write_scalars("configuration/step", [100_000_u64])?;
1419    /// gsd_file.end_frame()?;
1420    /// gsd_file.sync_all()?;
1421    ///
1422    /// let configuration_box = gsd_file.find_chunk(0, "configuration/box");
1423    /// assert_eq!(configuration_box, None);
1424    ///
1425    /// let step = gsd_file.find_chunk(0, "configuration/step");
1426    /// assert!(step.is_some());
1427    /// if let Some(index) = step {
1428    ///     assert_eq!(index.frame(), 0);
1429    ///     assert_eq!(index.rows(), 1);
1430    ///     assert_eq!(index.columns(), 1);
1431    ///     assert_eq!(index.data_type(), Some(DataType::U64));
1432    /// }
1433    /// # Ok(())
1434    /// # }
1435    /// ```
1436    #[must_use]
1437    pub fn find_chunk(&self, frame: u64, name: &str) -> Option<IndexEntry> {
1438        if frame >= self.file_frame || self.index.n == 0 {
1439            return None;
1440        }
1441
1442        let id = match self.name_list.name_id.get(name) {
1443            None => return None,
1444            Some(id) => *id,
1445        };
1446
1447        // binary search for the index entry
1448        let mut l: u64 = 0;
1449        let mut r = self.index.n - 1;
1450
1451        while l <= r {
1452            let m = l.midpoint(r);
1453
1454            // We can map an error to None here because the unaddressable index error
1455            // would have previously been caught on open or sync.
1456            if let Ok(index_entry_m) = self.get_index(m) {
1457                match (index_entry_m.frame, index_entry_m.id).cmp(&(frame, id)) {
1458                    Ordering::Less => l = m + 1,
1459                    Ordering::Greater => r = m - 1,
1460                    Ordering::Equal => return Some(index_entry_m),
1461                }
1462            } else {
1463                return None;
1464            }
1465        }
1466        None
1467    }
1468
1469    /// Iterate over an array of scalars in the given frame.
1470    ///
1471    /// Returns [`Ok(iterator)`](Result::Ok) when the data chunk is present in the
1472    /// file and `Err(`[`ReadError::ChunkNotFound`]`)` when it is not. Collect the
1473    /// iterator into a [`Vec`] to make a copy of the data, or process the data in
1474    /// place while iterating.
1475    ///
1476    /// Data written to a file is not available for reading until the file
1477    /// is closed or after a call to [`sync_all`](GsdFile::sync_all).
1478    ///
1479    /// # Example
1480    ///
1481    /// ```
1482    /// use hoomd_gsd::file_layer::{DataType, GsdFile};
1483    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1484    /// # use tempfile::tempdir;
1485    /// # let tmp_dir = tempdir().expect("temp dir should be created");
1486    /// # let path = tmp_dir.path().join("test.gsd");
1487    /// let mut gsd_file = GsdFile::create_new(path, "example", "hoomd", (1, 4))?;
1488    /// gsd_file.write_scalars(
1489    ///     "configuration/box",
1490    ///     [10.0_f32, 20.0, 15.0, 0.0, 0.0, 0.0],
1491    /// )?;
1492    /// gsd_file.end_frame()?;
1493    /// gsd_file.sync_all()?;
1494    ///
1495    /// let box_iter = gsd_file.iter_scalars::<f32>(0, "configuration/box")?;
1496    /// let box_vec = box_iter.collect::<Vec<_>>();
1497    /// assert_eq!(box_vec, vec![10.0_f32, 20.0, 15.0, 0.0, 0.0, 0.0]);
1498    /// # Ok(())
1499    /// # }
1500    /// ```
1501    ///
1502    /// # Errors
1503    ///
1504    /// Returns a [`ReadError`] when any of the following occur:
1505    /// * A chunk by the given `name` is not present in the given `frame`.
1506    /// * The data type stored in the file does not match `T`.
1507    /// * The array stored in the file does not have dimensions `N x 1`.
1508    /// * The file is corrupt, unreadable, or there is an I/O error (see
1509    ///   [`DecodeError`]).
1510    pub fn iter_scalars<T: Type>(
1511        &self,
1512        frame: u64,
1513        name: &str,
1514    ) -> Result<impl ExactSizeIterator<Item = T> + use<'_, T>, ReadError> {
1515        let Some(index_entry) = self.find_chunk(frame, name) else {
1516            return Err(ReadError::ChunkNotFound(name.into(), frame));
1517        };
1518
1519        if index_entry.m as usize != 1 {
1520            return Err(ReadError::Decode(
1521                name.into(),
1522                frame,
1523                DecodeError::InvalidColumns(1, index_entry.m),
1524            ));
1525        }
1526
1527        if index_entry.data_type != T::gsd_data_type() {
1528            return Err(ReadError::Decode(
1529                name.into(),
1530                frame,
1531                DecodeError::InvalidType(T::gsd_data_type(), index_entry.data_type),
1532            ));
1533        }
1534
1535        self.read_details(&index_entry)
1536            .map_err(|e| ReadError::Decode(name.into(), frame, e))
1537    }
1538
1539    /// Iterate over an array of arrays in the given frame.
1540    ///
1541    /// Returns [`Ok(iterator)`](Result::Ok) when the data chunk is present in the
1542    /// file and `Err(`[`ReadError::ChunkNotFound`]`)` when it is not. Collect the
1543    /// iterator into a [`Vec`] to make a copy of the data, or process the data in
1544    /// place while iterating.
1545    ///
1546    /// Data written to a file is not available for reading until the file
1547    /// is closed or after a call to [`sync_all`](GsdFile::sync_all).
1548    ///
1549    /// # Example
1550    ///
1551    /// ```
1552    /// use hoomd_gsd::file_layer::{DataType, GsdFile};
1553    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1554    /// # use tempfile::tempdir;
1555    /// # let tmp_dir = tempdir().expect("temp dir should be created");
1556    /// # let path = tmp_dir.path().join("test.gsd");
1557    /// let position = vec![[5.0_f32, 3.0, -4.0], [-2.0, 3.0, -6.0]];
1558    ///
1559    /// let mut gsd_file = GsdFile::create_new(path, "example", "hoomd", (1, 4))?;
1560    /// gsd_file.write_arrays("particles/position", position.iter().copied())?;
1561    /// gsd_file.end_frame()?;
1562    /// gsd_file.sync_all()?;
1563    ///
1564    /// let position_iter =
1565    ///     gsd_file.iter_arrays::<f32, 3>(0, "particles/position")?;
1566    /// let position_vec = position_iter.collect::<Vec<_>>();
1567    /// assert_eq!(position_vec, position);
1568    /// # Ok(())
1569    /// # }
1570    /// ```
1571    ///
1572    /// # Errors
1573    ///
1574    /// Returns a [`ReadError`] when any of the following occur:
1575    /// * A chunk by the given `name` is not present in the given `frame`.
1576    /// * The data type stored in the file does not match `T`.
1577    /// * The array stored in the file does not have dimensions `N x M`.
1578    /// * The file is corrupt, unreadable, or there is an I/O error (see
1579    ///   [`DecodeError`]).
1580    pub fn iter_arrays<T: Type, const M: usize>(
1581        &self,
1582        frame: u64,
1583        name: &str,
1584    ) -> Result<impl ExactSizeIterator<Item = [T; M]> + use<'_, T, M>, ReadError> {
1585        let Some(index_entry) = self.find_chunk(frame, name) else {
1586            return Err(ReadError::ChunkNotFound(name.into(), frame));
1587        };
1588
1589        if index_entry.m as usize != M {
1590            return Err(ReadError::Decode(
1591                name.into(),
1592                frame,
1593                DecodeError::InvalidColumns(M, index_entry.m),
1594            ));
1595        }
1596
1597        if index_entry.data_type != T::gsd_data_type() {
1598            return Err(ReadError::Decode(
1599                name.into(),
1600                frame,
1601                DecodeError::InvalidType(T::gsd_data_type(), index_entry.data_type),
1602            ));
1603        }
1604
1605        Ok(ArrayChunks {
1606            iter: self
1607                .read_details::<T>(&index_entry)
1608                .map_err(|e| ReadError::Decode(name.into(), frame, e))?,
1609        })
1610    }
1611
1612    /// Read a string in the given frame.
1613    ///
1614    /// Returns [`Ok(String)`](Result::Ok) when the data chunk is present
1615    /// in the file and `Err(`[`ReadError::ChunkNotFound`]`)` when it is not.
1616    ///
1617    /// Data written to a file is not available for reading until the file
1618    /// is closed or after a call to [`sync_all`](GsdFile::sync_all).
1619    ///
1620    /// # Example
1621    ///
1622    /// ```
1623    /// use hoomd_gsd::file_layer::{DataType, GsdFile};
1624    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1625    /// # use tempfile::tempdir;
1626    /// # let tmp_dir = tempdir().expect("temp dir should be created");
1627    /// # let path = tmp_dir.path().join("test.gsd");
1628    /// let mut gsd_file = GsdFile::create_new(path, "example", "hoomd", (1, 4))?;
1629    /// gsd_file.write_string("log/my_string", "Hello, GSD.")?;
1630    /// gsd_file.end_frame()?;
1631    /// gsd_file.sync_all()?;
1632    ///
1633    /// let string = gsd_file.read_string(0, "log/my_string")?;
1634    /// assert_eq!(string, "Hello, GSD.");
1635    /// # Ok(())
1636    /// # }
1637    /// ```
1638    ///
1639    /// # Errors
1640    ///
1641    /// Returns a [`ReadError`] when any of the following occur:
1642    /// * A chunk by the given `name` is not present in the given `frame`.
1643    /// * The data type stored in the file is not a UTF-8 string.
1644    /// * The array stored in the file does not have dimensions `N x 1`.
1645    /// * The file is corrupt, unreadable, or there is an I/O error (see
1646    ///   [`DecodeError`]).
1647    pub fn read_string(&self, frame: u64, name: &str) -> Result<String, ReadError> {
1648        let Some(index_entry) = self.find_chunk(frame, name) else {
1649            return Err(ReadError::ChunkNotFound(name.into(), frame));
1650        };
1651
1652        if index_entry.m as usize != 1 {
1653            return Err(ReadError::Decode(
1654                name.into(),
1655                frame,
1656                DecodeError::InvalidColumns(1, index_entry.m),
1657            ));
1658        }
1659
1660        if index_entry.data_type != 11 {
1661            return Err(ReadError::Decode(
1662                name.into(),
1663                frame,
1664                DecodeError::InvalidType(11, index_entry.data_type),
1665            ));
1666        }
1667
1668        let array = self
1669            .read_details::<u8>(&index_entry)
1670            .map_err(|e| ReadError::Decode(name.into(), frame, e))?
1671            .collect::<Vec<_>>();
1672
1673        String::from_utf8(array)
1674            .map_err(|e| ReadError::Decode(name.into(), frame, DecodeError::InvalidString(e)))
1675    }
1676
1677    /// Implement the details of `iter_scalars` and `iter_arrays`.
1678    fn read_details<T: Type>(
1679        &self,
1680        index_entry: &IndexEntry,
1681    ) -> Result<impl ExactSizeIterator<Item = T> + use<'_, T>, DecodeError> {
1682        let n_elements = index_entry.n * u64::from(index_entry.m);
1683        let n_bytes = usize::try_from(n_elements * size_of::<T>() as u64)
1684            .map_err(DecodeError::UnaddressableContent)?;
1685
1686        let location =
1687            usize::try_from(index_entry.location).map_err(DecodeError::UnaddressableContent)?;
1688
1689        if index_entry.location == 0 {
1690            return Err(DecodeError::CorruptIndexEntry(index_entry.clone()));
1691        }
1692
1693        debug_assert!(location + n_bytes <= self.mmap.len());
1694
1695        Ok(self.mmap[location..location + n_bytes]
1696            .chunks(size_of::<T>())
1697            .map(T::from_ne_byte_slice))
1698    }
1699
1700    /// Append an array of scalar values to the current frame.
1701    ///
1702    /// `write_scalars` writes one-dimensional array data to a named chunk in the
1703    /// current frame of the GSD file. Call [`end_frame`](GsdFile::end_frame) to
1704    /// complete the frame and start the next.
1705    ///
1706    /// <div class="warning">
1707    ///
1708    /// Dropping a [`GsdFile`] will also drop any pending data chunks in incomplete
1709    /// frames.
1710    ///
1711    /// </div>
1712    ///
1713    /// # Example
1714    ///
1715    /// ```
1716    /// use hoomd_gsd::file_layer::{DataType, GsdFile};
1717    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1718    /// # use tempfile::tempdir;
1719    /// # let tmp_dir = tempdir().expect("temp dir should be created");
1720    /// # let path = tmp_dir.path().join("test.gsd");
1721    /// let mut gsd_file = GsdFile::create_new(path, "example", "hoomd", (1, 4))?;
1722    /// gsd_file.write_scalars(
1723    ///     "configuration/box",
1724    ///     [10.0_f32, 20.0, 15.0, 0.0, 0.0, 0.0],
1725    /// )?;
1726    /// gsd_file.end_frame()?;
1727    /// # Ok(())
1728    /// # }
1729    /// ```
1730    ///
1731    /// # Errors
1732    ///
1733    /// Returns a [`WriteError`] when any of the following occur:
1734    /// * The file is not opened in a write mode.
1735    /// * There are no available chunk identifiers.
1736    /// * A chunk with the same name has already been written in this frame.
1737    /// * There is an I/O error while writing to the file.
1738    pub fn write_scalars<T, I>(&mut self, name: &str, data: I) -> Result<(), WriteError>
1739    where
1740        T: Type,
1741        I: IntoIterator<Item = T>,
1742    {
1743        let data = data.into_iter();
1744
1745        self.write_details(name, 1, T::gsd_data_type(), |buffer: &mut Vec<u8>| -> u64 {
1746            let mut len = 0;
1747            for value in data {
1748                value.append_ne_bytes(buffer);
1749                len += 1;
1750            }
1751            len
1752        })
1753    }
1754
1755    /// Append an array of array values to the current frame.
1756    ///
1757    /// `write_arrays` writes two-dimensional array data to a named chunk in the
1758    /// current frame of the GSD file. Call [`end_frame`](GsdFile::end_frame) to
1759    /// complete the frame and start the next.
1760    ///
1761    /// <div class="warning">
1762    ///
1763    /// Dropping a [`GsdFile`] will also drop any pending data chunks in incomplete
1764    /// frames.
1765    ///
1766    /// </div>
1767    ///
1768    /// # Example
1769    ///
1770    /// ```
1771    /// use hoomd_gsd::file_layer::{DataType, GsdFile};
1772    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1773    /// # use tempfile::tempdir;
1774    /// # let tmp_dir = tempdir().expect("temp dir should be created");
1775    /// # let path = tmp_dir.path().join("test.gsd");
1776    /// let position = vec![[5.0_f32, 3.0, -4.0], [-2.0, 3.0, -6.0]];
1777    ///
1778    /// let mut gsd_file = GsdFile::create_new(path, "example", "hoomd", (1, 4))?;
1779    /// gsd_file.write_arrays("particles/position", position.iter().copied())?;
1780    /// gsd_file.end_frame()?;
1781    /// # Ok(())
1782    /// # }
1783    /// ```
1784    ///
1785    /// # Errors
1786    ///
1787    /// Returns a [`WriteError`] when any of the following occur:
1788    /// * The file is not opened i* n a write mode.
1789    /// * There are no available chunk identifiers.
1790    /// * A chunk with the same name has already been written in this frame.
1791    /// * `M` is 0.
1792    /// * `M` cannot be represented by a `u32`.
1793    pub fn write_arrays<T, I, const M: usize>(
1794        &mut self,
1795        name: &str,
1796        data: I,
1797    ) -> Result<(), WriteError>
1798    where
1799        T: Type,
1800        I: IntoIterator<Item = [T; M]>,
1801    {
1802        if M == 0 {
1803            return Err(WriteError::Encode(
1804                name.into(),
1805                self.buffer_frame,
1806                EncodeError::InvalidColumns(M),
1807            ));
1808        }
1809
1810        let columns = u32::try_from(M).or(Err(WriteError::Encode(
1811            name.into(),
1812            self.buffer_frame,
1813            EncodeError::InvalidColumns(M),
1814        )))?;
1815
1816        let data = data.into_iter();
1817
1818        self.write_details(
1819            name,
1820            columns,
1821            T::gsd_data_type(),
1822            |buffer: &mut Vec<u8>| -> u64 {
1823                let mut len = 0;
1824                for element in data {
1825                    len += 1;
1826                    for value in element {
1827                        value.append_ne_bytes(buffer);
1828                    }
1829                }
1830                len
1831            },
1832        )
1833    }
1834
1835    /// Append a string to the current frame.
1836    ///
1837    /// `write_string` writes a UTF-8 string to a named chunk in the*  current frame
1838    /// of the GSD file. Call [`end_frame`](GsdFile::end_frame) to complete the
1839    /// frame and start the next.
1840    ///
1841    /// <div class="warning">
1842    ///
1843    /// Dropping a [`GsdFile`] will also drop any pending data chunks in incomplete
1844    /// frames.
1845    ///
1846    /// </div>
1847    ///
1848    /// # Example
1849    ///
1850    /// ```
1851    /// use hoomd_gsd::file_layer::{DataType, GsdFile};
1852    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1853    /// # use tempfile::tempdir;
1854    /// # let tmp_dir = tempdir().expect("temp dir should be created");
1855    /// # let path = tmp_dir.path().join("test.gsd");
1856    /// let mut gsd_file = GsdFile::create_new(path, "example", "hoomd", (1, 4))?;
1857    /// gsd_file.write_string("log/my_string", "Hello, GSD.")?;
1858    /// gsd_file.end_frame()?;
1859    /// # Ok(())
1860    /// # }
1861    /// ```
1862    ///
1863    /// # Errors
1864    ///
1865    /// Returns a [`WriteError`] when any of the following occur:
1866    /// * The file is not opened in a write mode.
1867    /// * There are no available chunk identifiers.
1868    /// * A chunk with the same name has already been written in this frame.
1869    pub fn write_string(&mut self, name: &str, data: &str) -> Result<(), WriteError> {
1870        let data = data.as_bytes();
1871
1872        self.write_details(name, 1, 11, |buffer: &mut Vec<u8>| -> u64 {
1873            buffer.extend(data);
1874            data.len() as u64
1875        })
1876    }
1877
1878    /// Common code used in all write_ methods.
1879    ///
1880    /// The `append` callable must return the number of rows added to the buffer.
1881    fn write_details<F>(
1882        &mut self,
1883        name: &str,
1884        columns: u32,
1885        data_type: u8,
1886        append: F,
1887    ) -> Result<(), WriteError>
1888    where
1889        F: FnOnce(&mut Vec<u8>) -> u64,
1890    {
1891        if self.mode != Mode::Write {
1892            return Err(WriteError::Encode(
1893                name.into(),
1894                self.buffer_frame,
1895                EncodeError::NotWritable,
1896            ));
1897        }
1898
1899        let location = self.file_len + self.data_buffer.len() as u64;
1900
1901        let id = self
1902            .get_id(name)
1903            .map_err(|e| WriteError::Encode(name.into(), self.buffer_frame, e))?;
1904
1905        if !self.index.frame_names.insert(id) {
1906            return Err(WriteError::Encode(
1907                name.into(),
1908                self.buffer_frame,
1909                EncodeError::DuplicateChunkName(name.into(), self.buffer_frame),
1910            ));
1911        }
1912
1913        // This implementation is a departure from the GSD C implementation
1914        // which would eagerly write large arrays directly to the file before
1915        // flushing the previous entries. That complicated the code and
1916        // required two index buffers that needed to be patched up.
1917        // This implementation always appends data to the write buffer
1918        // (via the append call).
1919        //
1920        // The Rust implementation always writes full data chunks into the
1921        // buffer, but flushes the buffer first in, first out. That way, no
1922        // index entries need to be patched up. When the buffer is flushed here,
1923        // we do need to flag to `end_frame` that `sync_all` needs to be called.
1924        let rows = append(&mut self.data_buffer);
1925
1926        // write_* doesn't actually write any data to the file itself. For
1927        // performance, it buffers all writes. Above, `get_id` appended any
1928        // new names to `self.name_list.buffer`. Now, `write_scalars` needs to
1929        // construct the index entry and put the bytes of the array in the data
1930        // buffer. `sync_all` will write the data buffer first, so all index
1931        // entries can be constructed with the known location:
1932        // file_len + currently buffered bytes.
1933        let index_entry = IndexEntry {
1934            frame: self.buffer_frame,
1935            n: rows,
1936            m: columns,
1937            location,
1938            id,
1939            data_type,
1940            flags: 0,
1941        };
1942
1943        self.index.buffer.push(index_entry);
1944        self.index.pending += 1;
1945
1946        if self.data_buffer.len() >= self.maximum_write_buffer_size {
1947            self.flush_data()
1948                .map_err(|e| WriteError::Sync(name.into(), self.buffer_frame, e))?;
1949            self.data_buffer_flushed = true;
1950        }
1951
1952        Ok(())
1953    }
1954
1955    /// Complete the current frame.
1956    ///
1957    /// Commits previous calls to `write_*` methods to the current frame. Calls to
1958    /// `write_*` methods following `end_frame` will write to the next frame.
1959    ///
1960    /// Calling `end_frame` does **not** ensure that all buffered data is synced to
1961    /// the filesystem. Call [`sync_all`](GsdFile::sync_all) to do so.
1962    ///
1963    /// # Example
1964    ///
1965    /// ```
1966    /// use hoomd_gsd::file_layer::{DataType, GsdFile};
1967    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
1968    /// # use tempfile::tempdir;
1969    /// # let tmp_dir = tempdir().expect("temp dir should be created");
1970    /// # let path = tmp_dir.path().join("test.gsd");
1971    /// let mut gsd_file = GsdFile::create_new(path, "example", "hoomd", (1, 4))?;
1972    /// gsd_file.write_scalars("configuration/step", [100_000_u64])?;
1973    /// gsd_file.end_frame()?;
1974    ///
1975    /// gsd_file.write_scalars("configuration/step", [200_000_u64])?;
1976    /// gsd_file.end_frame()?;
1977    ///
1978    /// gsd_file.write_scalars("configuration/step", [300_000_u64])?;
1979    /// gsd_file.end_frame()?;
1980    /// # Ok(())
1981    /// # }
1982    /// ```
1983    ///
1984    /// # Errors
1985    ///
1986    /// Returns a [`EncodeError`] when any of the following occur:
1987    /// * The file is not opened in a write mode.
1988    pub fn end_frame(&mut self) -> Result<(), EncodeError> {
1989        if self.mode != Mode::Write {
1990            return Err(EncodeError::NotWritable);
1991        }
1992
1993        self.buffer_frame += 1;
1994        self.index.pending = 0;
1995        self.index.frame_names.clear();
1996
1997        Ok(())
1998    }
1999
2000    #[inline]
2001    #[must_use]
2002    /// The number of frames *written to the file*.
2003    ///
2004    /// `n_frames` returns the number of frames *available to read* from the file.
2005    /// Each call to [`end_frame`](GsdFile::end_frame) increments the number of
2006    /// frames, but they are not written to the file until it is closed or by a call
2007    /// to [`sync_all`](GsdFile::sync_all).
2008    ///
2009    /// ```
2010    /// use hoomd_gsd::file_layer::{DataType, GsdFile};
2011    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
2012    /// # use tempfile::tempdir;
2013    /// # let tmp_dir = tempdir().expect("temp dir should be created");
2014    /// # let path = tmp_dir.path().join("test.gsd");
2015    /// let mut gsd_file = GsdFile::create_new(path, "example", "hoomd", (1, 4))?;
2016    /// gsd_file.write_scalars("configuration/step", [100_000_u64])?;
2017    /// gsd_file.end_frame()?;
2018    ///
2019    /// gsd_file.write_scalars("configuration/step", [200_000_u64])?;
2020    /// gsd_file.end_frame()?;
2021    ///
2022    /// gsd_file.write_scalars("configuration/step", [300_000_u64])?;
2023    /// gsd_file.end_frame()?;
2024    /// gsd_file.sync_all()?;
2025    ///
2026    /// let n_frames = gsd_file.n_frames();
2027    /// assert_eq!(n_frames, 3);
2028    /// # Ok(())
2029    /// # }
2030    /// ```
2031    pub fn n_frames(&self) -> u64 {
2032        self.file_frame
2033    }
2034
2035    #[inline]
2036    #[must_use]
2037    /// Provide the mapping from data chunk names to ids.
2038    ///
2039    /// The mapping includes keys for all data chunk names in the file.
2040    ///
2041    /// # Example
2042    ///
2043    /// ```
2044    /// use hoomd_gsd::file_layer::GsdFile;
2045    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
2046    /// # use tempfile::tempdir;
2047    /// # let tmp_dir = tempdir().expect("temp dir should be created");
2048    /// # let path = tmp_dir.path().join("test.gsd");
2049    /// let position = vec![[5.0_f32, 3.0, -4.0], [-2.0, 3.0, -6.0]];
2050    ///
2051    /// let mut gsd_file = GsdFile::create_new(path, "example", "hoomd", (1, 4))?;
2052    /// gsd_file.write_scalars("configuration/step", [100_000_u64])?;
2053    /// gsd_file.write_scalars(
2054    ///     "configuration/box",
2055    ///     [10.0_f32, 20.0, 15.0, 0.0, 0.0, 0.0],
2056    /// )?;
2057    /// gsd_file.write_arrays("particles/position", position.iter().copied())?;
2058    /// gsd_file.end_frame()?;
2059    ///
2060    /// let name_id = gsd_file.name_id();
2061    /// assert!(name_id.contains_key("configuration/step"));
2062    /// assert!(name_id.contains_key("configuration/box"));
2063    /// assert!(name_id.contains_key("particles/position"));
2064    /// assert!(!name_id.contains_key("particles/orientation"));
2065    /// # Ok(())
2066    /// # }
2067    /// ```
2068    pub fn name_id(&self) -> &HashMap<String, u16> {
2069        &self.name_list.name_id
2070    }
2071
2072    /// The name of the application used to write the file.
2073    ///
2074    /// # Example
2075    ///
2076    /// ```
2077    /// use hoomd_gsd::file_layer::GsdFile;
2078    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
2079    /// # use tempfile::tempdir;
2080    /// # let tmp_dir = tempdir().expect("temp dir should be created");
2081    /// # let path = tmp_dir.path().join("test.gsd");
2082    /// let mut gsd_file = GsdFile::create_new(path, "example", "hoomd", (1, 4))?;
2083    ///
2084    /// let application = gsd_file.application();
2085    /// assert_eq!(application, "example");
2086    /// # Ok(())
2087    /// # }
2088    /// ```
2089    #[inline]
2090    #[must_use]
2091    pub fn application(&self) -> &str {
2092        &self.header.application
2093    }
2094
2095    /// The schema that describes the expected data chunks.
2096    ///
2097    /// # Example
2098    ///
2099    /// ```
2100    /// use hoomd_gsd::file_layer::GsdFile;
2101    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
2102    /// # use tempfile::tempdir;
2103    /// # let tmp_dir = tempdir().expect("temp dir should be created");
2104    /// # let path = tmp_dir.path().join("test.gsd");
2105    /// let mut gsd_file = GsdFile::create_new(path, "example", "hoomd", (1, 4))?;
2106    ///
2107    /// let schema = gsd_file.schema();
2108    /// assert_eq!(schema, "hoomd");
2109    /// # Ok(())
2110    /// # }
2111    /// ```
2112    #[inline]
2113    #[must_use]
2114    pub fn schema(&self) -> &str {
2115        &self.header.schema
2116    }
2117
2118    /// The schema version (major, minor).
2119    ///
2120    /// # Example
2121    ///
2122    /// ```
2123    /// use hoomd_gsd::file_layer::GsdFile;
2124    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
2125    /// # use tempfile::tempdir;
2126    /// # let tmp_dir = tempdir().expect("temp dir should be created");
2127    /// # let path = tmp_dir.path().join("test.gsd");
2128    /// let mut gsd_file = GsdFile::create_new(path, "example", "hoomd", (1, 4))?;
2129    ///
2130    /// let schema_version = gsd_file.schema_version();
2131    /// assert_eq!(schema_version, (1, 4));
2132    /// # Ok(())
2133    /// # }
2134    /// ```
2135    #[inline]
2136    #[must_use]
2137    pub fn schema_version(&self) -> (u16, u16) {
2138        self.header.schema_version
2139    }
2140
2141    #[inline]
2142    #[must_use]
2143    /// The maximum number of bytes to store in the write buffer.
2144    pub fn maximum_write_buffer_size(&self) -> usize {
2145        self.maximum_write_buffer_size
2146    }
2147
2148    #[inline]
2149    #[must_use]
2150    /// Mutable reference to the maximum number of bytes to store in the write buffer.
2151    pub fn maximum_write_buffer_size_mut(&mut self) -> &mut usize {
2152        &mut self.maximum_write_buffer_size
2153    }
2154
2155    /// Flush data buffer to the filesystem.
2156    ///
2157    /// Returns true when any data was written to the file.
2158    fn flush_data(&mut self) -> Result<bool, SyncError> {
2159        if self.data_buffer.is_empty() {
2160            Ok(false)
2161        } else {
2162            let current_len = self.file.seek(SeekFrom::End(0))?;
2163            debug_assert_eq!(current_len, self.file_len);
2164            self.file.write_all(&self.data_buffer)?;
2165            self.file_len += self.data_buffer.len() as u64;
2166            self.data_buffer.clear();
2167            Ok(true)
2168        }
2169    }
2170
2171    /// Flush the name buffer to the filesystem.
2172    ///
2173    /// Returns true when any data was written to the file.
2174    fn flush_names(&mut self) -> Result<bool, SyncError> {
2175        if self.name_list.buffer.is_empty() {
2176            Ok(false)
2177        } else {
2178            if self.name_list.insert_position + self.name_list.buffer.len() as u64
2179                > self.header.namelist_allocated_entries * NAME_SIZE
2180            {
2181                self.expand_name_list_to(
2182                    self.name_list.insert_position + self.name_list.buffer.len() as u64,
2183                )?;
2184            }
2185            debug_assert!(
2186                self.name_list.insert_position + self.name_list.buffer.len() as u64
2187                    <= self.header.namelist_allocated_entries * NAME_SIZE
2188            );
2189            self.file.seek(SeekFrom::Start(
2190                self.header.namelist_location + self.name_list.insert_position,
2191            ))?;
2192            self.file.write_all(&self.name_list.buffer)?;
2193            self.name_list.insert_position += self.name_list.buffer.len() as u64;
2194            self.name_list.buffer.clear();
2195            Ok(true)
2196        }
2197    }
2198
2199    /// Write buffered data to the filesystem.
2200    ///
2201    /// `sync_all` ensures that the data and indices for all complete frames is
2202    /// written to the filesystem.
2203    ///
2204    /// In most cases, callers should not call `sync_all` manually. It will be
2205    /// called automatically when a [`GsdFile`] is dropped. Call `sync_all` only
2206    /// when you need to read data arrays written in previous frames or when you
2207    /// want to ensure that all data up to a specific frame are present in the file.
2208    ///
2209    /// # Errors
2210    ///
2211    /// Returns a [`SyncError`] when any of the following occur:
2212    /// * The file is not opened in a write mode.
2213    /// * An I/O error writing to the file.
2214    pub fn sync_all(&mut self) -> Result<(), SyncError> {
2215        if self.mode != Mode::Write {
2216            return Err(SyncError::NotWritable);
2217        }
2218
2219        let mut need_remap = false;
2220
2221        // Write the data buffer to the file first. Should any error occur here,
2222        // the file might have some extra bytes at the end, but the index of
2223        // written data so far will be correct.
2224        if self.flush_data()? || self.data_buffer_flushed {
2225            need_remap = true;
2226            self.data_buffer_flushed = false;
2227            self.file.sync_all()?;
2228        }
2229
2230        // Write the new name next to ensure that the references in the index
2231        // will be consistent with the names.
2232        self.flush_names()?;
2233
2234        // Now write all the non-pending index entries.
2235        // Index entries must be sorted by (frame, id) to be valid. Given that
2236        // pending index entries are guaranteed to have `frame+1`, we do not
2237        // need to sort the pending entries here.
2238        let index_entries_to_write = self.index.buffer.len() - self.index.pending;
2239        if index_entries_to_write > 0 {
2240            if self.index.n + index_entries_to_write as u64 > self.header.index_allocated_entries {
2241                need_remap = true;
2242                self.expand_index_to(
2243                    (self.index.n + index_entries_to_write as u64) * INDEX_ENTRY_SIZE,
2244                )?;
2245            }
2246            debug_assert!(
2247                self.index.n + index_entries_to_write as u64 <= self.header.index_allocated_entries
2248            );
2249            self.index.buffer[0..index_entries_to_write].sort_unstable();
2250
2251            // format the index entries to write in the file byte order and
2252            // remove them from the index buffer.
2253            self.index.byte_buffer.clear();
2254            for entry in self.index.buffer.drain(0..index_entries_to_write) {
2255                self.index.byte_buffer.extend(&entry.to_ne_bytes());
2256            }
2257            self.file.seek(SeekFrom::Start(
2258                self.header.index_location + self.index.n * INDEX_ENTRY_SIZE,
2259            ))?;
2260            self.file.write_all(&self.index.byte_buffer)?;
2261            self.index.n += index_entries_to_write as u64;
2262
2263            self.file.sync_all()?;
2264        }
2265
2266        if need_remap {
2267            self.remap()?;
2268        }
2269
2270        self.file_frame = self.buffer_frame;
2271
2272        Ok(())
2273    }
2274
2275    /// Expand the name list.
2276    fn expand_name_list_to(&mut self, capacity: u64) -> Result<(), SyncError> {
2277        let old_size = self.header.namelist_allocated_entries * NAME_SIZE;
2278        let mut new_size = old_size;
2279        while new_size <= capacity {
2280            new_size *= 2;
2281        }
2282
2283        // Ensure that the new buffer size is a multiple of NAME_SIZE because
2284        // GSD files always allocate name lists in those multiples.
2285        let new_allocated_entries = new_size.div_ceil(NAME_SIZE);
2286        let new_size = new_allocated_entries * NAME_SIZE;
2287        let new_location = self.file.seek(SeekFrom::End(0))?;
2288
2289        usize::try_from(new_location)
2290            .map_err(|_| SyncError::NameListOutOfBounds(new_location, new_size))?;
2291        usize::try_from(new_location + new_size)
2292            .map_err(|_| SyncError::NameListOutOfBounds(new_location, new_size))?;
2293
2294        let old_start = usize::try_from(self.header.namelist_location)
2295            .expect("namelist should be validated addressable previously");
2296        let old_end =
2297            usize::try_from(self.header.namelist_location + self.name_list.insert_position)
2298                .expect("namelist should be validated addressable previously");
2299        self.file.write_all(&self.mmap[old_start..old_end])?;
2300        self.file.set_len(new_location + new_size)?;
2301        self.file_len = new_location + new_size;
2302
2303        // Ensure that the new name list is in place before updating the
2304        // header. If one of the writes fails, the file could otherwise
2305        // be left in a state where the header points to a non-existent
2306        // name list.
2307        self.file.sync_all()?;
2308
2309        self.header.namelist_location = new_location;
2310        self.header.namelist_allocated_entries = new_allocated_entries;
2311        self.file.seek(SeekFrom::Start(0))?;
2312        self.file.write_all(&self.header.to_ne_bytes())?;
2313
2314        self.file.sync_all()?;
2315
2316        Ok(())
2317    }
2318
2319    /// Expand the index.
2320    fn expand_index_to(&mut self, capacity: u64) -> Result<(), SyncError> {
2321        let old_size = self.header.index_allocated_entries * INDEX_ENTRY_SIZE;
2322        let mut new_size = old_size;
2323        while new_size <= capacity {
2324            new_size *= 2;
2325        }
2326
2327        // Ensure that the new buffer size is a multiple of INDEX_ENTRY_SIZE
2328        // because GSD files always allocate indices in those multiples.
2329        let new_allocated_entries = new_size.div_ceil(INDEX_ENTRY_SIZE);
2330        let new_size = new_allocated_entries * INDEX_ENTRY_SIZE;
2331        let new_location = self.file.seek(SeekFrom::End(0))?;
2332
2333        usize::try_from(new_location)
2334            .map_err(|_| SyncError::IndexOutOfBounds(new_location, new_size))?;
2335        usize::try_from(new_location + new_size)
2336            .map_err(|_| SyncError::IndexOutOfBounds(new_location, new_size))?;
2337
2338        let old_start = usize::try_from(self.header.index_location)
2339            .expect("index should be validated addressable previously");
2340        let old_end = usize::try_from(self.header.index_location + self.index.n * INDEX_ENTRY_SIZE)
2341            .expect("index should be validated addressable previously");
2342        if old_end > self.mmap.len() {
2343            return Err(SyncError::IndexOutOfBounds(
2344                old_start as u64,
2345                old_end as u64,
2346            ));
2347        }
2348        self.file.write_all(&self.mmap[old_start..old_end])?;
2349        self.file.set_len(new_location + new_size)?;
2350        self.file_len = new_location + new_size;
2351
2352        // Ensure that the new index is in place before updating the
2353        // header. If one of the writes fails, the file could otherwise
2354        // be left in a state where the header points to a non-existent
2355        // index.
2356        self.file.sync_all()?;
2357
2358        self.header.index_location = new_location;
2359        self.header.index_allocated_entries = new_allocated_entries;
2360        self.file.seek(SeekFrom::Start(0))?;
2361        self.file.write_all(&self.header.to_ne_bytes())?;
2362
2363        self.file.sync_all()?;
2364
2365        Ok(())
2366    }
2367
2368    /// Get the file mode.
2369    #[inline]
2370    #[must_use]
2371    pub fn mode(&self) -> &Mode {
2372        &self.mode
2373    }
2374}
2375
2376/// Automatically synchronize buffered data before closing the file.
2377///
2378/// [`GsdFile`] automatically calls [`sync_all`](GsdFile::sync_all) when
2379/// dropped and ignores and errors. To check for any potential errors, call
2380/// [`sync_all`](GsdFile::sync_all) before dropping a [`GsdFile`].
2381impl Drop for GsdFile {
2382    fn drop(&mut self) {
2383        let _ = self.sync_all();
2384    }
2385}
2386
2387#[cfg(test)]
2388mod tests {
2389    use super::*;
2390    use tempfile::tempdir;
2391
2392    #[test]
2393    fn create_new() -> anyhow::Result<()> {
2394        let tmp_dir = tempdir()?;
2395        let path = tmp_dir.path().join("test.gsd");
2396        GsdFile::create_new(path.clone(), "application", "schema", (12, 42))?;
2397
2398        let gsd_file = GsdFile::open(path.clone(), Mode::Read)?;
2399        assert_eq!(gsd_file.application(), "application");
2400        assert_eq!(gsd_file.schema(), "schema");
2401        assert_eq!(gsd_file.schema_version(), (12, 42));
2402        assert_eq!(gsd_file.n_frames(), 0);
2403        assert!(gsd_file.name_id().is_empty());
2404
2405        assert!(matches!(
2406            GsdFile::create_new(path.clone(), "application", "schema", (12, 42)),
2407            Err(OpenError::IO(_, _))
2408        ));
2409
2410        Ok(())
2411    }
2412
2413    #[test]
2414    fn open_write() -> anyhow::Result<()> {
2415        let tmp_dir = tempdir()?;
2416        let path = tmp_dir.path().join("test.gsd");
2417        GsdFile::create_new(path.clone(), "application", "schema", (12, 42))?;
2418
2419        let mut gsd_file = GsdFile::open(path.clone(), Mode::Write)?;
2420        gsd_file.write_scalars("a", [1])?;
2421        gsd_file.end_frame()?;
2422        gsd_file.sync_all()?;
2423
2424        Ok(())
2425    }
2426
2427    #[test]
2428    fn create_errors() -> anyhow::Result<()> {
2429        let tmp_dir = tempdir()?;
2430        let path = tmp_dir.path().join("test.gsd");
2431        GsdFile::create(path.clone(), "application", "schema", (12, 42))?;
2432
2433        let long_application = "a".repeat(64);
2434        let result = GsdFile::create(path.clone(), &long_application, "schema", (1, 0));
2435        assert!(matches!(result, Err(OpenError::NameTooLong(_))));
2436
2437        let long_schema = "s".repeat(64);
2438        let result = GsdFile::create(path.clone(), "a", &long_schema, (1, 0));
2439        assert!(matches!(result, Err(OpenError::NameTooLong(_))));
2440
2441        let just_right_application = "a".repeat(63);
2442        let just_right_schema = "s".repeat(63);
2443        let result = GsdFile::create(
2444            path.clone(),
2445            &just_right_application,
2446            &just_right_schema,
2447            (1, 0),
2448        );
2449        assert!(result.is_ok());
2450
2451        Ok(())
2452    }
2453
2454    #[test]
2455    fn maximum_write_buffer_size() -> anyhow::Result<()> {
2456        let tmp_dir = tempdir()?;
2457        let path = tmp_dir.path().join("test.gsd");
2458        let mut gsd_file = GsdFile::create(path.clone(), "a", "s", (1, 0))?;
2459
2460        *gsd_file.maximum_write_buffer_size_mut() = 8;
2461        assert_eq!(gsd_file.maximum_write_buffer_size(), 8);
2462
2463        let initial_size = gsd_file.file.metadata()?.len();
2464        assert_eq!(initial_size, gsd_file.file_len);
2465
2466        gsd_file
2467            .write_scalars::<u64, _>("a", [1])
2468            .expect("write should succeed");
2469        gsd_file.end_frame()?;
2470
2471        let final_size = gsd_file.file.metadata()?.len();
2472        assert_eq!(final_size, gsd_file.file_len);
2473        assert_eq!(final_size, initial_size + 8);
2474
2475        Ok(())
2476    }
2477
2478    #[test]
2479    fn sync_all() -> anyhow::Result<()> {
2480        let tmp_dir = tempdir()?;
2481        let path = tmp_dir.path().join("test.gsd");
2482        let mut gsd_file = GsdFile::create(path.clone(), "a", "s", (1, 0))?;
2483
2484        let initial_size = gsd_file.file.metadata()?.len();
2485
2486        gsd_file.write_scalars::<u64, _>("a", [1])?;
2487        gsd_file.end_frame().expect("write should succeed");
2488
2489        let final_size = gsd_file.file.metadata()?.len();
2490        assert_eq!(final_size, gsd_file.file_len);
2491        assert_eq!(final_size, initial_size);
2492
2493        gsd_file.sync_all().expect("write should succeed");
2494        let final_size = gsd_file.file.metadata()?.len();
2495        assert_eq!(final_size, gsd_file.file_len);
2496        assert_eq!(final_size, initial_size + 8);
2497
2498        Ok(())
2499    }
2500
2501    #[test]
2502    fn pending_index() -> anyhow::Result<()> {
2503        let tmp_dir = tempdir()?;
2504        let path = tmp_dir.path().join("test.gsd");
2505        let mut gsd_file = GsdFile::create(path.clone(), "a", "s", (1, 0))?;
2506
2507        gsd_file.write_scalars("a", [1])?;
2508        gsd_file.end_frame()?;
2509
2510        gsd_file.write_scalars("a", [1])?;
2511        gsd_file.write_scalars("b", [2])?;
2512        gsd_file.write_scalars("c", [3])?;
2513        gsd_file.write_scalars("d", [4])?;
2514        gsd_file.write_scalars("e", [5])?;
2515        gsd_file.write_scalars("f", [6])?;
2516        gsd_file.write_scalars("g", [7])?;
2517        gsd_file.write_scalars("h", [8])?;
2518
2519        assert_eq!(gsd_file.n_frames(), 0);
2520
2521        gsd_file.sync_all().expect("write should succeed");
2522
2523        assert!(gsd_file.find_chunk(0, "a").is_some());
2524        assert_eq!(gsd_file.n_frames(), 1);
2525
2526        // frame 1 should not be in the file yet.
2527        assert!(gsd_file.find_chunk(1, "a").is_none());
2528        assert!(gsd_file.find_chunk(1, "b").is_none());
2529        assert!(gsd_file.find_chunk(1, "c").is_none());
2530        assert!(gsd_file.find_chunk(1, "d").is_none());
2531        assert!(gsd_file.find_chunk(1, "e").is_none());
2532        assert!(gsd_file.find_chunk(1, "f").is_none());
2533        assert!(gsd_file.find_chunk(1, "g").is_none());
2534        assert!(gsd_file.find_chunk(1, "h").is_none());
2535
2536        gsd_file.end_frame()?;
2537        assert_eq!(gsd_file.n_frames(), 1);
2538        gsd_file.sync_all()?;
2539        assert_eq!(gsd_file.n_frames(), 2);
2540
2541        // frame 1 should now contain all test chunks
2542        assert!(gsd_file.find_chunk(1, "a").is_some());
2543        assert!(gsd_file.find_chunk(1, "b").is_some());
2544        assert!(gsd_file.find_chunk(1, "c").is_some());
2545        assert!(gsd_file.find_chunk(1, "d").is_some());
2546        assert!(gsd_file.find_chunk(1, "e").is_some());
2547        assert!(gsd_file.find_chunk(1, "f").is_some());
2548        assert!(gsd_file.find_chunk(1, "g").is_some());
2549        assert!(gsd_file.find_chunk(1, "h").is_some());
2550
2551        Ok(())
2552    }
2553
2554    #[expect(clippy::too_many_lines, reason = "There are many data types to test")]
2555    #[test]
2556    fn all_types() -> anyhow::Result<()> {
2557        let tmp_dir = tempdir()?;
2558        let path = tmp_dir.path().join("test.gsd");
2559        let mut gsd_file = GsdFile::create(path.clone(), "a", "s", (1, 0))?;
2560
2561        let u8_data = [1, 2, 3];
2562        let u16_data = [4, 5, 6];
2563        let u32_data = [7, 8, 9];
2564        let u64_data = [10, 11, 12];
2565        let i8_data = [-1, -2, -3];
2566        let i16_data = [-4, -5, -6];
2567        let i32_data = [-7, -8, -9];
2568        let i64_data = [-10, -11, -12];
2569        let f32_data = [13.0, 14.0, 15.0];
2570        let f64_data = [16.0, 17.0, 18.0];
2571        let string_data = "Test string.";
2572
2573        gsd_file.write_scalars("u8", u8_data)?;
2574        gsd_file.write_scalars("u16", u16_data)?;
2575        gsd_file.write_scalars("u32", u32_data)?;
2576        gsd_file.write_scalars("u64", u64_data)?;
2577        gsd_file.write_scalars("i8", i8_data)?;
2578        gsd_file.write_scalars("i16", i16_data)?;
2579        gsd_file.write_scalars("i32", i32_data)?;
2580        gsd_file.write_scalars("i64", i64_data)?;
2581        gsd_file.write_scalars("f32", f32_data)?;
2582        gsd_file.write_scalars("f64", f64_data)?;
2583        gsd_file.write_string("string", string_data)?;
2584        gsd_file.end_frame()?;
2585        drop(gsd_file);
2586
2587        let gsd_file = GsdFile::open(path.clone(), Mode::Read)?;
2588        assert_eq!(gsd_file.n_frames(), 1);
2589
2590        let u8_array = gsd_file.iter_scalars::<u8>(0, "u8")?;
2591        let u16_array = gsd_file.iter_scalars::<u16>(0, "u16")?;
2592        let u32_array = gsd_file.iter_scalars::<u32>(0, "u32")?;
2593        let u64_array = gsd_file.iter_scalars::<u64>(0, "u64")?;
2594        let i8_array = gsd_file.iter_scalars::<i8>(0, "i8")?;
2595        let i16_array = gsd_file.iter_scalars::<i16>(0, "i16")?;
2596        let i32_array = gsd_file.iter_scalars::<i32>(0, "i32")?;
2597        let i64_array = gsd_file.iter_scalars::<i64>(0, "i64")?;
2598        let f32_array = gsd_file.iter_scalars::<f32>(0, "f32")?;
2599        let f64_array = gsd_file.iter_scalars::<f64>(0, "f64")?;
2600        let string_array = gsd_file.read_string(0, "string")?;
2601
2602        itertools::assert_equal(u8_array, u8_data);
2603        itertools::assert_equal(u16_array, u16_data);
2604        itertools::assert_equal(u32_array, u32_data);
2605        itertools::assert_equal(u64_array, u64_data);
2606        itertools::assert_equal(i8_array, i8_data);
2607        itertools::assert_equal(i16_array, i16_data);
2608        itertools::assert_equal(i32_array, i32_data);
2609        itertools::assert_equal(i64_array, i64_data);
2610        itertools::assert_equal(f32_array, f32_data);
2611        itertools::assert_equal(f64_array, f64_data);
2612        assert_eq!(string_array, string_data);
2613
2614        assert_eq!(
2615            GsdFile::size_of(u8::gsd_data_type()).expect("data type should be valid"),
2616            size_of::<u8>()
2617        );
2618        assert_eq!(
2619            GsdFile::size_of(u16::gsd_data_type()).expect("data type should be valid"),
2620            size_of::<u16>()
2621        );
2622        assert_eq!(
2623            GsdFile::size_of(u32::gsd_data_type()).expect("data type should be valid"),
2624            size_of::<u32>()
2625        );
2626        assert_eq!(
2627            GsdFile::size_of(u64::gsd_data_type()).expect("data type should be valid"),
2628            size_of::<u64>()
2629        );
2630        assert_eq!(
2631            GsdFile::size_of(i8::gsd_data_type()).expect("data type should be valid"),
2632            size_of::<i8>()
2633        );
2634        assert_eq!(
2635            GsdFile::size_of(i16::gsd_data_type()).expect("data type should be valid"),
2636            size_of::<i16>()
2637        );
2638        assert_eq!(
2639            GsdFile::size_of(i32::gsd_data_type()).expect("data type should be valid"),
2640            size_of::<i32>()
2641        );
2642        assert_eq!(
2643            GsdFile::size_of(i64::gsd_data_type()).expect("data type should be valid"),
2644            size_of::<i64>()
2645        );
2646        assert_eq!(
2647            GsdFile::size_of(f32::gsd_data_type()).expect("data type should be valid"),
2648            size_of::<f32>()
2649        );
2650        assert_eq!(
2651            GsdFile::size_of(f64::gsd_data_type()).expect("data type should be valid"),
2652            size_of::<f64>()
2653        );
2654
2655        assert_eq!(
2656            gsd_file
2657                .find_chunk(0, "u8")
2658                .expect("u8 should be written above")
2659                .data_type(),
2660            Some(DataType::U8)
2661        );
2662        assert_eq!(
2663            gsd_file
2664                .find_chunk(0, "u16")
2665                .expect("u16 should be written above")
2666                .data_type(),
2667            Some(DataType::U16)
2668        );
2669        assert_eq!(
2670            gsd_file
2671                .find_chunk(0, "u32")
2672                .expect("u32 should be written above")
2673                .data_type(),
2674            Some(DataType::U32)
2675        );
2676        assert_eq!(
2677            gsd_file
2678                .find_chunk(0, "u64")
2679                .expect("c should be written above")
2680                .data_type(),
2681            Some(DataType::U64)
2682        );
2683        assert_eq!(
2684            gsd_file
2685                .find_chunk(0, "i8")
2686                .expect("i8 should be written above")
2687                .data_type(),
2688            Some(DataType::I8)
2689        );
2690        assert_eq!(
2691            gsd_file
2692                .find_chunk(0, "i16")
2693                .expect("i16 should be written above")
2694                .data_type(),
2695            Some(DataType::I16)
2696        );
2697        assert_eq!(
2698            gsd_file
2699                .find_chunk(0, "i32")
2700                .expect("i32 should be written above")
2701                .data_type(),
2702            Some(DataType::I32)
2703        );
2704        assert_eq!(
2705            gsd_file
2706                .find_chunk(0, "i64")
2707                .expect("i64 should be written above")
2708                .data_type(),
2709            Some(DataType::I64)
2710        );
2711        assert_eq!(
2712            gsd_file
2713                .find_chunk(0, "f32")
2714                .expect("f32 should be written above")
2715                .data_type(),
2716            Some(DataType::F32)
2717        );
2718        assert_eq!(
2719            gsd_file
2720                .find_chunk(0, "f64")
2721                .expect("f64 should be written above")
2722                .data_type(),
2723            Some(DataType::F64)
2724        );
2725        assert_eq!(
2726            gsd_file
2727                .find_chunk(0, "string")
2728                .expect("string should be written above")
2729                .data_type(),
2730            Some(DataType::String)
2731        );
2732
2733        Ok(())
2734    }
2735
2736    #[test]
2737    fn dimensions() -> anyhow::Result<()> {
2738        let tmp_dir = tempdir()?;
2739        let path = tmp_dir.path().join("test.gsd");
2740        let mut gsd_file = GsdFile::create(path.clone(), "a", "s", (1, 0))?;
2741
2742        let initial_size = gsd_file
2743            .file
2744            .metadata()
2745            .expect("metadata should be valid")
2746            .len();
2747
2748        gsd_file.write_scalars::<u64, _>("a", [])?;
2749        gsd_file.end_frame()?;
2750        gsd_file.write_scalars::<u64, _>("b", [1, 2, 3, 4, 5, 6])?;
2751
2752        gsd_file.write_arrays("c", [[1_u64, 2, 3], [4, 5, 6]])?;
2753        gsd_file.end_frame()?;
2754
2755        gsd_file.sync_all()?;
2756        let final_size = gsd_file.file.metadata()?.len();
2757        assert_eq!(final_size, gsd_file.file_len);
2758        assert_eq!(final_size, initial_size + (12 * size_of::<u64>()) as u64);
2759
2760        drop(gsd_file);
2761
2762        let gsd_file = GsdFile::open(path.clone(), Mode::Read)?;
2763        assert_eq!(gsd_file.n_frames(), 2);
2764
2765        let array_a = gsd_file.iter_scalars::<u64>(0, "a")?;
2766        assert_eq!(array_a.len(), 0);
2767
2768        let array_b = gsd_file.iter_scalars::<u64>(1, "b")?;
2769        assert_eq!(array_b.len(), 6);
2770        itertools::assert_equal(array_b, [1, 2, 3, 4, 5, 6]);
2771
2772        // Scalar data can be read as an array with M=1.
2773        let array_b = gsd_file.iter_arrays::<u64, 1>(1, "b")?;
2774        assert_eq!(array_b.len(), 6);
2775        itertools::assert_equal(array_b, [[1], [2], [3], [4], [5], [6]]);
2776
2777        let array_c = gsd_file.iter_arrays::<u64, 3>(1, "c")?;
2778        itertools::assert_equal(array_c, [[1, 2, 3], [4, 5, 6]]);
2779
2780        let entry_a = gsd_file
2781            .find_chunk(0, "a")
2782            .expect("a should be written above");
2783        assert_eq!(entry_a.frame(), 0);
2784        assert_eq!(entry_a.rows(), 0);
2785        assert_eq!(entry_a.columns(), 1);
2786        assert_eq!(entry_a.data_type(), Some(DataType::U64));
2787
2788        let entry_b = gsd_file
2789            .find_chunk(1, "b")
2790            .expect("a should be written above");
2791        assert_eq!(entry_b.frame(), 1);
2792        assert_eq!(entry_b.rows(), 6);
2793        assert_eq!(entry_b.columns(), 1);
2794        assert_eq!(entry_b.data_type(), Some(DataType::U64));
2795
2796        let entry_c = gsd_file
2797            .find_chunk(1, "c")
2798            .expect("c should be written above");
2799        assert_eq!(entry_c.frame(), 1);
2800        assert_eq!(entry_c.rows(), 2);
2801        assert_eq!(entry_c.columns(), 3);
2802        assert_eq!(entry_c.data_type(), Some(DataType::U64));
2803
2804        Ok(())
2805    }
2806
2807    #[test]
2808    fn invalid_writes() -> anyhow::Result<()> {
2809        let tmp_dir = tempdir()?;
2810        let path = tmp_dir.path().join("test.gsd");
2811        let mut gsd_file = GsdFile::create(path.clone(), "a", "s", (1, 0))?;
2812
2813        let result = gsd_file.write_arrays::<u64, _, 0>("a", []);
2814        assert!(matches!(
2815            result,
2816            Err(WriteError::Encode(_, _, EncodeError::InvalidColumns(_)))
2817        ));
2818
2819        let mut gsd_file = GsdFile::open(path.clone(), Mode::Read)?;
2820
2821        let result = gsd_file.write_scalars::<u64, _>("a", []);
2822        assert!(matches!(
2823            result,
2824            Err(WriteError::Encode(_, _, EncodeError::NotWritable))
2825        ));
2826
2827        let result = gsd_file.end_frame();
2828        assert!(matches!(result, Err(EncodeError::NotWritable)));
2829
2830        let result = gsd_file.sync_all();
2831        assert!(matches!(result, Err(SyncError::NotWritable)));
2832
2833        Ok(())
2834    }
2835
2836    #[test]
2837    fn duplicate_chunk_name() -> anyhow::Result<()> {
2838        let tmp_dir = tempdir()?;
2839        let path = tmp_dir.path().join("test.gsd");
2840        let mut gsd_file = GsdFile::create(path.clone(), "a", "s", (1, 0))?;
2841
2842        gsd_file.write_scalars("a", [1])?;
2843        let result = gsd_file.write_scalars("a", [1, 2]);
2844        assert!(matches!(
2845            result,
2846            Err(WriteError::Encode(
2847                _,
2848                _,
2849                EncodeError::DuplicateChunkName(_, _)
2850            ))
2851        ));
2852
2853        Ok(())
2854    }
2855
2856    #[test]
2857    fn read_invalid_reads() -> anyhow::Result<()> {
2858        let tmp_dir = tempdir()?;
2859        let path = tmp_dir.path().join("test.gsd");
2860        let mut gsd_file = GsdFile::create(path.clone(), "a", "s", (1, 0))?;
2861
2862        gsd_file.write_scalars("a", [1_u64])?;
2863        gsd_file.write_arrays("b", [[1_u64, 2], [3, 4]])?;
2864        gsd_file.end_frame()?;
2865        gsd_file.sync_all()?;
2866
2867        let result = gsd_file.iter_scalars::<u32>(0, "a");
2868        assert!(matches!(
2869            result,
2870            Err(ReadError::Decode(_, _, DecodeError::InvalidType(_, _)))
2871        ));
2872
2873        let result = gsd_file.iter_scalars::<u64>(0, "b");
2874        assert!(matches!(
2875            result,
2876            Err(ReadError::Decode(_, _, DecodeError::InvalidColumns(1, 2)))
2877        ));
2878
2879        let result = gsd_file.iter_arrays::<u64, 2>(0, "a");
2880        assert!(matches!(
2881            result,
2882            Err(ReadError::Decode(_, _, DecodeError::InvalidColumns(2, 1)))
2883        ));
2884
2885        let result = gsd_file.iter_arrays::<u64, 8>(0, "b");
2886        assert!(matches!(
2887            result,
2888            Err(ReadError::Decode(_, _, DecodeError::InvalidColumns(8, 2)))
2889        ));
2890
2891        let result = gsd_file.iter_scalars::<u32>(1, "a");
2892        assert!(matches!(result, Err(ReadError::ChunkNotFound(_, _))));
2893
2894        let result = gsd_file.iter_scalars::<u32>(0, "q");
2895        assert!(matches!(result, Err(ReadError::ChunkNotFound(_, _))));
2896
2897        Ok(())
2898    }
2899
2900    #[test]
2901    fn chunk_name_limit() -> anyhow::Result<()> {
2902        let tmp_dir = tempdir()?;
2903        let path = tmp_dir.path().join("test.gsd");
2904        let mut gsd_file = GsdFile::create(path.clone(), "a", "s", (1, 0))?;
2905
2906        for i in 0..u16::MAX {
2907            gsd_file.write_scalars::<u64, _>(&format!("{i:x}"), [])?;
2908        }
2909
2910        let i = u16::MAX;
2911        let result = gsd_file.write_scalars::<u64, _>(&format!("{i:x}"), []);
2912        assert!(matches!(
2913            result,
2914            Err(WriteError::Encode(_, _, EncodeError::NameListOverflow))
2915        ));
2916
2917        drop(gsd_file);
2918
2919        let gsd_file = GsdFile::open(path.clone(), Mode::Read)?;
2920
2921        assert_eq!(gsd_file.name_id().len(), u16::MAX as usize);
2922        for i in 0..u16::MAX {
2923            assert!(gsd_file.name_id().contains_key(&format!("{i:x}")));
2924        }
2925
2926        let size = gsd_file.file.metadata()?.len();
2927        assert_eq!(size, gsd_file.file_len);
2928
2929        Ok(())
2930    }
2931
2932    #[test]
2933    fn expand_index_multi() -> anyhow::Result<()> {
2934        const N_ENTRIES: u16 = 1024;
2935
2936        let tmp_dir = tempdir()?;
2937        let path = tmp_dir.path().join("test.gsd");
2938        let mut gsd_file = GsdFile::create(path.clone(), "a", "s", (1, 0))?;
2939
2940        for i in 0..N_ENTRIES {
2941            gsd_file.write_scalars::<u16, _>(&format!("{i:x}"), [i])?;
2942        }
2943        gsd_file.end_frame()?;
2944        gsd_file.sync_all()?;
2945
2946        drop(gsd_file);
2947
2948        let gsd_file = GsdFile::open(path.clone(), Mode::Read)?;
2949
2950        assert_eq!(gsd_file.index.n, u64::from(N_ENTRIES));
2951        for i in 0..N_ENTRIES {
2952            let array = gsd_file.iter_scalars::<u16>(0, &format!("{i:x}"))?;
2953            itertools::assert_equal(array, [i]);
2954        }
2955
2956        Ok(())
2957    }
2958
2959    #[test]
2960    fn string() -> anyhow::Result<()> {
2961        let tmp_dir = tempdir()?;
2962        let path = tmp_dir.path().join("test.gsd");
2963        let mut gsd_file = GsdFile::create(path.clone(), "a", "s", (1, 0))?;
2964
2965        gsd_file.write_string("a", "this is a string")?;
2966        gsd_file.end_frame()?;
2967        gsd_file.write_scalars::<u8, _>("b", [0, 159, 146, 150])?;
2968        gsd_file.end_frame()?;
2969        gsd_file.sync_all()?;
2970
2971        let a = gsd_file.read_string(0, "a")?;
2972        assert_eq!(a, "this is a string");
2973
2974        let b = gsd_file.read_string(1, "b");
2975        assert!(matches!(
2976            b,
2977            Err(ReadError::Decode(_, _, DecodeError::InvalidType(11, 1)))
2978        ));
2979
2980        Ok(())
2981    }
2982}