Restructure crates

Get rid of the arbitrary bin/lib split and instead move as much as possible into the bin crate, which becomes the main crate again. The types and NBT handling are moved into separate crates, so they can be reused by nbtdump and regiondump.
2025-07-01 13:29:06 +02:00 · 2023-08-20 16:28:10 +02:00 · 2023-08-20 16:28:10 +02:00 · 248a641035
commit 248a641035
parent 09399f5ae9
21 changed files with 121 additions and 62 deletions
--- a/crates/nbt/Cargo.toml
+++ b/crates/nbt/Cargo.toml
@ -0,0 +1,17 @@
+[package]
+name = "minedmap-nbt"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+anyhow = "1.0.75"
+bytemuck = "1.13.1"
+fastnbt = "2.4.4"
+flate2 = "1.0.27"
+minedmap-types = { version = "0.1.0", path = "../types" }
+serde = "1.0.183"
+
+[features]
+zlib-ng = ["flate2/zlib-ng"]
--- a/crates/nbt/src/data.rs
+++ b/crates/nbt/src/data.rs
@ -0,0 +1,32 @@
+//! Functions for reading and deserializing compressed NBT data
+
+use std::{fs::File, io::prelude::*, path::Path};
+
+use anyhow::{Context, Result};
+use flate2::read::GzDecoder;
+use serde::de::DeserializeOwned;
+
+/// Decompresses gzip-compressed NBT data from `reader` and deserializes it to a given data structure
+pub fn from_reader<R, T>(reader: R) -> Result<T>
+where
+	R: Read,
+	T: DeserializeOwned,
+{
+	// The whole stream is inflated into memory before it is parsed
+	let mut raw = Vec::new();
+	GzDecoder::new(reader)
+		.read_to_end(&mut raw)
+		.context("Failed to read file")?;
+
+	fastnbt::from_bytes(&raw).context("Failed to decode NBT data")
+}
+
+/// Reads gzip-compressed NBT data from the file at `path` and deserializes it to a given data structure
+pub fn from_file<P, T>(path: P) -> Result<T>
+where
+	P: AsRef<Path>,
+	T: DeserializeOwned,
+{
+	let file = File::open(path).context("Failed to open file")?;
+	from_reader(file)
+}
--- a/crates/nbt/src/lib.rs
+++ b/crates/nbt/src/lib.rs
@ -0,0 +1,7 @@
+//! MinedMap's handling of Minecraft NBT data and region files
+
+#![warn(missing_docs)]
+#![warn(clippy::missing_docs_in_private_items)]
+
+pub mod data;
+pub mod region;
--- a/crates/nbt/src/region.rs
+++ b/crates/nbt/src/region.rs
@ -0,0 +1,162 @@
+//! Functions for reading and deserializing region data
+
+use std::{
+	fs::File,
+	io::{prelude::*, SeekFrom},
+	path::Path,
+};
+
+use anyhow::{bail, Context, Result};
+use flate2::read::ZlibDecoder;
+use serde::de::DeserializeOwned;
+
+use minedmap_types::*;
+
+/// Data block size of region data files, in bytes
+///
+/// After one header block, the region file consists of one or more consecutive blocks
+/// of data for each populated chunk.
+const BLOCKSIZE: usize = 4096;
+
+/// Chunk descriptor extracted from region file header
+#[derive(Debug)]
+struct ChunkDesc {
+	/// Offset of the data block where the chunk starts, counted in blocks of [BLOCKSIZE] bytes
+	offset: u32,
+	/// Number of data blocks used by the chunk
+	len: u8,
+	/// Coordinates of the chunk described by this descriptor
+	coords: ChunkCoords,
+}
+
+/// Extracts the descriptors of all populated chunks from a region file header
+///
+/// Entries with a zero offset or zero length are skipped. The result is sorted
+/// by ascending block offset.
+fn parse_header(header: &ChunkArray<u32>) -> Vec<ChunkDesc> {
+	let mut descs = Vec::new();
+
+	for (coords, &raw) in header.iter() {
+		// Entries are stored big-endian: 24 bits of offset followed by 8 bits of length
+		let entry = u32::from_be(raw);
+		let (offset, len) = (entry >> 8, entry as u8);
+
+		if offset != 0 && len != 0 {
+			descs.push(ChunkDesc {
+				offset,
+				len,
+				coords,
+			});
+		}
+	}
+
+	// Stable sort, so chunks sharing an offset keep their header order
+	descs.sort_by_key(|desc| desc.offset);
+
+	descs
+}
+
+/// Decompresses chunk data and deserializes it to a given data structure
+///
+/// `buf` is one format byte (only 2 is accepted) followed by a zlib stream holding NBT data.
+fn decode_chunk<T>(buf: &[u8]) -> Result<T>
+where
+	T: DeserializeOwned,
+{
+	// An empty buffer is reported as an error instead of panicking while splitting
+	let payload = match buf.split_first() {
+		Some((&2, payload)) => payload,
+		_ => bail!("Unknown chunk format"),
+	};
+	let mut nbt = vec![];
+	ZlibDecoder::new(payload)
+		.read_to_end(&mut nbt)
+		.context("Failed to decompress chunk data")?;
+	fastnbt::from_bytes(&nbt).context("Failed to decode NBT data")
+}
+
+/// Wraps a reader used to read a region data file
+#[derive(Debug)]
+pub struct Region<R: Read + Seek> {
+	/// The wrapped reader
+	reader: R,
+}
+
+impl<R: Read + Seek> Region<R> {
+	/// Calls `f` for each chunk stored in the region data
+	///
+	/// Chunks are visited in the order in which they appear in the data file.
+	/// Iteration stops at the first read, decode or callback error, which is
+	/// returned; chunks visited before the failure have already been passed to `f`.
+	pub fn foreach_chunk<T, F>(self, mut f: F) -> Result<()>
+	where
+		T: DeserializeOwned,
+		F: FnMut(ChunkCoords, T) -> Result<()>,
+	{
+		let Region { mut reader } = self;
+
+		let chunks = {
+			let mut header = ChunkArray::<u32>::default();
+			reader
+				.read_exact(bytemuck::cast_mut::<_, [u8; BLOCKSIZE]>(&mut header.0))
+				.context("Failed to read region header")?;
+
+			parse_header(&header)
+		};
+
+		let mut seen = ChunkArray::<bool>::default();
+
+		for ChunkDesc {
+			offset,
+			len,
+			coords,
+		} in chunks
+		{
+			if seen[coords] {
+				bail!("Duplicate chunk {:?}", coords);
+			}
+			seen[coords] = true;
+
+			reader
+				.seek(SeekFrom::Start(offset as u64 * BLOCKSIZE as u64))
+				.context("Failed to seek chunk data")?;
+			// The chunk starts with a 4-byte big-endian length that counts against its blocks
+			let mut len_buf = [0u8; 4];
+			reader
+				.read_exact(&mut len_buf)
+				.with_context(|| format!("Failed to read length for chunk {:?}", coords))?;
+			let byte_len = u32::from_be_bytes(len_buf) as usize;
+			if byte_len < 1 || byte_len > (len as usize) * BLOCKSIZE - 4 {
+				bail!("Invalid length for chunk {:?}", coords);
+			}
+
+			let mut buffer = vec![0; byte_len];
+			reader
+				.read_exact(&mut buffer)
+				.with_context(|| format!("Failed to read data for chunk {:?}", coords))?;
+			let chunk = decode_chunk(&buffer)
+				.with_context(|| format!("Failed to decode data for chunk {:?}", coords))?;
+
+			f(coords, chunk)?;
+		}
+
+		Ok(())
+	}
+}
+
+/// Creates a new [Region] wrapping a reader, without reading anything from it yet
+pub fn from_reader<R>(reader: R) -> Region<R>
+where
+	R: Read + Seek,
+{
+	Region { reader }
+}
+
+/// Opens the file at `path` and creates a new [Region] for it
+pub fn from_file<P>(path: P) -> Result<Region<File>>
+where
+	P: AsRef<Path>,
+{
+	let file = File::open(path).context("Failed to open file")?;
+	Ok(from_reader(file))
+}
--- a/crates/types/Cargo.toml
+++ b/crates/types/Cargo.toml
@ -0,0 +1,10 @@
+[package]
+name = "minedmap-types"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+itertools = "0.11.0"
+serde = { version = "1.0.183", features = ["derive"] }
--- a/crates/types/src/lib.rs
+++ b/crates/types/src/lib.rs
@ -0,0 +1,238 @@
+//! Common types used by MinedMap
+
+#![warn(missing_docs)]
+#![warn(clippy::missing_docs_in_private_items)]
+
+use std::{
+	fmt::Debug,
+	iter::FusedIterator,
+	ops::{Index, IndexMut},
+};
+
+use itertools::iproduct;
+use serde::{Deserialize, Serialize};
+
+/// Values for the const generic AXIS argument, which keeps coordinate types of different axes distinct
+pub mod axis {
+	/// The X axis
+	pub const X: u8 = 0;
+	/// The Y axis (height)
+	pub const Y: u8 = 1;
+	/// The Z axis
+	pub const Z: u8 = 2;
+}
+
+/// Generates a coordinate type, generic over its axis, holding values in the range 0..$max
+macro_rules! coord_type {
+	($t:ident, $max:expr, $doc:expr $(,)?) => {
+		#[doc = $doc]
+		#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+		pub struct $t<const AXIS: u8>(pub u8);
+
+		impl<const AXIS: u8> $t<AXIS> {
+			const MAX: usize = $max;
+
+			/// Constructs a new value
+			///
+			/// Will panic if the value does not fit into a `u8` or is not less than `MAX`
+			#[inline]
+			pub fn new<T: TryInto<u8>>(value: T) -> Self {
+				Self(
+					value
+						.try_into()
+						.ok()
+						.filter(|&v| (v as usize) < Self::MAX)
+						.expect("coordinate should be in the valid range"),
+				)
+			}
+
+			/// Returns an iterator over all possible values of the type, in ascending order
+			#[inline]
+			pub fn iter() -> impl Iterator<Item = $t<AXIS>>
+			       + DoubleEndedIterator
+			       + ExactSizeIterator
+			       + FusedIterator
+			       + Clone
+			       + Debug {
+				(0..Self::MAX as u8).map($t)
+			}
+		}
+	};
+}
+
+/// Number of bits required to store a block coordinate
+pub const BLOCK_BITS: u8 = 4;
+/// Number of blocks per chunk along the X and Z axes, and per chunk section along the Y axis
+pub const BLOCKS_PER_CHUNK: usize = 1 << BLOCK_BITS;
+coord_type!(
+	BlockCoord,
+	BLOCKS_PER_CHUNK,
+	"A block coordinate relative to a chunk",
+);
+
+/// A block X coordinate relative to a chunk
+pub type BlockX = BlockCoord<{ axis::X }>;
+
+/// A block Y coordinate relative to a chunk section
+pub type BlockY = BlockCoord<{ axis::Y }>;
+
+/// A block Z coordinate relative to a chunk
+pub type BlockZ = BlockCoord<{ axis::Z }>;
+
+/// Position of a block inside one horizontal layer of a chunk, given by X and Z
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct LayerBlockCoords {
+	/// The X coordinate
+	pub x: BlockX,
+	/// The Z coordinate
+	pub z: BlockZ,
+}
+
+impl Debug for LayerBlockCoords {
+	fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+		let (x, z) = (self.x.0, self.z.0);
+		write!(out, "({}, {})", x, z)
+	}
+}
+
+impl LayerBlockCoords {
+	/// Returns the index of the block in flat per-layer data
+	///
+	/// Many chunk data structures store block and biome data in the same
+	/// order: row by row along Z, with X advancing fastest inside a row.
+	/// This method computes where the data for this coordinate pair lives.
+	#[inline]
+	pub fn offset(&self) -> usize {
+		let Self { x, z } = *self;
+		// One row of BLOCKS_PER_CHUNK entries per Z value
+		BLOCKS_PER_CHUNK * usize::from(z.0) + usize::from(x.0)
+	}
+}
+
+/// Generic array for data stored per block of a chunk layer
+///
+/// Elements are addressed by [LayerBlockCoords], with Z selecting the outer array.
+#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
+pub struct LayerBlockArray<T>(pub [[T; BLOCKS_PER_CHUNK]; BLOCKS_PER_CHUNK]);
+
+impl<T> Index<LayerBlockCoords> for LayerBlockArray<T> {
+	type Output = T;
+
+	#[inline]
+	fn index(&self, coords: LayerBlockCoords) -> &Self::Output {
+		&self.0[usize::from(coords.z.0)][usize::from(coords.x.0)]
+	}
+}
+
+impl<T> IndexMut<LayerBlockCoords> for LayerBlockArray<T> {
+	#[inline]
+	fn index_mut(&mut self, coords: LayerBlockCoords) -> &mut Self::Output {
+		&mut self.0[usize::from(coords.z.0)][usize::from(coords.x.0)]
+	}
+}
+
+/// Position of a block inside a chunk section, given by all three coordinates
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct SectionBlockCoords {
+	/// The X and Z coordinates
+	pub xz: LayerBlockCoords,
+	/// The Y coordinate
+	pub y: BlockY,
+}
+
+impl SectionBlockCoords {
+	/// Returns the index of the block in flat per-section data
+	///
+	/// Many chunk data structures store block and biome data in the same
+	/// order: layer by layer along Y, each layer laid out as described by
+	/// [LayerBlockCoords::offset].
+	#[inline]
+	pub fn offset(&self) -> usize {
+		// Every Y layer holds BLOCKS_PER_CHUNK * BLOCKS_PER_CHUNK blocks
+		BLOCKS_PER_CHUNK * BLOCKS_PER_CHUNK * usize::from(self.y.0) + self.xz.offset()
+	}
+}
+
+impl Debug for SectionBlockCoords {
+	fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+		let (x, y, z) = (self.xz.x.0, self.y.0, self.xz.z.0);
+		write!(out, "({}, {}, {})", x, y, z)
+	}
+}
+
+/// A section Y coordinate, stored as a signed integer without range checking
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub struct SectionY(pub i32);
+
+/// Number of bits required to store a chunk coordinate
+pub const CHUNK_BITS: u8 = 5;
+/// Number of chunks per region along each of the X and Z axes
+pub const CHUNKS_PER_REGION: usize = 1 << CHUNK_BITS;
+coord_type!(
+	ChunkCoord,
+	CHUNKS_PER_REGION,
+	"A chunk coordinate relative to a region",
+);
+
+/// A chunk X coordinate relative to a region
+pub type ChunkX = ChunkCoord<{ axis::X }>;
+
+/// A chunk Z coordinate relative to a region
+pub type ChunkZ = ChunkCoord<{ axis::Z }>;
+
+/// Position of a chunk inside a region, given by its X and Z coordinates
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct ChunkCoords {
+	/// The X coordinate
+	pub x: ChunkX,
+	/// The Z coordinate
+	pub z: ChunkZ,
+}
+
+impl Debug for ChunkCoords {
+	fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+		write!(out, "({}, {})", self.x.0, self.z.0)
+	}
+}
+
+/// Generic array for data stored per chunk of a region
+///
+/// Elements are addressed by [ChunkCoords]; helpers for iterating over them are provided.
+#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)]
+pub struct ChunkArray<T>(pub [[T; CHUNKS_PER_REGION]; CHUNKS_PER_REGION]);
+
+impl<T> ChunkArray<T> {
+	/// Iterates over every chunk coordinate pair that is a valid [ChunkArray] key
+	#[inline]
+	pub fn keys() -> impl Iterator<Item = ChunkCoords> + Clone + Debug {
+		iproduct!(ChunkZ::iter(), ChunkX::iter()).map(|(z, x)| ChunkCoords { x, z })
+	}
+
+	/// Iterates over all stored values, in the same order as [Self::keys]
+	#[inline]
+	pub fn values(&self) -> impl Iterator<Item = &T> + Clone + Debug {
+		self.iter().map(|(_, value)| value)
+	}
+
+	/// Iterates over all keys together with the values stored for them
+	#[inline]
+	pub fn iter(&self) -> impl Iterator<Item = (ChunkCoords, &T)> + Clone + Debug {
+		Self::keys().map(move |coords| (coords, &self[coords]))
+	}
+}
+
+impl<T> Index<ChunkCoords> for ChunkArray<T> {
+	type Output = T;
+
+	#[inline]
+	fn index(&self, coords: ChunkCoords) -> &Self::Output {
+		&self.0[usize::from(coords.z.0)][usize::from(coords.x.0)]
+	}
+}
+
+impl<T> IndexMut<ChunkCoords> for ChunkArray<T> {
+	#[inline]
+	fn index_mut(&mut self, coords: ChunkCoords) -> &mut Self::Output {
+		&mut self.0[usize::from(coords.z.0)][usize::from(coords.x.0)]
+	}
+}