diff --git a/Cargo.lock b/Cargo.lock
index 36c8f841879cdb9a30fb1911130c24957782e5ce..4367f5c8c8abee02998b3a6170f72a3e6408a8f4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -168,6 +168,7 @@ dependencies = [
  "libc",
  "log",
  "machine_manager",
+ "nix 0.26.2",
  "once_cell",
  "thiserror",
  "trace",
diff --git a/block_backend/Cargo.toml b/block_backend/Cargo.toml
index d052bd0d55d48d3c2fb4e0eef0e09b98cc251585..a4283348f535081d8032d713662b3397c274dea7 100644
--- a/block_backend/Cargo.toml
+++ b/block_backend/Cargo.toml
@@ -16,3 +16,4 @@ libc = "0.2"
 machine_manager = { path = "../machine_manager" }
 util = { path = "../util" }
 trace = {path = "../trace"}
+nix = { version = "0.26.2", default-features = false, features = ["fs", "feature"] }
diff --git a/block_backend/src/file.rs b/block_backend/src/file.rs
index 3acef03816abdc7d334d02efed5c0374ee27cfeb..306280496904e2e13c830bdc1fd1b47f0feb706b 100644
--- a/block_backend/src/file.rs
+++ b/block_backend/src/file.rs
@@ -25,7 +25,7 @@ use std::{
     },
 };
 
-use anyhow::{Context, Result};
+use anyhow::{bail, Context, Result};
 use log::error;
 use vmm_sys_util::epoll::EventSet;
 
@@ -212,6 +212,81 @@ impl FileDriver {
         }
         Ok(())
     }
+
+    // Find the data / hole range around offset `start`.
+    // According to linux man-pages:
+    //
+    // off_t lseek(int fd, off_t offset, int whence);
+    //
+    // SEEK_DATA:
+    // Adjust the file offset to the next location in the file greater than or equal to `offset`
+    // containing data. If offset points to data, then the file offset is set to `offset`.
+    // SEEK_HOLE:
+    // Adjust the file offset to the next hole in the file greater than or equal to offset. If offset points into the
+    // middle of a hole, then the file offset is set to offset. If there is no hole past offset, then the file offset is
+    // adjusted to the end of the file (i.e., there is an implicit hole at the end of any file).
+    //
+    // Return Value:
+    // Upon successful completion, lseek() returns the resulting offset location as measured in bytes from the beginning of the file.
+    // On error, the value (off_t) -1 is returned and errno is set to indicate the error.
+    //
+    // Common error codes:
+    // EBADF: fd is not an open file descriptor.
+    // EINVAL: whence is not valid. Or: the resulting file offset would be negative, or beyond the end of a seekable device.
+    // EOVERFLOW: The resulting file offset cannot be represented in an off_t.
+    // ESPIPE: fd is associated with a pipe, socket, or FIFO.
+    // ENXIO: whence is SEEK_DATA or SEEK_HOLE, and the current file offset is beyond the end of the file.
+    //
+    // So, SEEK_DATA has these cases:
+    // D1. doff == start: start is in data.
+    // D2. doff > start: start is in a hole, next data at doff.
+    // D3. doff < 0 && errno == ENXIO: either start is in a trailing hole or start is beyond EOF.
+    // D4. doff < 0 && errno != ENXIO: error.
+    //
+    // SEEK_HOLE has these cases:
+    // H1: hoff == start: start is in a hole.
+    // H2: hoff > start: start is in data, next hole at hoff.
+    // H3: hoff < 0, errno = ENXIO: start is beyond EOF.
+    // H4: hoff < 0, errno != ENXIO: error.
+    pub fn find_range_start(&mut self, start: u64, data_range: bool) -> Result<i64> {
+        if start > std::i64::MAX as u64 {
+            bail!("Too large offset {}", start);
+        }
+
+        let file_fd = self.file.as_raw_fd();
+        let whence = if data_range {
+            libc::SEEK_DATA
+        } else {
+            libc::SEEK_HOLE
+        };
+
+        // SAFETY: `file_fd` comes from `self.file`, and `start` was checked above to fit in an i64.
+        let off = unsafe { libc::lseek(file_fd, start as i64, whence) };
+
+        if off < 0 {
+            let errno = nix::errno::errno();
+            // D4 or H4: error.
+            if errno != libc::ENXIO {
+                bail!("lseek() whence {} error {}", whence, errno);
+            }
+            // D3 or H3.
+            return Ok(-1);
+        }
+
+        // Invalid return by lseek().
+        if off < start as i64 {
+            bail!(
+                "lseek() whence {} return invalid value {} around offset {}",
+                whence,
+                off,
+                start
+            );
+        }
+
+        // D1 or H1: off == start: start(off) is in a data(D1) / hole(H1).
+        // D2 or H2: off > start: start is in hole(D2) / data(H2), next data(D2) / hole(H2) at off.
+        Ok(off)
+    }
 }
 
 struct FileIoHandler {
@@ -308,3 +383,50 @@ fn build_event_notifier(
     notifier.handler_poll = handler_poll;
     notifier
 }
+
+#[cfg(test)]
+mod test {
+    use std::fs::{remove_file, OpenOptions};
+    use std::os::unix::fs::OpenOptionsExt;
+
+    use super::*;
+    use crate::qcow2::SyncAioInfo;
+    use crate::*;
+    use util::aio::AioEngine;
+
+    #[test]
+    fn test_find_range_start() {
+        let path = "/tmp/test_find_range_start_file";
+        let file = Arc::new(
+            OpenOptions::new()
+                .read(true)
+                .write(true)
+                .custom_flags(libc::O_CREAT | libc::O_TRUNC | libc::O_RDWR)
+                .mode(0o660)
+                .open(path)
+                .unwrap(),
+        );
+
+        let aio = Aio::new(Arc::new(SyncAioInfo::complete_func), AioEngine::Off, None).unwrap();
+        let mut file_driver = FileDriver::new(file, aio, BlockProperty::default());
+        // End of file.
+        let off = file_driver.find_range_start(0, false).unwrap();
+        assert_eq!(off, -1);
+        let off = file_driver.find_range_start(0, true).unwrap();
+        assert_eq!(off, -1);
+
+        // Write 4096 bytes at offset 4096.
+        // Note: We are using cache IO. Different file systems may have differences in cache IO alignment.
+        // So we test with 4K-aligned IO, which meets the needs of the vast majority of file systems.
+        let buf = [1_u8; 4096];
+        let iov = Iovec::new(buf.as_ptr() as u64, buf.len() as u64);
+        let req = CombineRequest::new(vec![iov], 4096, buf.len() as u64);
+        assert!(file_driver.write_vectored(vec![req], ()).is_ok());
+        let off = file_driver.find_range_start(0, true).unwrap();
+        assert_eq!(off, 4096);
+        let off = file_driver.find_range_start(0, false).unwrap();
+        assert_eq!(off, 0);
+
+        remove_file(path).unwrap();
+    }
+}
diff --git a/block_backend/src/raw.rs b/block_backend/src/raw.rs
index d49578641b72eebabb14bb8f99e80d409babc724..1ce7af5c1b0b45880e3b043d285395d3249e150f 100644
--- a/block_backend/src/raw.rs
+++ b/block_backend/src/raw.rs
@@ -24,8 +24,8 @@ use anyhow::{bail, Result};
 use crate::{
     file::{CombineRequest, FileDriver},
     qcow2::is_aligned,
-    BlockDriverOps, BlockIoErrorCallback, BlockProperty, BlockStatus, CheckResult, CreateOptions,
-    ImageInfo, SECTOR_SIZE,
+    BlockAllocStatus, BlockDriverOps, BlockIoErrorCallback, BlockProperty, BlockStatus,
+    CheckResult, CreateOptions, ImageInfo, SECTOR_SIZE,
 };
 use util::{
     aio::{get_iov_size, raw_write, Aio, Iovec},
@@ -212,4 +212,48 @@ impl BlockDriverOps for RawDriver {
     fn get_status(&mut self) -> Arc<Mutex<BlockStatus>> {
         self.status.clone()
     }
+
+    fn get_address_alloc_status(
+        &mut self,
+        offset: u64,
+        bytes: u64,
+    ) -> Result<(BlockAllocStatus, u64)> {
+        let data_start = self.driver.find_range_start(offset, true)?;
+        let hole_start = self.driver.find_range_start(offset, false)?;
+
+        if (data_start != offset as i64 && hole_start != offset as i64)
+            || (data_start == offset as i64 && hole_start == offset as i64)
+        {
+            bail!(
+                "Impossible! Offset {} cannot be both hole and data, or neither hole nor data.",
+                offset
+            );
+        }
+
+        // Data range.
+        if data_start == offset as i64 {
+            if hole_start == -1 {
+                // No hole. All data.
+                let disk_size = self.disk_size()?;
+                let size = std::cmp::min(disk_size - offset, bytes);
+                return Ok((BlockAllocStatus::DATA, size));
+            }
+
+            // It was all data before the hole.
+            let size = std::cmp::min((hole_start - data_start) as u64, bytes);
+            return Ok((BlockAllocStatus::DATA, size));
+        }
+
+        // hole_start == offset. Zero range.
+        if data_start == -1 {
+            // No data. All hole.
+            let disk_size = self.disk_size()?;
+            let size = std::cmp::min(disk_size - offset, bytes);
+            return Ok((BlockAllocStatus::ZERO, size));
+        }
+
+        // It was all hole before the data.
+        let size = std::cmp::min((data_start - hole_start) as u64, bytes);
+        Ok((BlockAllocStatus::ZERO, size))
+    }
 }
diff --git a/image/src/img.rs b/image/src/img.rs
index bc5b58ebcad1b0ea233410fe49a344d209610145..1f0372f38bfb870e9acaa911913c04f731e9bad0 100644
--- a/image/src/img.rs
+++ b/image/src/img.rs
@@ -2547,4 +2547,79 @@ mod test {
         drop(dst_driver);
         assert!(remove_file(dst_path.clone()).is_ok());
     }
+
+    /// Test image convert from raw to qcow2.
+    ///
+    /// TestStep:
+    /// 1. Create a raw image with size of 10G.
+    /// 2. Write data to this image in different position.
+    /// 3. Convert this raw to qcow2.
+    /// 4. Read data in the same position.
+    #[test]
+    fn test_image_convert_from_raw_to_qcow2() {
+        let src_path = "/tmp/test_image_convert_src.raw";
+        let dst_path = "/tmp/test_image_convert_dst.qcow2";
+        let _ = remove_file(src_path.clone());
+        let _ = remove_file(dst_path.clone());
+
+        let mut test_image = TestRawImage::create(src_path.to_string(), "10G".to_string());
+        let mut src_driver = test_image.create_driver();
+
+        // Write 1M data(number 1) in offset 0.
+        let buf1 = vec![1_u8; 1 * M as usize];
+        assert!(image_write(&mut src_driver, 0, &buf1).is_ok());
+        // Write 1M data(number 2) in offset 5G.
+        let buf2 = vec![2_u8; 1 * M as usize];
+        assert!(image_write(&mut src_driver, 5 * G as usize, &buf2).is_ok());
+        // Write 1M data(number 3) in last 1M.
+        let buf3 = vec![3_u8; 1 * M as usize];
+        assert!(image_write(&mut src_driver, 10 * G as usize - 1 * M as usize, &buf3).is_ok());
+        // Write 1M data(number 0) in random offset (eg: 300M offset).
+        let buf4 = vec![0_u8; 1 * M as usize];
+        assert!(image_write(&mut src_driver, 300 * M as usize, &buf4).is_ok());
+
+        drop(src_driver);
+
+        assert!(image_convert(vec![
+            "-f".to_string(),
+            "raw".to_string(),
+            "-O".to_string(),
+            "qcow2".to_string(),
+            src_path.to_string(),
+            dst_path.to_string()
+        ])
+        .is_ok());
+
+        // Open the converted qcow2 image.
+        let conf = BlockProperty {
+            format: DiskFormat::Qcow2,
+            ..Default::default()
+        };
+        let aio = Aio::new(Arc::new(SyncAioInfo::complete_func), AioEngine::Off, None).unwrap();
+        let file = open_file(dst_path, true, false).unwrap();
+        let mut dst_driver = Qcow2Driver::new(Arc::new(file), aio, conf.clone()).unwrap();
+        dst_driver.load_metadata(conf).unwrap();
+
+        // Read 1M data in offset 0.
+        let buf = vec![0; 1 * M as usize];
+        assert!(image_read(&mut dst_driver, 0, &buf).is_ok());
+        assert_eq!(buf, buf1);
+        // Read 1M data in offset 5G.
+        assert!(image_read(&mut dst_driver, 5 * G as usize, &buf).is_ok());
+        assert_eq!(buf, buf2);
+        // Read 1M data in last 1M.
+        assert!(image_read(&mut dst_driver, 10 * G as usize - 1 * M as usize, &buf).is_ok());
+        assert_eq!(buf, buf3);
+
+        let mut img_info = ImageInfo::default();
+        assert!(dst_driver.query_image(&mut img_info).is_ok());
+        assert_eq!(img_info.virtual_size, 10 * G);
+        // 1M data(number 0) in offset 300M will not consume space.
+        // QCOW2 has metadata clusters, so it will not be 3M in size.
+        assert!(img_info.actual_size > 3 * M);
+        assert!(img_info.actual_size < 4 * M);
+
+        // Clean.
+        assert!(remove_file(dst_path.clone()).is_ok());
+    }
 }