diff --git a/src/filesystem/drive/directory_handle.rs b/src/filesystem/drive/directory_handle.rs
index f396b49..be4bdc6 100644
--- a/src/filesystem/drive/directory_handle.rs
+++ b/src/filesystem/drive/directory_handle.rs
@@ -775,7 +775,6 @@ mod test {
     use super::*;
     use crate::filesystem::drive::inner::test::build_interesting_inner;
     use crate::prelude::MemoryDataStore;
-
     #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
     #[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
     async fn mv_dir_from_dir_to_cwd_specify_name() {
@@ -967,7 +966,7 @@ mod test {
     }
     #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
     #[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
-    async fn write_file_with_html_tags() {
+    async fn sniff_html_mime_type() {
         let mut rng = crate::utils::crypto_rng();
         let current_key = SigningKey::generate(&mut rng);
         let mut handle = interesting_handle(Some(current_key)).await;
@@ -1012,4 +1011,133 @@ mod test {
             assert_eq!(mime_type, "text/html");
         }
     }
+
+    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
+    #[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
+    async fn sniff_mp3_file_mime_type() {
+        let mut rng = crate::utils::crypto_rng();
+        let current_key = SigningKey::generate(&mut rng);
+        let mut handle = interesting_handle(Some(current_key)).await;
+        let mut store = MemoryDataStore::default();
+        let mp3_test_case: &[u8] = &[
+            0x49, 0x44, 0x33, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x54, 0x53, 0x53, 0x45,
+            0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x03, 0x4c, 0x61, 0x76, 0x66, 0x36, 0x30, 0x2e,
+            0x33, 0x2e, 0x31, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0xff, 0xfb, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        ];
+        let file_name = "the_audio.mp4";
+        handle
+            .write(&mut rng, &mut store, &[file_name], mp3_test_case)
+            .await
+            .unwrap();
+
+        let cwd_ls = handle.ls(&[]).await.unwrap();
+        assert_eq!(
+            cwd_ls
+                .iter()
+                .filter(|entry| entry.name() == NodeName::try_from(file_name).unwrap())
+                .count(),
+            1
+        );
+
+        let file_entry = cwd_ls
+            .iter()
+            .find(|entry| entry.name() == NodeName::try_from(file_name).unwrap())
+            .unwrap();
+
+        assert_eq!(file_entry.kind(), NodeKind::File);
+
+        let file_data = handle.read(&mut store, &[file_name]).await.unwrap();
+        assert_eq!(file_data.as_slice(), mp3_test_case);
+
+        let mime_type = file_entry.mime_type().unwrap();
+        assert_eq!(mime_type, "audio/mpeg");
+    }
+
+    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
+    #[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
+    async fn sniff_mp4_file_mime_type() {
+        let mut rng = crate::utils::crypto_rng();
+        let current_key = SigningKey::generate(&mut rng);
+        let mut handle = interesting_handle(Some(current_key)).await;
+        let mut store = MemoryDataStore::default();
+        let mp4_test_case: &[u8] = &[
+            0x00, 0x00, 0x00, 0x1c, 0x66, 0x74, 0x79, 0x70, 0x69, 0x73, 0x6f, 0x6d, 0x00, 0x00,
+            0x02, 0x00, 0x69, 0x73, 0x6f, 0x6d, 0x69, 0x73, 0x6f, 0x32, 0x6d, 0x70, 0x34, 0x31,
+            0x00, 0x00, 0x00, 0x08,
+        ];
+        let file_name = "the_audio.mp3";
+        handle
+            .write(&mut rng, &mut store, &[file_name], mp4_test_case)
+            .await
+            .unwrap();
+
+        let cwd_ls = handle.ls(&[]).await.unwrap();
+        assert_eq!(
+            cwd_ls
+                .iter()
+                .filter(|entry| entry.name() == NodeName::try_from(file_name).unwrap())
+                .count(),
+            1
+        );
+
+        let file_entry = cwd_ls
+            .iter()
+            .find(|entry| entry.name() == NodeName::try_from(file_name).unwrap())
+            .unwrap();
+
+        assert_eq!(file_entry.kind(), NodeKind::File);
+
+        let file_data = handle.read(&mut store, &[file_name]).await.unwrap();
+        assert_eq!(file_data.as_slice(), mp4_test_case);
+
+        let mime_type = file_entry.mime_type().unwrap();
+        assert_eq!(mime_type, "video/mp4");
+    }
+
+    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
+    #[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
+    async fn sniff_webm_file_mime_type() {
+        let mut rng = crate::utils::crypto_rng();
+        let current_key = SigningKey::generate(&mut rng);
+        let mut handle = interesting_handle(Some(current_key)).await;
+        let mut store = MemoryDataStore::default();
+        let webm_test_case: &[u8] = &[
+            0x1a, 0x45, 0xdf, 0xa3, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x42, 0x86,
+            0x81, 0x01, 0x42, 0xf7, 0x81, 0x01, 0x42, 0xf2, 0x81, 0x04, 0x42, 0xf3, 0x81, 0x08,
+            0x42, 0x82, 0x84, 0x77, 0x65, 0x62, 0x6d, 0x42, 0x87, 0x81, 0x02, 0x42, 0x85, 0x81,
+            0x02, 0x18, 0x53, 0x80, 0x67, 0x01, 0x00, 0x00, 0x00, 0x00, 0x0d, 0xc0, 0x0a, 0x11,
+            0x4d, 0x9b, 0x74, 0x40, 0x3c, 0x4d, 0xbb, 0x8b, 0x53, 0xab, 0x84, 0x15, 0x49, 0xa9,
+            0x66, 0x53, 0xac, 0x81, 0xe5, 0x4d, 0xbb, 0x8c, 0x53, 0xab,
+        ];
+        let file_name = "the_audio.mp4";
+        handle
+            .write(&mut rng, &mut store, &[file_name], webm_test_case)
+            .await
+            .unwrap();
+
+        let cwd_ls = handle.ls(&[]).await.unwrap();
+        assert_eq!(
+            cwd_ls
+                .iter()
+                .filter(|entry| entry.name() == NodeName::try_from(file_name).unwrap())
+                .count(),
+            1
+        );
+
+        let file_entry = cwd_ls
+            .iter()
+            .find(|entry| entry.name() == NodeName::try_from(file_name).unwrap())
+            .unwrap();
+
+        assert_eq!(file_entry.kind(), NodeKind::File);
+
+        let file_data = handle.read(&mut store, &[file_name]).await.unwrap();
+        assert_eq!(file_data.as_slice(), webm_test_case);
+
+        let mime_type = file_entry.mime_type().unwrap();
+        assert_eq!(mime_type, "video/webm");
+    }
 }
diff --git a/src/filesystem/nodes/metadata/mime_type.rs b/src/filesystem/nodes/metadata/mime_type.rs
index ddf5332..a1fe5f1 100644
--- a/src/filesystem/nodes/metadata/mime_type.rs
+++ b/src/filesystem/nodes/metadata/mime_type.rs
@@ -7,18 +7,6 @@ pub struct MimeGuesser {
 }
 
 impl MimeGuesser {
-    const MP3_RATES: [u32; 15] = [
-        0, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000, 160000, 192000, 224000,
-        256000, 320000,
-    ];
-
-    const MP25_RATES: [u32; 15] = [
-        0, 8000, 16000, 24000, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000,
-        144000, 160000,
-    ];
-
-    const SAMPLE_RATES: [u32; 3] = [44100, 48000, 32000];
-
     pub fn with_name(mut self, name: NodeName) -> Self {
         match name {
             NodeName::Named(name) => self.name = Some(name.clone()),
@@ -161,11 +149,12 @@ impl MimeGuesser {
 
     fn algorithm_match(&self) -> Option<mime::Mime> {
         if self.is_mp4() {
-            return Some(mime::AUDIO_MP4);
+            return Some(mime::VIDEO_MP4);
         }
-        if self.is_mp3() {
-            return Some(mime::AUDIO_MPEG);
+        if self.is_webm() {
+            return Some(mime::VIDEO_WEBM);
         }
+
         None
     }
 
@@ -178,75 +167,74 @@ impl MimeGuesser {
         if data.len() < box_size as usize || box_size % 4 != 0 {
             return false;
         }
-        if &data[4..8] != b"ftyp" {
-            return false;
-        }
-        if &data[8..11] == b"mp4" {
-            return true;
-        }
-        data[16..]
-            .chunks_exact(4)
-            .take_while(|chunk| &chunk[..3] != b"mp4")
-            .last()
-            .map_or(false, |chunk| &chunk[..3] == b"mp4")
-    }
 
-    fn is_mp3(&self) -> bool {
+        data.get(4..8) == Some(b"ftyp")
+            && (data.get(8..11) == Some(b"mp4")
+                || data[16..]
+                    .chunks_exact(4)
+                    .any(|chunk| chunk.starts_with(b"mp4")))
+    }
+    /// Returns `true` when the data starts with `1A 45 DF A3` and a `42 82` chunk within the
+    /// scanned window is followed by a variable-length integer and the bytes `webm`.
+    fn is_webm(&self) -> bool {
         let data = &self.data;
-        let mut offset = 0;
-
-        if !match_mp3_header(data, offset) {
+        if data.len() < 4 || data[..4] != [0x1A, 0x45, 0xDF, 0xA3] {
             return false;
         }
 
-        let (version, bitrate_index, sample_rate_index, pad) = parse_mp3_frame(data, offset);
-        let bitrate = if version & 0x01 != 0 {
-            Self::MP25_RATES[bitrate_index as usize]
-        } else {
-            Self::MP3_RATES[bitrate_index as usize]
-        };
-        let sample_rate = Self::SAMPLE_RATES[sample_rate_index as usize];
-        let skipped_bytes = compute_mp3_frame_size(version, bitrate, sample_rate, pad);
+        let skip_first_bytes = 4;
+        let chunk_size = 2;
+        let magic_bytes_delim = [0x42, 0x82];
+        for (chunk_idx, chunk) in data[skip_first_bytes..].chunks(chunk_size).enumerate() {
+            // went over 4 + 2 * 17 = 38 bytes
+            if chunk_idx >= 17 {
+                break;
+            }
 
-        if skipped_bytes < 4 || skipped_bytes > data.len() - offset {
-            return false;
+            if chunk != magic_bytes_delim {
+                continue;
+            }
+
+            let offset = skip_first_bytes + chunk_idx * chunk_size + magic_bytes_delim.len();
+            if let Some((_, number_size)) = data.get(offset..).map(|d| parse_vint(d, 0)) {
+                let start = offset + number_size;
+                let end = start + 4;
+                if data.get(start..end) == Some(b"webm") {
+                    return true;
+                }
+            }
         }
 
-        offset += skipped_bytes;
-        match_mp3_header(data, offset)
+        false
     }
 }
 
-fn match_mp3_header(sequence: &[u8], s: usize) -> bool {
-    let length = sequence.len();
-    if length - s < 4 {
-        return false;
+/// Parses a variable-length integer that starts at `data[offset]`.
+///
+/// Returns `(parsed_number, number_size)` with `number_size` between 1 and 8. Bytes that lie
+/// outside `data` contribute nothing to the result instead of causing a panic.
+fn parse_vint(data: &[u8], offset: usize) -> (usize, usize) {
+    let mut mask = 128;
+    let max_vint_length = 8;
+    let mut number_size = 1;
+
+    while number_size < max_vint_length
+        // Keep widening only while the marker bit is clear; a missing byte ends the scan.
+        && data.get(offset).map_or(false, |&b| b & mask == 0)
+    {
+        mask >>= 1;
+        number_size += 1;
     }
 
-    sequence[s] == 0xff
-        && sequence[s + 1] & 0xe0 == 0xe0
-        && (sequence[s + 1] & 0x06 >> 1) != 0
-        && (sequence[s + 2] & 0xf0 >> 4) != 15
-        && (sequence[s + 2] & 0x0c >> 2) != 3
-        && (4 - (sequence[s + 1] & 0x06 >> 1)) == 3
-}
-
-fn parse_mp3_frame(sequence: &[u8], s: usize) -> (u8, u8, u8, u8) {
-    let version = sequence[s + 1] & 0x18 >> 3;
-    let bitrate_index = sequence[s + 2] & 0xf0 >> 4;
-    let sample_rate_index = sequence[s + 2] & 0x0c >> 2;
-    let pad = sequence[s + 2] & 0x02 >> 1;
-    (version, bitrate_index, sample_rate_index, pad)
-}
+    let mut parsed_number = data.get(offset).map_or(0, |&b| (b & !mask) as usize);
 
-fn compute_mp3_frame_size(version: u8, bitrate: u32, sample_rate: u32, pad: u8) -> usize {
-    let scale = if version == 1 { 72 } else { 144 };
-    let mut size = bitrate * scale / sample_rate;
-    if pad != 0 {
-        size += 1;
+    for &b in data.get(offset + 1..offset + number_size).unwrap_or(&[]) {
+        parsed_number = (parsed_number << 8) | b as usize;
     }
-    size as usize
+
+    (parsed_number, number_size)
 }
+
 fn is_whitespace_or_tag_terminator(byte: u8) -> bool {
     byte == b' ' || byte == b'>'
 }
diff --git a/src/filesystem/nodes/metadata/mod.rs b/src/filesystem/nodes/metadata/mod.rs
index 07ad8bb..03a3db2 100644
--- a/src/filesystem/nodes/metadata/mod.rs
+++ b/src/filesystem/nodes/metadata/mod.rs
@@ -6,25 +6,37 @@ pub use mime_type::MimeGuesser;
 #[derive(Hash, Eq, PartialEq, Debug)]
 pub enum MetadataKey {
     MimeType,
+    Custom(String),
 }
 
 impl MetadataKey {
-    pub fn as_str(&self) -> &'static str {
+    pub fn as_str(&self) -> &str {
         match self {
             MetadataKey::MimeType => "mime",
+            MetadataKey::Custom(s) => s.as_str(),
         }
     }
 
-    pub fn as_bytes(&self) -> &'static [u8] {
+    pub fn as_bytes(&self) -> Vec<u8> {
         match self {
-            MetadataKey::MimeType => b"mime",
+            MetadataKey::MimeType => b"mime".to_vec(),
+            MetadataKey::Custom(s) => s.as_bytes().to_vec(),
         }
     }
 
     pub fn from_bytes(key: &[u8]) -> Option<Self> {
         match key {
             b"mime" => Some(MetadataKey::MimeType),
-            _ => None,
+            _ => {
+                if key.len() > 255 {
+                    return None;
+                }
+
+                match std::str::from_utf8(key) {
+                    Ok(s) => Some(MetadataKey::Custom(s.to_string())),
+                    Err(_) => None,
+                }
+            }
         }
     }
 }
@@ -35,7 +47,13 @@ impl FromStr for MetadataKey {
     fn from_str(s: &str) -> Result<Self, Self::Err> {
         match s {
             "mime" => Ok(MetadataKey::MimeType),
-            _ => Err(winnow::error::ErrorKind::Token),
+            _ => {
+                if s.len() > 255 {
+                    return Err(winnow::error::ErrorKind::Verify);
+                }
+
+                Ok(MetadataKey::Custom(s.to_string()))
+            }
         }
     }
 }
diff --git a/src/filesystem/nodes/mod.rs b/src/filesystem/nodes/mod.rs
index 98fd983..fa68f89 100644
--- a/src/filesystem/nodes/mod.rs
+++ b/src/filesystem/nodes/mod.rs
@@ -196,7 +196,7 @@ impl Node {
         node_data.write_all(&[entry_count]).await?;
 
         let mut sorted_metadata = self.metadata.iter().collect::<Vec<_>>();
-        sorted_metadata.sort_by(|(a, _), (b, _)| a.as_bytes().cmp(b.as_bytes()));
+        sorted_metadata.sort_by(|(a, _), (b, _)| a.as_bytes().cmp(&b.as_bytes()));
 
         for (key, val) in sorted_metadata.into_iter() {
             let key_bytes = key.as_bytes();
@@ -207,7 +207,7 @@ impl Node {
             }
 
             node_data.write_all(&[key_bytes_len as u8]).await?;
-            node_data.write_all(key_bytes).await?;
+            node_data.write_all(&key_bytes).await?;
 
             let val_bytes_len = val.len();
             if val_bytes_len > u8::MAX as usize {
@@ -296,7 +296,7 @@ impl Node {
         encoded_size += self
             .metadata()
             .iter()
-            .map(|(k, v)| (2 + k.as_str().len() + v.len()) as u64)
+            .map(|(k, v)| (2 + k.as_bytes().len() + v.len()) as u64)
             .sum::<u64>();
 
         encoded_size