Skip to content

Commit

Permalink
Addressed some comments
Browse files Browse the repository at this point in the history
  • Loading branch information
PlamenHristov committed Jun 7, 2024
1 parent a956ee0 commit 238d51b
Show file tree
Hide file tree
Showing 4 changed files with 206 additions and 78 deletions.
132 changes: 130 additions & 2 deletions src/filesystem/drive/directory_handle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -775,7 +775,6 @@ mod test {
use super::*;
use crate::filesystem::drive::inner::test::build_interesting_inner;
use crate::prelude::MemoryDataStore;

#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
async fn mv_dir_from_dir_to_cwd_specify_name() {
Expand Down Expand Up @@ -967,7 +966,7 @@ mod test {
}
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
async fn write_file_with_html_tags() {
async fn sniff_html_mime_type() {
let mut rng = crate::utils::crypto_rng();
let current_key = SigningKey::generate(&mut rng);
let mut handle = interesting_handle(Some(current_key)).await;
Expand Down Expand Up @@ -1012,4 +1011,133 @@ mod test {
assert_eq!(mime_type, "text/html");
}
}

/// Writes a small MP3-looking payload into the drive and checks that the entry
/// listed afterwards is a file, round-trips its bytes, and reports the
/// `audio/mpeg` MIME type.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
async fn sniff_mp3_file_mime_type() {
    // NOTE(review): the `.mp4` extension does not match the MP3 payload —
    // presumably deliberate so the expected type can only come from the
    // content; confirm.
    let file_name = "the_audio.mp4";

    // Payload starts with the ASCII tag `ID3` and later contains the bytes
    // `ff fb`; the rest is mostly zero padding.
    let payload: &[u8] = &[
        0x49, 0x44, 0x33, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x54, 0x53, 0x53, 0x45,
        0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x03, 0x4c, 0x61, 0x76, 0x66, 0x36, 0x30, 0x2e,
        0x33, 0x2e, 0x31, 0x30, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0xff, 0xfb, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ];

    let mut rng = crate::utils::crypto_rng();
    let signing_key = SigningKey::generate(&mut rng);
    let mut handle = interesting_handle(Some(signing_key)).await;
    let mut store = MemoryDataStore::default();

    handle
        .write(&mut rng, &mut store, &[file_name], payload)
        .await
        .unwrap();

    // Exactly one entry in the working directory must carry the new name.
    let listing = handle.ls(&[]).await.unwrap();
    let hits: Vec<_> = listing
        .iter()
        .filter(|entry| entry.name() == NodeName::try_from(file_name).unwrap())
        .collect();
    assert_eq!(hits.len(), 1);

    let file_entry = hits[0];
    assert_eq!(file_entry.kind(), NodeKind::File);

    // The stored bytes must come back unchanged.
    let stored = handle.read(&mut store, &[file_name]).await.unwrap();
    assert_eq!(stored.as_slice(), payload);

    let sniffed = file_entry.mime_type().unwrap();
    assert_eq!(sniffed, "audio/mpeg");
}

/// Writes a small MP4-looking payload into the drive and checks that the entry
/// listed afterwards is a file, round-trips its bytes, and reports the
/// `video/mp4` MIME type.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
async fn sniff_mp4_file_mime_type() {
    // NOTE(review): the `.mp3` extension does not match the MP4 payload —
    // presumably deliberate so the expected type can only come from the
    // content; confirm.
    let file_name = "the_audio.mp3";

    // Payload: a 0x1c length prefix, the ASCII tag `ftyp`, then the tags
    // `isom`, `isom`, `iso2` and `mp41`.
    let payload: &[u8] = &[
        0x00, 0x00, 0x00, 0x1c, 0x66, 0x74, 0x79, 0x70, 0x69, 0x73, 0x6f, 0x6d, 0x00, 0x00,
        0x02, 0x00, 0x69, 0x73, 0x6f, 0x6d, 0x69, 0x73, 0x6f, 0x32, 0x6d, 0x70, 0x34, 0x31,
        0x00, 0x00, 0x00, 0x08,
    ];

    let mut rng = crate::utils::crypto_rng();
    let signing_key = SigningKey::generate(&mut rng);
    let mut handle = interesting_handle(Some(signing_key)).await;
    let mut store = MemoryDataStore::default();

    handle
        .write(&mut rng, &mut store, &[file_name], payload)
        .await
        .unwrap();

    // Exactly one entry in the working directory must carry the new name.
    let listing = handle.ls(&[]).await.unwrap();
    let hits: Vec<_> = listing
        .iter()
        .filter(|entry| entry.name() == NodeName::try_from(file_name).unwrap())
        .collect();
    assert_eq!(hits.len(), 1);

    let file_entry = hits[0];
    assert_eq!(file_entry.kind(), NodeKind::File);

    // The stored bytes must come back unchanged.
    let stored = handle.read(&mut store, &[file_name]).await.unwrap();
    assert_eq!(stored.as_slice(), payload);

    let sniffed = file_entry.mime_type().unwrap();
    assert_eq!(sniffed, "video/mp4");
}

/// Writes a small WebM-looking payload into the drive and checks that the entry
/// listed afterwards is a file, round-trips its bytes, and reports the
/// `video/webm` MIME type.
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)]
#[cfg_attr(not(target_arch = "wasm32"), tokio::test)]
async fn sniff_webm_file_mime_type() {
    // NOTE(review): the `.mp4` extension does not match the WebM payload —
    // presumably deliberate so the expected type can only come from the
    // content; confirm.
    let file_name = "the_audio.mp4";

    // Payload starts with `1a 45 df a3` and, at byte 28, carries `42 82 84`
    // followed by the ASCII text `webm`.
    let webm_test_case: &[u8] = &[
        0x1a, 0x45, 0xdf, 0xa3, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1f, 0x42, 0x86,
        0x81, 0x01, 0x42, 0xf7, 0x81, 0x01, 0x42, 0xf2, 0x81, 0x04, 0x42, 0xf3, 0x81, 0x08,
        0x42, 0x82, 0x84, 0x77, 0x65, 0x62, 0x6d, 0x42, 0x87, 0x81, 0x02, 0x42, 0x85, 0x81,
        0x02, 0x18, 0x53, 0x80, 0x67, 0x01, 0x00, 0x00, 0x00, 0x00, 0x0d, 0xc0, 0x0a, 0x11,
        0x4d, 0x9b, 0x74, 0x40, 0x3c, 0x4d, 0xbb, 0x8b, 0x53, 0xab, 0x84, 0x15, 0x49, 0xa9,
        0x66, 0x53, 0xac, 0x81, 0xe5, 0x4d, 0xbb, 0x8c, 0x53, 0xab,
    ];

    let mut rng = crate::utils::crypto_rng();
    let signing_key = SigningKey::generate(&mut rng);
    let mut handle = interesting_handle(Some(signing_key)).await;
    let mut store = MemoryDataStore::default();

    handle
        .write(&mut rng, &mut store, &[file_name], webm_test_case)
        .await
        .unwrap();

    // Exactly one entry in the working directory must carry the new name.
    let listing = handle.ls(&[]).await.unwrap();
    let hits: Vec<_> = listing
        .iter()
        .filter(|entry| entry.name() == NodeName::try_from(file_name).unwrap())
        .collect();
    assert_eq!(hits.len(), 1);

    let file_entry = hits[0];
    assert_eq!(file_entry.kind(), NodeKind::File);

    // The stored bytes must come back unchanged.
    let stored = handle.read(&mut store, &[file_name]).await.unwrap();
    assert_eq!(stored.as_slice(), webm_test_case);

    let sniffed = file_entry.mime_type().unwrap();
    assert_eq!(sniffed, "video/webm");
}
}
118 changes: 50 additions & 68 deletions src/filesystem/nodes/metadata/mime_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,6 @@ pub struct MimeGuesser {
}

impl MimeGuesser {
const MP3_RATES: [u32; 15] = [
0, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000, 160000, 192000, 224000,
256000, 320000,
];

const MP25_RATES: [u32; 15] = [
0, 8000, 16000, 24000, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000,
144000, 160000,
];

const SAMPLE_RATES: [u32; 3] = [44100, 48000, 32000];

pub fn with_name(mut self, name: NodeName) -> Self {
match name {
NodeName::Named(name) => self.name = Some(name.clone()),
Expand Down Expand Up @@ -161,11 +149,12 @@ impl MimeGuesser {

fn algorithm_match(&self) -> Option<mime::MediaType> {
if self.is_mp4() {
return Some(mime::AUDIO_MP4);
return Some(mime::VIDEO_MP4);
}
if self.is_mp3() {
return Some(mime::AUDIO_MPEG);
if self.is_webm() {
return Some(mime::VIDEO_WEBM);
}

None
}

Expand All @@ -178,75 +167,68 @@ impl MimeGuesser {
if data.len() < box_size as usize || box_size % 4 != 0 {
return false;
}
if &data[4..8] != b"ftyp" {
return false;
}
if &data[8..11] == b"mp4" {
return true;
}
data[16..]
.chunks_exact(4)
.take_while(|chunk| &chunk[..3] != b"mp4")
.last()
.map_or(false, |chunk| &chunk[..3] == b"mp4")
}

fn is_mp3(&self) -> bool {
data.get(4..8) == Some(b"ftyp")
&& (data.get(8..11) == Some(b"mp4")
|| data[16..]
.chunks_exact(4)
.any(|chunk| chunk.starts_with(b"mp4")))
}
fn is_webm(&self) -> bool {
let data = &self.data;
let mut offset = 0;

if !match_mp3_header(data, offset) {
if data.len() < 4 || data[..4] != [0x1A, 0x45, 0xDF, 0xA3] {
return false;
}

let (version, bitrate_index, sample_rate_index, pad) = parse_mp3_frame(data, offset);
let bitrate = if version & 0x01 != 0 {
Self::MP25_RATES[bitrate_index as usize]
} else {
Self::MP3_RATES[bitrate_index as usize]
};
let sample_rate = Self::SAMPLE_RATES[sample_rate_index as usize];
let skipped_bytes = compute_mp3_frame_size(version, bitrate, sample_rate, pad);
let skip_first_bytes = 4;
let chunk_size = 2;
let magic_bytes_delim = [0x42, 0x82];
for (chunk_idx, chunk) in data[skip_first_bytes..].chunks(chunk_size).enumerate() {
// went over 4 + 2 * 17 = 38 bytes
if chunk_idx >= 17 {
break;
}

if skipped_bytes < 4 || skipped_bytes > data.len() - offset {
return false;
if chunk != magic_bytes_delim {
continue;
}

let offset = skip_first_bytes + chunk_idx * chunk_size + magic_bytes_delim.len();
if let Some((_, number_size)) = data.get(offset..).map(|d| parse_vint(d, 0)) {
let start = offset + number_size;
let end = start + 4;
if data.get(start..end) == Some(b"webm") {
return true;
}
}
}
offset += skipped_bytes;

match_mp3_header(data, offset)
false
}
}

fn match_mp3_header(sequence: &[u8], s: usize) -> bool {
let length = sequence.len();
if length - s < 4 {
return false;
/// Decodes an EBML-style variable-length integer ("vint") from `data`,
/// starting at `offset`.
///
/// The number of leading zero bits in the first byte, plus one, gives the total
/// length of the integer in bytes (capped at 8). The first set bit is a length
/// marker and is cleared from the value; the remaining bytes are appended
/// big-endian.
///
/// Returns `(parsed_number, number_size)`.
///
/// * If `offset` is past the end of `data`, returns `(0, 1)` instead of
///   panicking.
/// * If `data` ends before all `number_size` bytes are available, none of the
///   trailing bytes are folded in and the value comes from the first byte only.
fn parse_vint(data: &[u8], offset: usize) -> (usize, usize) {
    const MAX_VINT_LENGTH: usize = 8;

    let first = match data.get(offset) {
        Some(&byte) => byte,
        None => return (0, 1),
    };

    // Walk the marker bit down from the top until it meets the first set bit.
    let mut mask: u8 = 0x80;
    let mut number_size = 1;
    while number_size < MAX_VINT_LENGTH && first & mask == 0 {
        mask >>= 1;
        number_size += 1;
    }

    // Clear the length marker, then append the remaining bytes.
    let mut parsed_number = (first & !mask) as usize;
    for &b in data.get(offset + 1..offset + number_size).unwrap_or(&[]) {
        parsed_number = (parsed_number << 8) | b as usize;
    }

    (parsed_number, number_size)
}

fn is_whitespace_or_tag_terminator(byte: u8) -> bool {
byte == b' ' || byte == b'>'
}
28 changes: 23 additions & 5 deletions src/filesystem/nodes/metadata/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,37 @@ pub use mime_type::MimeGuesser;
#[derive(Hash, Eq, PartialEq, Debug)]
pub enum MetadataKey {
MimeType,
Custom(String),
}

impl MetadataKey {
pub fn as_str(&self) -> &'static str {
pub fn as_str(&self) -> &str {
match self {
MetadataKey::MimeType => "mime",
MetadataKey::Custom(s) => s.as_str(),
}
}

pub fn as_bytes(&self) -> &'static [u8] {
pub fn as_bytes(&self) -> Vec<u8> {
match self {
MetadataKey::MimeType => b"mime",
MetadataKey::MimeType => b"mime".to_vec(),
MetadataKey::Custom(s) => s.as_bytes().to_vec(),
}
}

pub fn from_bytes(key: &[u8]) -> Option<Self> {
match key {
b"mime" => Some(MetadataKey::MimeType),
_ => None,
_ => {
if key.len() > 255 {
return None;
}

match std::str::from_utf8(key) {
Ok(s) => Some(MetadataKey::Custom(s.to_string())),
Err(_) => None,
}
}
}
}
}
Expand All @@ -35,7 +47,13 @@ impl FromStr for MetadataKey {
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s {
"mime" => Ok(MetadataKey::MimeType),
_ => Err(winnow::error::ErrorKind::Token),
_ => {
if s.len() > 255 {
return Err(winnow::error::ErrorKind::Verify);
}

Ok(MetadataKey::Custom(s.to_string()))
}
}
}
}
6 changes: 3 additions & 3 deletions src/filesystem/nodes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ impl Node {
node_data.write_all(&[entry_count]).await?;

let mut sorted_metadata = self.metadata.iter().collect::<Vec<_>>();
sorted_metadata.sort_by(|(a, _), (b, _)| a.as_bytes().cmp(b.as_bytes()));
sorted_metadata.sort_by(|(a, _), (b, _)| a.as_bytes().cmp(&b.as_bytes()));

for (key, val) in sorted_metadata.into_iter() {
let key_bytes = key.as_bytes();
Expand All @@ -207,7 +207,7 @@ impl Node {
}

node_data.write_all(&[key_bytes_len as u8]).await?;
node_data.write_all(key_bytes).await?;
node_data.write_all(&key_bytes).await?;

let val_bytes_len = val.len();
if val_bytes_len > u8::MAX as usize {
Expand Down Expand Up @@ -296,7 +296,7 @@ impl Node {
encoded_size += self
.metadata()
.iter()
.map(|(k, v)| (2 + k.as_str().len() + v.len()) as u64)
.map(|(k, v)| (2 + k.as_bytes().len() + v.len()) as u64)
.sum::<u64>();

encoded_size
Expand Down

0 comments on commit 238d51b

Please sign in to comment.