Skip to content

Commit

Permalink
Preliminary implementation of MIME type sniffing
Browse files Browse the repository at this point in the history
  • Loading branch information
PlamenHristov committed Jun 5, 2024
1 parent 36be8d6 commit 0ace442
Show file tree
Hide file tree
Showing 8 changed files with 338 additions and 13 deletions.
35 changes: 33 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ reqwest = { version = "^0.12", default-features = false, optional = true, featur
serde = { version = "^1", features = ["derive"], optional = true }
serde_json = { version = "^1", optional = true }
url = { version = "^2", optional = true }
#mime = "0.3.17"
mime = {git = "https://github.com/PlamenHristov/mime.git", rev = "c30e3db"}
mime_guess = "2.0.4"


[[example]]
name = "full_fs_exercise"
Expand Down
15 changes: 15 additions & 0 deletions src/filesystem/drive/directory_entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ pub struct DirectoryEntry {
name: NodeName,
kind: NodeKind,

mime_type: Option<mime::MediaType>,

size: u64,
}

Expand Down Expand Up @@ -48,6 +50,17 @@ impl DirectoryEntry {
pub fn size(&self) -> u64 {
self.size
}

pub fn mime_type(&self) -> Option<mime::MediaType> {
match self.kind {
NodeKind::File => self.mime_type.clone(),
NodeKind::Directory => None,
NodeKind::AssociatedData => None,
NodeKind::InternalLink => None,
NodeKind::NativeMount => None,
NodeKind::Unknown(_) => None,
}
}
}

impl TryFrom<&Node> for DirectoryEntry {
Expand All @@ -63,6 +76,8 @@ impl TryFrom<&Node> for DirectoryEntry {
name: node.name().clone(),
kind: node.kind().clone(),

mime_type: node.mime_type(),

size: node.size(),
})
}
Expand Down
17 changes: 14 additions & 3 deletions src/filesystem/drive/directory_handle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use crate::codec::crypto::{AccessKey, SigningKey};
use crate::codec::data_storage::{data_chunk::DataChunk, DataBlock};
use crate::codec::filesystem::BlockKind;
use crate::filesystem::drive::{DirectoryEntry, InnerDrive, OperationError, WalkState};
use crate::filesystem::nodes::{Node, NodeData, NodeId, NodeName};
use crate::filesystem::nodes::{MetadataKey, MimeGuesser, Node, NodeData, NodeId, NodeName};
use crate::filesystem::{ContentLocation, ContentReference, FileContent, NodeBuilder};
use crate::stores::DataStore;

Expand Down Expand Up @@ -65,7 +65,7 @@ impl DirectoryHandle {
}

/// Changes the permission on the target node. Currently not implemented and changes are
/// expected to combine the [`FilePermissions`] with the [`crate::codec::filesystem::DirectoryPermissions`] all at once.
/// expected to combine the [`FilePermissions`] with the ::new(crate::codec::filesystem::DirectoryPermissions`] all at once.
pub async fn chmod(
&self,
_path: &[&str],
Expand Down Expand Up @@ -693,8 +693,19 @@ impl DirectoryHandle {

let mut inner_write = self.inner.write().await;
let node = inner_write.by_perm_id_mut(&new_permanent_id).await?;
let node_data = node.data_mut().await;
let mime_type = {
let node_name = node.name().clone();
MimeGuesser::default()
.with_name(node_name)
// .with_data(content_references.map())
.guess_mime_type()
};
if let Some(mime_type) = mime_type {
node.set_attribute(MetadataKey::MimeType, mime_type.to_string().into())
.await;
}

let node_data = node.data_mut().await;
let file_content =
FileContent::encrypted(locked_key, plaintext_cid, data_size, content_references);
*node_data = NodeData::full_file(file_content);
Expand Down
212 changes: 212 additions & 0 deletions src/filesystem/nodes/metadata/mime_type.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
use crate::prelude::nodes::NodeName;


#[derive(Default)]
pub struct MimeGuesser {
name: Option<String>,
data: Vec<u8>,
}

impl MimeGuesser {
const MP3_RATES: [u32; 15] = [
0, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000, 160000, 192000, 224000,
256000, 320000,
];

const MP25_RATES: [u32; 15] = [
0, 8000, 16000, 24000, 32000, 40000, 48000, 56000, 64000, 80000, 96000, 112000, 128000,
144000, 160000,
];

const SAMPLE_RATES: [u32; 3] = [44100, 48000, 32000];

pub fn with_name(mut self, name: NodeName) -> Self {
match name {
NodeName::Named(name) => self.name = Some(name.clone()),
NodeName::Root => {}
}
self
}

pub fn with_data(mut self, data: &[u8]) -> Self {
self.data.extend_from_slice(data);
self
}

pub fn guess_mime_type(&self) -> Option<mime::MediaType> {
self.pattern_match()
.or_else(|| self.algorithm_match())
.or_else(|| self.extension_match())
}

fn extension_match(&self) -> Option<mime::MediaType> {
let guess = mime_guess::get_mime_extensions_str(
self.name.as_ref().map_or("", |name| name.as_str()),
);
if let Some(guess) = guess {
return mime::MediaType::parse(*guess.first()?).ok();
}
None
}

fn pattern_match(&self) -> Option<mime::MediaType> {
let magic_bytes = &self.data.get(0..34)?;

// Taken from https://mimesniff.spec.whatwg.org/
match magic_bytes {
[0xFF, 0xD8, 0xFF, ..] => Some(mime::IMAGE_JPEG),
[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A] => Some(mime::IMAGE_PNG),
[0x47, 0x49, 0x46, 0x38, 0x37, 0x61, ..] | [0x47, 0x49, 0x46, 0x38, 0x39, 0x61, ..] => {
Some(mime::IMAGE_GIF)
}
[0x42, 0x4D, ..] => Some(mime::IMAGE_BMP),
[0x3C, 0x3F, 0x78, 0x6D, 0x6C, ..] => Some(mime::TEXT_XML),
[0x3C, 0x73, 0x76, 0x67, ..] => Some(mime::IMAGE_SVG),
[0x77, 0x4F, 0x46, 0x46, ..] => Some(mime::FONT_WOFF),
[0x77, 0x4F, 0x46, 0x32, ..] => Some(mime::FONT_WOFF2),
[0x25, 0x50, 0x44, 0x46, 0x2D, ..] => Some(mime::APPLICATION_PDF),
[0x7B, ..] => Some(mime::APPLICATION_JSON),
[0x46, 0x4F, 0x52, 0x4D, _, _, _, _, 0x41, 0x49, 0x46, 0x46, ..] => {
Some(mime::AUDIO_AIFF)
}
[0x49, 0x44, 0x33, ..] => Some(mime::AUDIO_MPEG),
[0x4F, 0x67, 0x67, 0x53, 0x00, ..] => Some(mime::AUDIO_OGG),
[0x4D, 0x54, 0x68, 0x64, 0x00, 0x00, 0x00, 0x06, ..] => Some(mime::AUDIO_MIDI),
[0x52, 0x49, 0x46, 0x46, _, _, _, _, 0x41, 0x56, 0x49, 0x20, ..] => {
Some(mime::VIDEO_AVI)
}
[0x52, 0x49, 0x46, 0x46, _, _, _, _, 0x57, 0x41, 0x56, 0x45, ..] => {
Some(mime::AUDIO_WAVE)
}
[0x1F, 0x8B, 0x08, ..] => Some(mime::APPLICATION_GZIP),
[0x50, 0x4B, 0x03, 0x04, ..] => Some(mime::APPLICATION_ZIP),
[0x52, 0x61, 0x72, 0x20, 0x1A, 0x07, 0x00, ..] => Some(mime::APPLICATION_RAR),
[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4C, 0x50, ..] => {
Some(mime::APPLICATION_VND_MS_FONTOBJECT)
}
[0x00, 0x01, 0x00, 0x00, ..] => Some(mime::FONT_TTF),
[0x4F, 0x54, 0x54, 0x4F, ..] => Some(mime::FONT_OTF),
[0x74, 0x74, 0x63, 0x66, ..] => Some(mime::FONT_COLLECTION),
[0x25, 0x21, 0x50, 0x53, 0x2D, 0x41, 0x64, 0x6F, 0x62, 0x65, 0x2D, ..] => {
Some(mime::APPLICATION_POSTSCRIPT)
}
[0xFE, 0xFF, 0x00, 0x00, ..]
| [0xFF, 0xFE, 0x00, 0x00, ..]
| [0xEF, 0xBB, 0xBF, 0x00, ..] => Some(mime::TEXT_PLAIN),
// TODO: And the mask
[0x3C, 0x21, 0x44, 0x4F, 0x43, 0x54, 0x59, 0x50, 0x45, 0x20, 0x48, 0x54, 0x4D, 0x4C, ..]
| [0x3C, 0x53, 0x43, 0x52, 0x49, 0x50, 0x54, ..]
| [0x3C, 0x49, 0x46, 0x52, 0x41, 0x4D, 0x45, ..]
| [0x3C, 0x54, 0x41, 0x42, 0x4C, 0x45, ..]
| [0x3C, 0x53, 0x54, 0x59, 0x4C, 0x45, ..]
| [0x3C, 0x54, 0x49, 0x54, 0x4C, 0x45, ..]
| [0x3C, 0x48, 0x45, 0x41, 0x44, ..]
| [0x3C, 0x48, 0x54, 0x4D, 0x4C, ..]
| [0x3C, 0x46, 0x4F, 0x4E, 0x54, ..]
| [0x3C, 0x42, 0x4F, 0x44, 0x59, ..]
| [0x3C, 0x44, 0x49, 0x56, ..]
| [0x3C, 0x21, 0x2D, 0x2D, ..]
| [0x3C, 0x48, 0x31, ..]
| [0x3C, 0x42, 0x52, ..]
| [0x3C, 0x41, ..]
| [0x3C, 0x42, ..]
| [0x3C, 0x50, ..] => Some(mime::TEXT_HTML),
_ => None,
}
}

fn algorithm_match(&self) -> Option<mime::MediaType> {
if self.is_mp4() == Some(mime::AUDIO_MP4) {
return Some(mime::AUDIO_MP4);
}
if self.is_mp3() {
return Some(mime::AUDIO_MPEG);
}
None
}
fn is_mp4(&self) -> Option<mime::MediaType> {
let length = self.data.len();
if length < 12 {
return None;
}
let box_size = u32::from_be_bytes([self.data[0], self.data[1], self.data[2], self.data[3]]);
if length < box_size as usize || box_size % 4 != 0 {
return None;
}
if self.data[4..8] != [0x66, 0x74, 0x79, 0x70] {
return None;
}
if self.data[8..11] == [0x6D, 0x70, 0x34] {
return Some(mime::AUDIO_MP4);
}
let mut bytes_read = 16;
while bytes_read < box_size as usize {
if self.data[bytes_read..bytes_read + 3] == [0x6D, 0x70, 0x34] {
return Some(mime::AUDIO_MP4);
}
bytes_read += 4;
}
None
}

fn is_mp3(&self) -> bool {
let sequence = &self.data;
let length = sequence.len();
let mut s = 0;

if !match_mp3_header(sequence, s) {
return false;
}

let (version, bitrate_index, samplerate_index, pad) = parse_mp3_frame(sequence, s);
let bitrate = if version & 0x01 != 0 {
MimeGuesser::MP25_RATES[bitrate_index as usize]
} else {
MimeGuesser::MP3_RATES[bitrate_index as usize]
};
let sample_rate = MimeGuesser::SAMPLE_RATES[samplerate_index as usize];
let skipped_bytes = compute_mp3_frame_size(version, bitrate, sample_rate, pad);

if skipped_bytes < 4 || skipped_bytes > length - s {
return false;
}
s += skipped_bytes;

if !match_mp3_header(sequence, s) {
return false;
}

true
}
}

fn match_mp3_header(sequence: &[u8], s: usize) -> bool {
let length = sequence.len();
if length - s < 4 {
return false;
}

sequence[s] == 0xff
&& sequence[s + 1] & 0xe0 == 0xe0
&& (sequence[s + 1] & 0x06 >> 1) != 0
&& (sequence[s + 2] & 0xf0 >> 4) != 15
&& (sequence[s + 2] & 0x0c >> 2) != 3
&& (4 - (sequence[s + 1] & 0x06 >> 1)) == 3
}

fn parse_mp3_frame(sequence: &[u8], s: usize) -> (u8, u8, u8, u8) {
let version = sequence[s + 1] & 0x18 >> 3;
let bitrate_index = sequence[s + 2] & 0xf0 >> 4;
let samplerate_index = sequence[s + 2] & 0x0c >> 2;
let pad = sequence[s + 2] & 0x02 >> 1;
(version, bitrate_index, samplerate_index, pad)
}

fn compute_mp3_frame_size(version: u8, bitrate: u32, samplerate: u32, pad: u8) -> usize {
let scale = if version == 1 { 72 } else { 144 };
let mut size = (bitrate as usize * scale / samplerate as usize) as usize;
if pad != 0 {
size += 1;
}
size
}
Loading

0 comments on commit 0ace442

Please sign in to comment.