From 7538bf8f9b29013db73170d5f7e720e816237702 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Thu, 6 Jul 2023 11:53:33 -0400 Subject: [PATCH] Added comment. --- include/ada/percent_decoder.h | 1 - src/unicode.cpp | 11 ++++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/include/ada/percent_decoder.h b/include/ada/percent_decoder.h index 7ff26d045..14c28fd21 100644 --- a/include/ada/percent_decoder.h +++ b/include/ada/percent_decoder.h @@ -342,7 +342,6 @@ struct AVX2 { struct AVX512 { [[gnu::target("avx512f,avx512bw,avx512vbmi,avx512vbmi2")]] static size_t percent_decode(char const *in, size_t inputsize, char *out) { - const __m512i byte_plus = _mm512_set1_epi8('+'); const __m512i byte_percent = _mm512_set1_epi8('%'); diff --git a/src/unicode.cpp b/src/unicode.cpp index 459b6e61e..4ccdc8ebf 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -475,11 +475,16 @@ bool percent_encode(const std::string_view input, const uint8_t character_set[], bool to_ascii(std::optional<std::string>& out, const std::string_view plain, size_t first_percent) { - std::string percent_decoded_buffer(plain.size(), 0); + std::string percent_decoded_buffer; std::string_view input = plain; if (first_percent != std::string_view::npos) { - size_t output_size = percent_decoder::percent_decode(plain.data(), plain.size(), percent_decoded_buffer.data()); - percent_decoded_buffer.resize(output_size); + // The indeed code does full percent decoding (over all of plain), + // but it seems more suitable to skip over the prefix before the first "%" + // since in many cases, we might expect percent encoding to be sparse. 
+ percent_decoded_buffer.assign(input.substr(0, first_percent)); + percent_decoded_buffer.resize(plain.size()); + size_t output_size = percent_decoder::percent_decode(plain.data() + first_percent, plain.size() - first_percent, percent_decoded_buffer.data() + first_percent); + percent_decoded_buffer.resize(output_size + first_percent); input = percent_decoded_buffer; } // input is a non-empty UTF-8 string, must be percent decoded