LibTextCodec+LibWeb: Move isomorphic coders to LibTextCodec

This will be used outside of LibWeb.
This commit is contained in:
Timothy Flynn
2025-11-24 12:20:51 -05:00
committed by Jelle Raaijmakers
parent 0480934afb
commit 0fd80a8f99
Notes: github-actions[bot] 2025-11-27 13:59:06 +00:00
17 changed files with 65 additions and 64 deletions

View File

@@ -1299,4 +1299,18 @@ ErrorOr<void> ReplacementDecoder::process(StringView input, Function<ErrorOr<voi
return {};
}
// https://infra.spec.whatwg.org/#isomorphic-decode
String isomorphic_decode(StringView input)
{
    // Isomorphic decoding maps every byte of the input, in order, to the code point with the same numeric value,
    // so the resulting string has exactly as many code points as the input has bytes.
    // NB: Put differently, the byte sequence is interpreted as ISO-8859-1 / Latin-1.
    StringBuilder decoded(input.length());

    auto const source_bytes = input.bytes();
    for (size_t index = 0; index < source_bytes.size(); ++index)
        decoded.append_code_point(source_bytes[index]);

    // The builder contents are returned without a separate validation pass.
    return decoded.to_string_without_validation();
}
}

View File

@@ -135,4 +135,6 @@ TEXTCODEC_API ErrorOr<String> convert_input_to_utf8_using_given_decoder_unless_t
TEXTCODEC_API StringView get_output_encoding(StringView encoding);
TEXTCODEC_API String isomorphic_decode(StringView);
}

View File

@@ -667,4 +667,23 @@ ErrorOr<void> SingleByteEncoder<ArrayType>::process(Utf8View input, Function<Err
return {};
}
// https://infra.spec.whatwg.org/#isomorphic-encode
ByteString isomorphic_encode(StringView input)
{
    // Isomorphic encoding emits one byte per code point of the input, in order, each byte carrying the numeric
    // value of its code point. The input is expected to be an isomorphic string (no code point above U+00FF).
    // NB: Put differently, the string is serialized as ISO-8859-1 / Latin-1.
    StringBuilder encoded(input.length());
    Utf8View const code_points { input };

    for (auto const value : code_points) {
        // A code point outside the isomorphic range is only logged (not asserted on, i.e. no
        // VERIFY(value <= 0xFF)); the cast below then keeps just its low 8 bits.
        auto const fits_in_one_byte = value <= 0xFF;
        if (!fits_in_one_byte)
            dbgln("FIXME: Trying to isomorphic encode a string with code points > U+00FF.");

        encoded.append(static_cast<u8>(value));
    }

    return encoded.to_byte_string();
}
}

View File

@@ -90,4 +90,6 @@ private:
TEXTCODEC_API Optional<Encoder&> encoder_for_exact_name(StringView encoding);
TEXTCODEC_API Optional<Encoder&> encoder_for(StringView label);
TEXTCODEC_API ByteString isomorphic_encode(StringView);
}

View File

@@ -6,6 +6,7 @@
#include <AK/GenericLexer.h>
#include <AK/String.h>
#include <LibTextCodec/Decoder.h>
#include <LibWeb/ContentSecurityPolicy/Directives/DirectiveFactory.h>
#include <LibWeb/ContentSecurityPolicy/Directives/SerializedDirective.h>
#include <LibWeb/ContentSecurityPolicy/Policy.h>
@@ -14,7 +15,6 @@
#include <LibWeb/Fetch/Infrastructure/HTTP/Headers.h>
#include <LibWeb/Fetch/Infrastructure/HTTP/Responses.h>
#include <LibWeb/Infra/CharacterTypes.h>
#include <LibWeb/Infra/Strings.h>
namespace Web::ContentSecurityPolicy {
@@ -31,7 +31,7 @@ GC::Ref<Policy> Policy::parse_a_serialized_csp(GC::Heap& heap, Variant<ByteStrin
// 1. If serialized is a byte sequence, then set serialized to be the result of isomorphic decoding serialized.
auto serialized_string = serialized.has<String>()
? serialized.get<String>()
: Infra::isomorphic_decode(serialized.get<ByteString>());
: TextCodec::isomorphic_decode(serialized.get<ByteString>());
// 2. Let policy be a new policy with an empty directive set, a source of source, and a disposition of disposition.
auto policy = heap.allocate<Policy>();

View File

@@ -23,6 +23,7 @@
#include <LibJS/Runtime/Array.h>
#include <LibJS/Runtime/FunctionObject.h>
#include <LibJS/Runtime/NativeFunction.h>
#include <LibTextCodec/Decoder.h>
#include <LibURL/Origin.h>
#include <LibURL/Parser.h>
#include <LibUnicode/Segmenter.h>
@@ -425,7 +426,7 @@ WebIDL::ExceptionOr<GC::Ref<Document>> Document::create_and_initialize(Type type
// 15. If navigationParams's response has a `Refresh` header, then:
if (auto maybe_refresh = navigation_params.response->header_list()->get("Refresh"sv); maybe_refresh.has_value()) {
// 1. Let value be the isomorphic decoding of the value of the header.
auto value = Infra::isomorphic_decode(maybe_refresh.value());
auto value = TextCodec::isomorphic_decode(maybe_refresh.value());
// 2. Run the shared declarative refresh steps with document and value.
document->shared_declarative_refresh_steps(value, nullptr);

View File

@@ -24,7 +24,6 @@
#include <LibWeb/FileAPI/File.h>
#include <LibWeb/HTML/Scripting/TemporaryExecutionContext.h>
#include <LibWeb/Infra/JSON.h>
#include <LibWeb/Infra/Strings.h>
#include <LibWeb/MimeSniff/MimeType.h>
#include <LibWeb/Streams/ReadableStream.h>
#include <LibWeb/WebIDL/Promise.h>
@@ -353,7 +352,7 @@ static MultipartParsingErrorOr<MultiPartFormDataHeader> parse_multipart_form_dat
header_value = header_value.trim(Infrastructure::HTTP_TAB_OR_SPACE, TrimMode::Right);
// 3. Set contentType to the isomorphic decoding of header value.
header.content_type = Infra::isomorphic_decode(header_value.bytes());
header.content_type = TextCodec::isomorphic_decode(header_value);
}
// -> Otherwise
else {

View File

@@ -14,6 +14,7 @@
#include <AK/ScopeGuard.h>
#include <LibJS/Runtime/Completion.h>
#include <LibRequests/RequestTimingInfo.h>
#include <LibTextCodec/Encoder.h>
#include <LibWeb/Bindings/MainThreadVM.h>
#include <LibWeb/Bindings/PrincipalHostDefined.h>
#include <LibWeb/ContentSecurityPolicy/BlockingAlgorithms.h>
@@ -51,7 +52,6 @@
#include <LibWeb/HTML/Window.h>
#include <LibWeb/HTML/WorkerGlobalScope.h>
#include <LibWeb/HighResolutionTime/TimeOrigin.h>
#include <LibWeb/Infra/Strings.h>
#include <LibWeb/Loader/LoadRequest.h>
#include <LibWeb/Loader/ResourceLoader.h>
#include <LibWeb/MixedContent/AbstractOperations.h>
@@ -1831,7 +1831,7 @@ GC::Ref<PendingResponse> http_network_or_cache_fetch(JS::Realm& realm, Infrastru
// 11. If httpRequest’s referrer is a URL, then:
if (auto const* referrer_url = http_request->referrer().get_pointer<URL::URL>()) {
// 1. Let referrerValue be httpRequest’s referrer, serialized and isomorphic encoded.
auto referrer_value = Infra::isomorphic_encode(referrer_url->serialize());
auto referrer_value = TextCodec::isomorphic_encode(referrer_url->serialize());
// 2. Append (`Referer`, referrerValue) to httpRequest’s header list.
http_request->header_list()->append({ "Referer"sv, move(referrer_value) });

View File

@@ -6,10 +6,10 @@
#include <LibJS/Runtime/Completion.h>
#include <LibJS/Runtime/VM.h>
#include <LibTextCodec/Decoder.h>
#include <LibWeb/Bindings/HeadersPrototype.h>
#include <LibWeb/Bindings/Intrinsics.h>
#include <LibWeb/Fetch/Headers.h>
#include <LibWeb/Infra/Strings.h>
namespace Web::Fetch {
@@ -102,7 +102,7 @@ WebIDL::ExceptionOr<Optional<String>> Headers::get(String const& name)
// 2. Return the result of getting name from this’s header list.
auto byte_buffer = m_header_list->get(name);
return byte_buffer.has_value() ? Infra::isomorphic_decode(*byte_buffer) : Optional<String> {};
return byte_buffer.has_value() ? TextCodec::isomorphic_decode(*byte_buffer) : Optional<String> {};
}
// https://fetch.spec.whatwg.org/#dom-headers-getsetcookie
@@ -119,7 +119,7 @@ Vector<String> Headers::get_set_cookie()
// `Set-Cookie`, in order.
for (auto const& header : *m_header_list) {
if (header.name.equals_ignoring_ascii_case("Set-Cookie"sv))
values.append(Infra::isomorphic_decode(header.value));
values.append(TextCodec::isomorphic_decode(header.value));
}
return values;
}
@@ -187,7 +187,7 @@ JS::ThrowCompletionOr<void> Headers::for_each(ForEachCallback callback)
auto const& pair = pairs[i];
// 2. Invoke idlCallback with « pair’s value, pair’s key, idlObject » and with thisArg as the callback this value.
TRY(callback(Infra::isomorphic_decode(pair.name), Infra::isomorphic_decode(pair.value)));
TRY(callback(TextCodec::isomorphic_decode(pair.name), TextCodec::isomorphic_decode(pair.value)));
// 3. Set pairs to idlObject’s current list of value pairs to iterate over. (It might have changed.)
pairs = value_pairs_to_iterate_over();

View File

@@ -6,10 +6,10 @@
#include <LibJS/Runtime/Array.h>
#include <LibJS/Runtime/Iterator.h>
#include <LibTextCodec/Decoder.h>
#include <LibWeb/Bindings/HeadersIteratorPrototype.h>
#include <LibWeb/Bindings/Intrinsics.h>
#include <LibWeb/Fetch/HeadersIterator.h>
#include <LibWeb/Infra/Strings.h>
namespace Web::Bindings {
@@ -66,8 +66,8 @@ GC::Ref<JS::Object> HeadersIterator::next()
return create_iterator_result_object(vm(), JS::js_undefined(), true);
auto const& pair = pairs[m_index++];
auto pair_name = Infra::isomorphic_decode(pair.name);
auto pair_value = Infra::isomorphic_decode(pair.value);
auto pair_name = TextCodec::isomorphic_decode(pair.name);
auto pair_value = TextCodec::isomorphic_decode(pair.value);
switch (m_iteration_kind) {
case JS::Object::PropertyKind::Key:

View File

@@ -14,10 +14,10 @@
#include <LibJS/Runtime/VM.h>
#include <LibRegex/Regex.h>
#include <LibTextCodec/Decoder.h>
#include <LibTextCodec/Encoder.h>
#include <LibWeb/Fetch/Infrastructure/HTTP.h>
#include <LibWeb/Fetch/Infrastructure/HTTP/Headers.h>
#include <LibWeb/Fetch/Infrastructure/HTTP/Methods.h>
#include <LibWeb/Infra/Strings.h>
#include <LibWeb/Loader/ResourceLoader.h>
#include <LibWeb/MimeSniff/MimeType.h>
@@ -28,8 +28,8 @@ GC_DEFINE_ALLOCATOR(HeaderList);
Header Header::isomorphic_encode(StringView name, StringView value)
{
return {
.name = Infra::isomorphic_encode(name),
.value = Infra::isomorphic_encode(value),
.name = TextCodec::isomorphic_encode(name),
.value = TextCodec::isomorphic_encode(value),
};
}
@@ -523,7 +523,7 @@ Vector<String> get_decode_and_split_header_value(StringView value)
// To get, decode, and split a header value value, run these steps:
// 1. Let input be the result of isomorphic decoding value.
auto input = Infra::isomorphic_decode(value);
auto input = TextCodec::isomorphic_decode(value);
// 2. Let position be a position variable for input, initially pointing at the start of input.
GenericLexer lexer { input };
@@ -612,7 +612,7 @@ ByteString build_content_range(u64 range_start, u64 range_end, u64 full_length)
Optional<RangeHeaderValue> parse_single_range_header_value(StringView const value, bool const allow_whitespace)
{
// 1. Let data be the isomorphic decoding of value.
auto const data = Infra::isomorphic_decode(value);
auto const data = TextCodec::isomorphic_decode(value);
// 2. If data does not start with "bytes", then return failure.
if (!data.starts_with_bytes("bytes"sv))
@@ -709,7 +709,7 @@ bool is_cors_safelisted_request_header(Header const& header)
return false;
// 2. Let mimeType be the result of parsing the result of isomorphic decoding value.
auto decoded = Infra::isomorphic_decode(value);
auto decoded = TextCodec::isomorphic_decode(value);
auto mime_type = MimeSniff::MimeType::parse(decoded);
// 3. If mimeType is failure, then return false.

View File

@@ -7,6 +7,7 @@
#include <AK/Array.h>
#include <LibGC/Heap.h>
#include <LibJS/Runtime/Realm.h>
#include <LibTextCodec/Encoder.h>
#include <LibWeb/ContentSecurityPolicy/Directives/Names.h>
#include <LibWeb/ContentSecurityPolicy/PolicyList.h>
#include <LibWeb/ContentSecurityPolicy/Violation.h>
@@ -14,7 +15,6 @@
#include <LibWeb/Fetch/Fetching/PendingResponse.h>
#include <LibWeb/Fetch/Infrastructure/HTTP/Requests.h>
#include <LibWeb/HTML/TraversableNavigable.h>
#include <LibWeb/Infra/Strings.h>
namespace Web::Fetch::Infrastructure {
@@ -217,7 +217,7 @@ ByteString Request::byte_serialize_origin() const
{
// Byte-serializing a request origin, given a request request, is to return the result of serializing a request
// origin with request, isomorphic encoded.
return Infra::isomorphic_encode(serialize_origin());
return TextCodec::isomorphic_encode(serialize_origin());
}
// https://fetch.spec.whatwg.org/#concept-request-clone

View File

@@ -7,10 +7,10 @@
*/
#include <AK/Base64.h>
#include <LibTextCodec/Decoder.h>
#include <LibURL/URL.h>
#include <LibWeb/Export.h>
#include <LibWeb/Fetch/Infrastructure/URL.h>
#include <LibWeb/Infra/Strings.h>
#include <LibWeb/MimeSniff/MimeType.h>
namespace Web::Fetch::Infrastructure {
@@ -77,7 +77,7 @@ ErrorOr<DataURL> process_data_url(URL::URL const& data_url)
trimmed_substring_view = trimmed_substring_view.trim(" "sv, TrimMode::Right);
if (trimmed_substring_view.ends_with(';')) {
// 1. Let stringBody be the isomorphic decode of body.
auto string_body = Infra::isomorphic_decode(body);
auto string_body = TextCodec::isomorphic_decode(body);
// 2. Set body to the forgiving-base64 decode of stringBody.
// 3. If body is failure, then return failure.

View File

@@ -13,13 +13,13 @@
#include <AK/Vector.h>
#include <LibGC/Function.h>
#include <LibJS/Runtime/NativeFunction.h>
#include <LibTextCodec/Decoder.h>
#include <LibWeb/HTML/PromiseRejectionEvent.h>
#include <LibWeb/HTML/Scripting/ExceptionReporter.h>
#include <LibWeb/HTML/StructuredSerialize.h>
#include <LibWeb/HTML/StructuredSerializeOptions.h>
#include <LibWeb/HTML/UniversalGlobalScope.h>
#include <LibWeb/HTML/Window.h>
#include <LibWeb/Infra/Strings.h>
#include <LibWeb/WebIDL/AbstractOperations.h>
#include <LibWeb/WebIDL/DOMException.h>
#include <LibWeb/WebIDL/ExceptionOr.h>
@@ -71,7 +71,7 @@ WebIDL::ExceptionOr<String> UniversalGlobalScopeMixin::atob(String const& data)
// 3. Return decodedData.
// decode_base64() returns a byte buffer. LibJS uses UTF-8 for strings. Use isomorphic decoding to convert bytes to UTF-8.
return Infra::isomorphic_decode(decoded_data.value());
return TextCodec::isomorphic_decode(decoded_data.value());
}
// https://html.spec.whatwg.org/multipage/timers-and-user-prompts.html#dom-queuemicrotask

View File

@@ -156,39 +156,6 @@ ErrorOr<String> convert_to_scalar_value_string(StringView string)
return scalar_value_builder.to_string();
}
// https://infra.spec.whatwg.org/#isomorphic-encode
ByteString isomorphic_encode(StringView input)
{
    // Isomorphic encoding emits one byte per code point of the input, in order, each byte carrying the numeric
    // value of its code point. The input is expected to be an isomorphic string (no code point above U+00FF).
    // NB: Put differently, the string is serialized as ISO-8859-1 / Latin-1.
    StringBuilder encoded(input.length());
    Utf8View const code_points { input };

    for (auto const value : code_points) {
        // A code point outside the isomorphic range is only logged (not asserted on, i.e. no
        // VERIFY(value <= 0xFF)); the cast below then keeps just its low 8 bits.
        auto const fits_in_one_byte = value <= 0xFF;
        if (!fits_in_one_byte)
            dbgln("FIXME: Trying to isomorphic encode a string with code points > U+00FF.");

        encoded.append(static_cast<u8>(value));
    }

    return encoded.to_byte_string();
}
// https://infra.spec.whatwg.org/#isomorphic-decode
String isomorphic_decode(StringView input)
{
    // Isomorphic decoding maps every byte of the input, in order, to the code point with the same numeric value,
    // so the resulting string has exactly as many code points as the input has bytes.
    // NB: Put differently, the byte sequence is interpreted as ISO-8859-1 / Latin-1.
    StringBuilder decoded(input.length());

    auto const source_bytes = input.bytes();
    for (size_t index = 0; index < source_bytes.size(); ++index)
        decoded.append_code_point(source_bytes[index]);

    // The builder contents are returned without a separate validation pass.
    return decoded.to_string_without_validation();
}
// https://infra.spec.whatwg.org/#code-unit-less-than
bool code_unit_less_than(StringView a, StringView b)
{

View File

@@ -20,8 +20,6 @@ WEB_API ErrorOr<String> strip_and_collapse_whitespace(StringView string);
Utf16String strip_and_collapse_whitespace(Utf16String const& string);
WEB_API bool is_code_unit_prefix(StringView potential_prefix, StringView input);
WEB_API ErrorOr<String> convert_to_scalar_value_string(StringView string);
ByteString isomorphic_encode(StringView input);
WEB_API String isomorphic_decode(StringView input);
bool code_unit_less_than(StringView a, StringView b);
}

View File

@@ -43,7 +43,6 @@
#include <LibWeb/HTML/Window.h>
#include <LibWeb/Infra/ByteSequences.h>
#include <LibWeb/Infra/JSON.h>
#include <LibWeb/Infra/Strings.h>
#include <LibWeb/Loader/ResourceLoader.h>
#include <LibWeb/Page/Page.h>
#include <LibWeb/Platform/EventLoopPlugin.h>
@@ -968,7 +967,7 @@ Optional<String> XMLHttpRequest::get_response_header(String const& name) const
return {};
// FIXME: The spec doesn't mention isomorphic decode. Spec bug?
return Infra::isomorphic_decode(header_bytes->bytes());
return TextCodec::isomorphic_decode(*header_bytes);
}
// https://xhr.spec.whatwg.org/#legacy-uppercased-byte-less-than
@@ -1002,7 +1001,7 @@ String XMLHttpRequest::get_all_response_headers() const
// 4. For each header in headers, append header’s name, followed by a 0x3A 0x20 byte pair, followed by header’s value, followed by a 0x0D 0x0A byte pair, to output.
for (auto const& header : initial_headers) {
// FIXME: The spec does not mention isomorphic decode. Spec bug?
output.appendff("{}: {}\r\n", header.name, Infra::isomorphic_decode(header.value));
output.appendff("{}: {}\r\n", header.name, TextCodec::isomorphic_decode(header.value));
}
// 5. Return output.