mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2025-12-05 01:10:24 +00:00
LibTextCodec+LibWeb: Move isomorphic coders to LibTextCodec
This will be used outside of LibWeb.
This commit is contained in:
committed by
Jelle Raaijmakers
parent
0480934afb
commit
0fd80a8f99
Notes:
github-actions[bot]
2025-11-27 13:59:06 +00:00
Author: https://github.com/trflynn89 Commit: https://github.com/LadybirdBrowser/ladybird/commit/0fd80a8f992 Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/6944 Reviewed-by: https://github.com/gmta ✅
@@ -1299,4 +1299,18 @@ ErrorOr<void> ReplacementDecoder::process(StringView input, Function<ErrorOr<voi
|
||||
return {};
|
||||
}
|
||||
|
||||
// https://infra.spec.whatwg.org/#isomorphic-decode
|
||||
String isomorphic_decode(StringView input)
|
||||
{
|
||||
// To isomorphic decode a byte sequence input, return a string whose code point length is equal to input’s length
|
||||
// and whose code points have the same values as the values of input’s bytes, in the same order.
|
||||
// NB: This is essentially spec-speak for "Decode as ISO-8859-1 / Latin-1".
|
||||
StringBuilder builder(input.length());
|
||||
|
||||
for (auto byte : input.bytes())
|
||||
builder.append_code_point(byte);
|
||||
|
||||
return builder.to_string_without_validation();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -135,4 +135,6 @@ TEXTCODEC_API ErrorOr<String> convert_input_to_utf8_using_given_decoder_unless_t
|
||||
|
||||
TEXTCODEC_API StringView get_output_encoding(StringView encoding);
|
||||
|
||||
TEXTCODEC_API String isomorphic_decode(StringView);
|
||||
|
||||
}
|
||||
|
||||
@@ -667,4 +667,23 @@ ErrorOr<void> SingleByteEncoder<ArrayType>::process(Utf8View input, Function<Err
|
||||
return {};
|
||||
}
|
||||
|
||||
// https://infra.spec.whatwg.org/#isomorphic-encode
|
||||
ByteString isomorphic_encode(StringView input)
|
||||
{
|
||||
// To isomorphic encode an isomorphic string input: return a byte sequence whose length is equal to input’s code
|
||||
// point length and whose bytes have the same values as the values of input’s code points, in the same order.
|
||||
// NB: This is essentially spec-speak for "Encode as ISO-8859-1 / Latin-1".
|
||||
StringBuilder builder(input.length());
|
||||
|
||||
for (auto code_point : Utf8View { input }) {
|
||||
// VERIFY(code_point <= 0xFF);
|
||||
if (code_point > 0xFF)
|
||||
dbgln("FIXME: Trying to isomorphic encode a string with code points > U+00FF.");
|
||||
|
||||
builder.append(static_cast<u8>(code_point));
|
||||
}
|
||||
|
||||
return builder.to_byte_string();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -90,4 +90,6 @@ private:
|
||||
TEXTCODEC_API Optional<Encoder&> encoder_for_exact_name(StringView encoding);
|
||||
TEXTCODEC_API Optional<Encoder&> encoder_for(StringView label);
|
||||
|
||||
TEXTCODEC_API ByteString isomorphic_encode(StringView);
|
||||
|
||||
}
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
#include <AK/GenericLexer.h>
|
||||
#include <AK/String.h>
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
#include <LibWeb/ContentSecurityPolicy/Directives/DirectiveFactory.h>
|
||||
#include <LibWeb/ContentSecurityPolicy/Directives/SerializedDirective.h>
|
||||
#include <LibWeb/ContentSecurityPolicy/Policy.h>
|
||||
@@ -14,7 +15,6 @@
|
||||
#include <LibWeb/Fetch/Infrastructure/HTTP/Headers.h>
|
||||
#include <LibWeb/Fetch/Infrastructure/HTTP/Responses.h>
|
||||
#include <LibWeb/Infra/CharacterTypes.h>
|
||||
#include <LibWeb/Infra/Strings.h>
|
||||
|
||||
namespace Web::ContentSecurityPolicy {
|
||||
|
||||
@@ -31,7 +31,7 @@ GC::Ref<Policy> Policy::parse_a_serialized_csp(GC::Heap& heap, Variant<ByteStrin
|
||||
// 1. If serialized is a byte sequence, then set serialized to be the result of isomorphic decoding serialized.
|
||||
auto serialized_string = serialized.has<String>()
|
||||
? serialized.get<String>()
|
||||
: Infra::isomorphic_decode(serialized.get<ByteString>());
|
||||
: TextCodec::isomorphic_decode(serialized.get<ByteString>());
|
||||
|
||||
// 2. Let policy be a new policy with an empty directive set, a source of source, and a disposition of disposition.
|
||||
auto policy = heap.allocate<Policy>();
|
||||
|
||||
@@ -23,6 +23,7 @@
|
||||
#include <LibJS/Runtime/Array.h>
|
||||
#include <LibJS/Runtime/FunctionObject.h>
|
||||
#include <LibJS/Runtime/NativeFunction.h>
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
#include <LibURL/Origin.h>
|
||||
#include <LibURL/Parser.h>
|
||||
#include <LibUnicode/Segmenter.h>
|
||||
@@ -425,7 +426,7 @@ WebIDL::ExceptionOr<GC::Ref<Document>> Document::create_and_initialize(Type type
|
||||
// 15. If navigationParams's response has a `Refresh` header, then:
|
||||
if (auto maybe_refresh = navigation_params.response->header_list()->get("Refresh"sv); maybe_refresh.has_value()) {
|
||||
// 1. Let value be the isomorphic decoding of the value of the header.
|
||||
auto value = Infra::isomorphic_decode(maybe_refresh.value());
|
||||
auto value = TextCodec::isomorphic_decode(maybe_refresh.value());
|
||||
|
||||
// 2. Run the shared declarative refresh steps with document and value.
|
||||
document->shared_declarative_refresh_steps(value, nullptr);
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
#include <LibWeb/FileAPI/File.h>
|
||||
#include <LibWeb/HTML/Scripting/TemporaryExecutionContext.h>
|
||||
#include <LibWeb/Infra/JSON.h>
|
||||
#include <LibWeb/Infra/Strings.h>
|
||||
#include <LibWeb/MimeSniff/MimeType.h>
|
||||
#include <LibWeb/Streams/ReadableStream.h>
|
||||
#include <LibWeb/WebIDL/Promise.h>
|
||||
@@ -353,7 +352,7 @@ static MultipartParsingErrorOr<MultiPartFormDataHeader> parse_multipart_form_dat
|
||||
header_value = header_value.trim(Infrastructure::HTTP_TAB_OR_SPACE, TrimMode::Right);
|
||||
|
||||
// 3. Set contentType to the isomorphic decoding of header value.
|
||||
header.content_type = Infra::isomorphic_decode(header_value.bytes());
|
||||
header.content_type = TextCodec::isomorphic_decode(header_value);
|
||||
}
|
||||
// -> Otherwise
|
||||
else {
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include <AK/ScopeGuard.h>
|
||||
#include <LibJS/Runtime/Completion.h>
|
||||
#include <LibRequests/RequestTimingInfo.h>
|
||||
#include <LibTextCodec/Encoder.h>
|
||||
#include <LibWeb/Bindings/MainThreadVM.h>
|
||||
#include <LibWeb/Bindings/PrincipalHostDefined.h>
|
||||
#include <LibWeb/ContentSecurityPolicy/BlockingAlgorithms.h>
|
||||
@@ -51,7 +52,6 @@
|
||||
#include <LibWeb/HTML/Window.h>
|
||||
#include <LibWeb/HTML/WorkerGlobalScope.h>
|
||||
#include <LibWeb/HighResolutionTime/TimeOrigin.h>
|
||||
#include <LibWeb/Infra/Strings.h>
|
||||
#include <LibWeb/Loader/LoadRequest.h>
|
||||
#include <LibWeb/Loader/ResourceLoader.h>
|
||||
#include <LibWeb/MixedContent/AbstractOperations.h>
|
||||
@@ -1831,7 +1831,7 @@ GC::Ref<PendingResponse> http_network_or_cache_fetch(JS::Realm& realm, Infrastru
|
||||
// 11. If httpRequest’s referrer is a URL, then:
|
||||
if (auto const* referrer_url = http_request->referrer().get_pointer<URL::URL>()) {
|
||||
// 1. Let referrerValue be httpRequest’s referrer, serialized and isomorphic encoded.
|
||||
auto referrer_value = Infra::isomorphic_encode(referrer_url->serialize());
|
||||
auto referrer_value = TextCodec::isomorphic_encode(referrer_url->serialize());
|
||||
|
||||
// 2. Append (`Referer`, referrerValue) to httpRequest’s header list.
|
||||
http_request->header_list()->append({ "Referer"sv, move(referrer_value) });
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
|
||||
#include <LibJS/Runtime/Completion.h>
|
||||
#include <LibJS/Runtime/VM.h>
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
#include <LibWeb/Bindings/HeadersPrototype.h>
|
||||
#include <LibWeb/Bindings/Intrinsics.h>
|
||||
#include <LibWeb/Fetch/Headers.h>
|
||||
#include <LibWeb/Infra/Strings.h>
|
||||
|
||||
namespace Web::Fetch {
|
||||
|
||||
@@ -102,7 +102,7 @@ WebIDL::ExceptionOr<Optional<String>> Headers::get(String const& name)
|
||||
|
||||
// 2. Return the result of getting name from this’s header list.
|
||||
auto byte_buffer = m_header_list->get(name);
|
||||
return byte_buffer.has_value() ? Infra::isomorphic_decode(*byte_buffer) : Optional<String> {};
|
||||
return byte_buffer.has_value() ? TextCodec::isomorphic_decode(*byte_buffer) : Optional<String> {};
|
||||
}
|
||||
|
||||
// https://fetch.spec.whatwg.org/#dom-headers-getsetcookie
|
||||
@@ -119,7 +119,7 @@ Vector<String> Headers::get_set_cookie()
|
||||
// `Set-Cookie`, in order.
|
||||
for (auto const& header : *m_header_list) {
|
||||
if (header.name.equals_ignoring_ascii_case("Set-Cookie"sv))
|
||||
values.append(Infra::isomorphic_decode(header.value));
|
||||
values.append(TextCodec::isomorphic_decode(header.value));
|
||||
}
|
||||
return values;
|
||||
}
|
||||
@@ -187,7 +187,7 @@ JS::ThrowCompletionOr<void> Headers::for_each(ForEachCallback callback)
|
||||
auto const& pair = pairs[i];
|
||||
|
||||
// 2. Invoke idlCallback with « pair’s value, pair’s key, idlObject » and with thisArg as the callback this value.
|
||||
TRY(callback(Infra::isomorphic_decode(pair.name), Infra::isomorphic_decode(pair.value)));
|
||||
TRY(callback(TextCodec::isomorphic_decode(pair.name), TextCodec::isomorphic_decode(pair.value)));
|
||||
|
||||
// 3. Set pairs to idlObject’s current list of value pairs to iterate over. (It might have changed.)
|
||||
pairs = value_pairs_to_iterate_over();
|
||||
|
||||
@@ -6,10 +6,10 @@
|
||||
|
||||
#include <LibJS/Runtime/Array.h>
|
||||
#include <LibJS/Runtime/Iterator.h>
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
#include <LibWeb/Bindings/HeadersIteratorPrototype.h>
|
||||
#include <LibWeb/Bindings/Intrinsics.h>
|
||||
#include <LibWeb/Fetch/HeadersIterator.h>
|
||||
#include <LibWeb/Infra/Strings.h>
|
||||
|
||||
namespace Web::Bindings {
|
||||
|
||||
@@ -66,8 +66,8 @@ GC::Ref<JS::Object> HeadersIterator::next()
|
||||
return create_iterator_result_object(vm(), JS::js_undefined(), true);
|
||||
|
||||
auto const& pair = pairs[m_index++];
|
||||
auto pair_name = Infra::isomorphic_decode(pair.name);
|
||||
auto pair_value = Infra::isomorphic_decode(pair.value);
|
||||
auto pair_name = TextCodec::isomorphic_decode(pair.name);
|
||||
auto pair_value = TextCodec::isomorphic_decode(pair.value);
|
||||
|
||||
switch (m_iteration_kind) {
|
||||
case JS::Object::PropertyKind::Key:
|
||||
|
||||
@@ -14,10 +14,10 @@
|
||||
#include <LibJS/Runtime/VM.h>
|
||||
#include <LibRegex/Regex.h>
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
#include <LibTextCodec/Encoder.h>
|
||||
#include <LibWeb/Fetch/Infrastructure/HTTP.h>
|
||||
#include <LibWeb/Fetch/Infrastructure/HTTP/Headers.h>
|
||||
#include <LibWeb/Fetch/Infrastructure/HTTP/Methods.h>
|
||||
#include <LibWeb/Infra/Strings.h>
|
||||
#include <LibWeb/Loader/ResourceLoader.h>
|
||||
#include <LibWeb/MimeSniff/MimeType.h>
|
||||
|
||||
@@ -28,8 +28,8 @@ GC_DEFINE_ALLOCATOR(HeaderList);
|
||||
Header Header::isomorphic_encode(StringView name, StringView value)
|
||||
{
|
||||
return {
|
||||
.name = Infra::isomorphic_encode(name),
|
||||
.value = Infra::isomorphic_encode(value),
|
||||
.name = TextCodec::isomorphic_encode(name),
|
||||
.value = TextCodec::isomorphic_encode(value),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -523,7 +523,7 @@ Vector<String> get_decode_and_split_header_value(StringView value)
|
||||
// To get, decode, and split a header value value, run these steps:
|
||||
|
||||
// 1. Let input be the result of isomorphic decoding value.
|
||||
auto input = Infra::isomorphic_decode(value);
|
||||
auto input = TextCodec::isomorphic_decode(value);
|
||||
|
||||
// 2. Let position be a position variable for input, initially pointing at the start of input.
|
||||
GenericLexer lexer { input };
|
||||
@@ -612,7 +612,7 @@ ByteString build_content_range(u64 range_start, u64 range_end, u64 full_length)
|
||||
Optional<RangeHeaderValue> parse_single_range_header_value(StringView const value, bool const allow_whitespace)
|
||||
{
|
||||
// 1. Let data be the isomorphic decoding of value.
|
||||
auto const data = Infra::isomorphic_decode(value);
|
||||
auto const data = TextCodec::isomorphic_decode(value);
|
||||
|
||||
// 2. If data does not start with "bytes", then return failure.
|
||||
if (!data.starts_with_bytes("bytes"sv))
|
||||
@@ -709,7 +709,7 @@ bool is_cors_safelisted_request_header(Header const& header)
|
||||
return false;
|
||||
|
||||
// 2. Let mimeType be the result of parsing the result of isomorphic decoding value.
|
||||
auto decoded = Infra::isomorphic_decode(value);
|
||||
auto decoded = TextCodec::isomorphic_decode(value);
|
||||
auto mime_type = MimeSniff::MimeType::parse(decoded);
|
||||
|
||||
// 3. If mimeType is failure, then return false.
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <AK/Array.h>
|
||||
#include <LibGC/Heap.h>
|
||||
#include <LibJS/Runtime/Realm.h>
|
||||
#include <LibTextCodec/Encoder.h>
|
||||
#include <LibWeb/ContentSecurityPolicy/Directives/Names.h>
|
||||
#include <LibWeb/ContentSecurityPolicy/PolicyList.h>
|
||||
#include <LibWeb/ContentSecurityPolicy/Violation.h>
|
||||
@@ -14,7 +15,6 @@
|
||||
#include <LibWeb/Fetch/Fetching/PendingResponse.h>
|
||||
#include <LibWeb/Fetch/Infrastructure/HTTP/Requests.h>
|
||||
#include <LibWeb/HTML/TraversableNavigable.h>
|
||||
#include <LibWeb/Infra/Strings.h>
|
||||
|
||||
namespace Web::Fetch::Infrastructure {
|
||||
|
||||
@@ -217,7 +217,7 @@ ByteString Request::byte_serialize_origin() const
|
||||
{
|
||||
// Byte-serializing a request origin, given a request request, is to return the result of serializing a request
|
||||
// origin with request, isomorphic encoded.
|
||||
return Infra::isomorphic_encode(serialize_origin());
|
||||
return TextCodec::isomorphic_encode(serialize_origin());
|
||||
}
|
||||
|
||||
// https://fetch.spec.whatwg.org/#concept-request-clone
|
||||
|
||||
@@ -7,10 +7,10 @@
|
||||
*/
|
||||
|
||||
#include <AK/Base64.h>
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
#include <LibURL/URL.h>
|
||||
#include <LibWeb/Export.h>
|
||||
#include <LibWeb/Fetch/Infrastructure/URL.h>
|
||||
#include <LibWeb/Infra/Strings.h>
|
||||
#include <LibWeb/MimeSniff/MimeType.h>
|
||||
|
||||
namespace Web::Fetch::Infrastructure {
|
||||
@@ -77,7 +77,7 @@ ErrorOr<DataURL> process_data_url(URL::URL const& data_url)
|
||||
trimmed_substring_view = trimmed_substring_view.trim(" "sv, TrimMode::Right);
|
||||
if (trimmed_substring_view.ends_with(';')) {
|
||||
// 1. Let stringBody be the isomorphic decode of body.
|
||||
auto string_body = Infra::isomorphic_decode(body);
|
||||
auto string_body = TextCodec::isomorphic_decode(body);
|
||||
|
||||
// 2. Set body to the forgiving-base64 decode of stringBody.
|
||||
// 3. If body is failure, then return failure.
|
||||
|
||||
@@ -13,13 +13,13 @@
|
||||
#include <AK/Vector.h>
|
||||
#include <LibGC/Function.h>
|
||||
#include <LibJS/Runtime/NativeFunction.h>
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
#include <LibWeb/HTML/PromiseRejectionEvent.h>
|
||||
#include <LibWeb/HTML/Scripting/ExceptionReporter.h>
|
||||
#include <LibWeb/HTML/StructuredSerialize.h>
|
||||
#include <LibWeb/HTML/StructuredSerializeOptions.h>
|
||||
#include <LibWeb/HTML/UniversalGlobalScope.h>
|
||||
#include <LibWeb/HTML/Window.h>
|
||||
#include <LibWeb/Infra/Strings.h>
|
||||
#include <LibWeb/WebIDL/AbstractOperations.h>
|
||||
#include <LibWeb/WebIDL/DOMException.h>
|
||||
#include <LibWeb/WebIDL/ExceptionOr.h>
|
||||
@@ -71,7 +71,7 @@ WebIDL::ExceptionOr<String> UniversalGlobalScopeMixin::atob(String const& data)
|
||||
|
||||
// 3. Return decodedData.
|
||||
// decode_base64() returns a byte buffer. LibJS uses UTF-8 for strings. Use isomorphic decoding to convert bytes to UTF-8.
|
||||
return Infra::isomorphic_decode(decoded_data.value());
|
||||
return TextCodec::isomorphic_decode(decoded_data.value());
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/timers-and-user-prompts.html#dom-queuemicrotask
|
||||
|
||||
@@ -156,39 +156,6 @@ ErrorOr<String> convert_to_scalar_value_string(StringView string)
|
||||
return scalar_value_builder.to_string();
|
||||
}
|
||||
|
||||
// https://infra.spec.whatwg.org/#isomorphic-encode
|
||||
ByteString isomorphic_encode(StringView input)
|
||||
{
|
||||
// To isomorphic encode an isomorphic string input: return a byte sequence whose length is equal to input’s code
|
||||
// point length and whose bytes have the same values as the values of input’s code points, in the same order.
|
||||
// NB: This is essentially spec-speak for "Encode as ISO-8859-1 / Latin-1".
|
||||
StringBuilder builder(input.length());
|
||||
|
||||
for (auto code_point : Utf8View { input }) {
|
||||
// VERIFY(code_point <= 0xFF);
|
||||
if (code_point > 0xFF)
|
||||
dbgln("FIXME: Trying to isomorphic encode a string with code points > U+00FF.");
|
||||
|
||||
builder.append(static_cast<u8>(code_point));
|
||||
}
|
||||
|
||||
return builder.to_byte_string();
|
||||
}
|
||||
|
||||
// https://infra.spec.whatwg.org/#isomorphic-decode
|
||||
String isomorphic_decode(StringView input)
|
||||
{
|
||||
// To isomorphic decode a byte sequence input, return a string whose code point length is equal to input’s length
|
||||
// and whose code points have the same values as the values of input’s bytes, in the same order.
|
||||
// NB: This is essentially spec-speak for "Decode as ISO-8859-1 / Latin-1".
|
||||
StringBuilder builder(input.length());
|
||||
|
||||
for (auto byte : input.bytes())
|
||||
builder.append_code_point(byte);
|
||||
|
||||
return builder.to_string_without_validation();
|
||||
}
|
||||
|
||||
// https://infra.spec.whatwg.org/#code-unit-less-than
|
||||
bool code_unit_less_than(StringView a, StringView b)
|
||||
{
|
||||
|
||||
@@ -20,8 +20,6 @@ WEB_API ErrorOr<String> strip_and_collapse_whitespace(StringView string);
|
||||
Utf16String strip_and_collapse_whitespace(Utf16String const& string);
|
||||
WEB_API bool is_code_unit_prefix(StringView potential_prefix, StringView input);
|
||||
WEB_API ErrorOr<String> convert_to_scalar_value_string(StringView string);
|
||||
ByteString isomorphic_encode(StringView input);
|
||||
WEB_API String isomorphic_decode(StringView input);
|
||||
bool code_unit_less_than(StringView a, StringView b);
|
||||
|
||||
}
|
||||
|
||||
@@ -43,7 +43,6 @@
|
||||
#include <LibWeb/HTML/Window.h>
|
||||
#include <LibWeb/Infra/ByteSequences.h>
|
||||
#include <LibWeb/Infra/JSON.h>
|
||||
#include <LibWeb/Infra/Strings.h>
|
||||
#include <LibWeb/Loader/ResourceLoader.h>
|
||||
#include <LibWeb/Page/Page.h>
|
||||
#include <LibWeb/Platform/EventLoopPlugin.h>
|
||||
@@ -968,7 +967,7 @@ Optional<String> XMLHttpRequest::get_response_header(String const& name) const
|
||||
return {};
|
||||
|
||||
// FIXME: The spec doesn't mention isomorphic decode. Spec bug?
|
||||
return Infra::isomorphic_decode(header_bytes->bytes());
|
||||
return TextCodec::isomorphic_decode(*header_bytes);
|
||||
}
|
||||
|
||||
// https://xhr.spec.whatwg.org/#legacy-uppercased-byte-less-than
|
||||
@@ -1002,7 +1001,7 @@ String XMLHttpRequest::get_all_response_headers() const
|
||||
// 4. For each header in headers, append header’s name, followed by a 0x3A 0x20 byte pair, followed by header’s value, followed by a 0x0D 0x0A byte pair, to output.
|
||||
for (auto const& header : initial_headers) {
|
||||
// FIXME: The spec does not mention isomorphic decode. Spec bug?
|
||||
output.appendff("{}: {}\r\n", header.name, Infra::isomorphic_decode(header.value));
|
||||
output.appendff("{}: {}\r\n", header.name, TextCodec::isomorphic_decode(header.value));
|
||||
}
|
||||
|
||||
// 5. Return output.
|
||||
|
||||
Reference in New Issue
Block a user