From ccc5be08d60b6549e977e8eb48921414be4cc121 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 18 Sep 2025 09:41:01 -0700 Subject: [PATCH 01/12] start --- src/passes/GlobalStructInference.cpp | 55 ++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/src/passes/GlobalStructInference.cpp b/src/passes/GlobalStructInference.cpp index e2b728fc10b..14162d9bc90 100644 --- a/src/passes/GlobalStructInference.cpp +++ b/src/passes/GlobalStructInference.cpp @@ -79,6 +79,8 @@ struct GlobalStructInference : public Pass { // optimizable it will have an entry here, and not if not. std::unordered_map> typeGlobals; + std::unique_ptr subTypes; + void run(Module* module) override { if (!module->features.hasGC()) { return; @@ -208,6 +210,12 @@ struct GlobalStructInference : public Pass { return; } + // When CD is enabled, we can optimize to ref.get_desc, depending on the + // presence of subtypes. + if (module->features.hasCustomDescriptors()) { + subTypes = std::make_unique(*module); + } + // The above loop on typeGlobalsCopy is on an unsorted data structure, and // that can lead to nondeterminism in typeGlobals. Sort the vectors there to // ensure determinism. @@ -528,6 +536,53 @@ struct GlobalStructInference : public Pass { right)); } + void visitRefCast(RefCast* curr) { + // When we see (ref.cast $T), and the type has a descriptor, and that + // desceriptor only has a single global, then we can do (ref.cast_desc) + // using the descriptor. Descriptor XXX + // casts are usually more efficient than normal ones (and even more so + // if we get lucky and are in a loop, where the global.get of the + // descriptor can be hoisted). + + // Check if we have a descriptor. + auto type = curr->type; + if (type == Type::unreachable) { + return; + } + auto heapType = type.getHeapType(); + auto desc = heapType.getDescriptorType(); + if (!desc) { + return; + } + + // Check if the type has no subtypes, as a ref.cast_desc will find + // precisely that type and nothing else. 
+ if (!parent.subTypes->getStrictSubTypes(heapType).empty()) { + return; + } + + // Check if we have a single global for the descriptor. + auto iter = parent.typeGlobals.find(*desc); + if (iter == parent.typeGlobals.end()) { + return; + } + const auto& globals = iter->second; + if (globals.size() != 1) { + return; + } + + // We can optimize! + auto global = globals[0]; + auto& wasm = *getModule(); + Builder builder(wasm); + auto* getGlobal = + builder.makeGlobalGet(global, wasm.getGlobal(global)->type); + auto* castDesc = builder.makeRefCast(curr->ref, getGlobal, curr->type); + replaceCurrent(castDesc); + + // TODO nullable cast? + } + void visitFunction(Function* func) { if (refinalize) { ReFinalize().walkFunctionInModule(func, getModule()); From f4b9e45db3dc91971c25d2f99df335d120fe42c2 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 18 Sep 2025 10:33:43 -0700 Subject: [PATCH 02/12] test --- src/passes/GlobalStructInference.cpp | 1 + test/lit/passes/gsi-desc.wast | 52 ++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) diff --git a/src/passes/GlobalStructInference.cpp b/src/passes/GlobalStructInference.cpp index 14162d9bc90..4183405fac4 100644 --- a/src/passes/GlobalStructInference.cpp +++ b/src/passes/GlobalStructInference.cpp @@ -543,6 +543,7 @@ struct GlobalStructInference : public Pass { // casts are usually more efficient than normal ones (and even more so // if we get lucky and are in a loop, where the global.get of the // descriptor can be hoisted). + // TODO: only do this when shrinkLevel == 0? // Check if we have a descriptor. auto type = curr->type; diff --git a/test/lit/passes/gsi-desc.wast b/test/lit/passes/gsi-desc.wast index 0ca1806b568..5ffb4902434 100644 --- a/test/lit/passes/gsi-desc.wast +++ b/test/lit/passes/gsi-desc.wast @@ -186,3 +186,55 @@ ) ) +;; Two types with descriptors and subtyping between them. 
+(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $super (sub (descriptor $super.desc (struct)))) + (type $super (sub (descriptor $super.desc (struct)))) + ;; CHECK: (type $super.desc (sub (describes $super (struct)))) + (type $super.desc (sub (describes $super (struct)))) + + ;; CHECK: (type $sub (sub $super (descriptor $sub.desc (struct)))) + (type $sub (sub $super (descriptor $sub.desc (struct)))) + ;; CHECK: (type $sub.desc (sub $super.desc (describes $sub (struct)))) + (type $sub.desc (sub $super.desc (describes $sub (struct)))) + ) + + ;; CHECK: (type $4 (func (param anyref))) + + ;; CHECK: (global $sub.desc (ref $sub.desc) (struct.new_default $sub.desc)) + (global $sub.desc (ref $sub.desc) (struct.new $sub.desc)) + + ;; CHECK: (global $super.desc (ref $super.desc) (struct.new_default $super.desc)) + (global $super.desc (ref $super.desc) (struct.new $super.desc)) + + ;; CHECK: (func $test (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $super) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $sub) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $sub.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; The second cast here is optimizable: it can only be to a single type with + ;; no subtypes, so we can use ref.cast_desc. 
+ (drop + (ref.cast (ref $super) + (local.get $any) + ) + ) + (drop + (ref.cast (ref $sub) + (local.get $any) + ) + ) + ) +) + From 76916738151c604ac173b519efd4c4403f8df0cf Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 18 Sep 2025 10:34:20 -0700 Subject: [PATCH 03/12] test --- test/lit/passes/gsi-desc.wast | 36 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/test/lit/passes/gsi-desc.wast b/test/lit/passes/gsi-desc.wast index 5ffb4902434..38a425b8484 100644 --- a/test/lit/passes/gsi-desc.wast +++ b/test/lit/passes/gsi-desc.wast @@ -190,35 +190,35 @@ (module (rec ;; CHECK: (rec - ;; CHECK-NEXT: (type $super (sub (descriptor $super.desc (struct)))) - (type $super (sub (descriptor $super.desc (struct)))) - ;; CHECK: (type $super.desc (sub (describes $super (struct)))) - (type $super.desc (sub (describes $super (struct)))) - - ;; CHECK: (type $sub (sub $super (descriptor $sub.desc (struct)))) - (type $sub (sub $super (descriptor $sub.desc (struct)))) - ;; CHECK: (type $sub.desc (sub $super.desc (describes $sub (struct)))) - (type $sub.desc (sub $super.desc (describes $sub (struct)))) + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + + ;; CHECK: (type $B (sub $A (descriptor $B.desc (struct)))) + (type $B (sub $A (descriptor $B.desc (struct)))) + ;; CHECK: (type $B.desc (sub $A.desc (describes $B (struct)))) + (type $B.desc (sub $A.desc (describes $B (struct)))) ) ;; CHECK: (type $4 (func (param anyref))) - ;; CHECK: (global $sub.desc (ref $sub.desc) (struct.new_default $sub.desc)) - (global $sub.desc (ref $sub.desc) (struct.new $sub.desc)) + ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) + (global $B.desc (ref $B.desc) (struct.new $B.desc)) - ;; CHECK: (global $super.desc (ref $super.desc) (struct.new_default $super.desc)) - 
(global $super.desc (ref $super.desc) (struct.new $super.desc)) + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) ;; CHECK: (func $test (type $4) (param $any anyref) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast (ref $super) + ;; CHECK-NEXT: (ref.cast (ref $A) ;; CHECK-NEXT: (local.get $any) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast_desc (ref $sub) + ;; CHECK-NEXT: (ref.cast_desc (ref $B) ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: (global.get $sub.desc) + ;; CHECK-NEXT: (global.get $B.desc) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) @@ -226,12 +226,12 @@ ;; The second cast here is optimizable: it can only be to a single type with ;; no subtypes, so we can use ref.cast_desc. (drop - (ref.cast (ref $super) + (ref.cast (ref $A) (local.get $any) ) ) (drop - (ref.cast (ref $sub) + (ref.cast (ref $B) (local.get $any) ) ) From 10d80a7e0316abc99ec2ab681e11beee1dd1db4d Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 18 Sep 2025 10:45:02 -0700 Subject: [PATCH 04/12] test --- test/lit/passes/gsi-desc.wast | 108 +++++++++++++++++++++++++++++++++- 1 file changed, 106 insertions(+), 2 deletions(-) diff --git a/test/lit/passes/gsi-desc.wast b/test/lit/passes/gsi-desc.wast index 38a425b8484..482f495e1a0 100644 --- a/test/lit/passes/gsi-desc.wast +++ b/test/lit/passes/gsi-desc.wast @@ -203,12 +203,64 @@ ;; CHECK: (type $4 (func (param anyref))) + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) (global $B.desc (ref $B.desc) (struct.new $B.desc)) + ;; CHECK: (func $test (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; 
CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $B.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; The second cast here is optimizable: it can only be to a single type with + ;; no subtypes, so we can use ref.cast_desc. + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + (drop + (ref.cast (ref $B) + (local.get $any) + ) + ) + ) +) + +;; As above, but without subtyping between $A and $B. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + + ;; CHECK: (type $B (sub (descriptor $B.desc (struct)))) + (type $B (sub (descriptor $B.desc (struct)))) + ;; CHECK: (type $B.desc (sub $A.desc (describes $B (struct)))) + (type $B.desc (sub $A.desc (describes $B (struct)))) + ) + + ;; CHECK: (type $4 (func (param anyref))) + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) (global $A.desc (ref $A.desc) (struct.new $A.desc)) + ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) + (global $B.desc (ref $B.desc) (struct.new $B.desc)) + ;; CHECK: (func $test (type $4) (param $any anyref) ;; CHECK-NEXT: (drop ;; CHECK-NEXT: (ref.cast (ref $A) @@ -223,8 +275,60 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $test (param $any anyref) - ;; The second cast here is optimizable: it can only be to a single type with - ;; no subtypes, so we can use ref.cast_desc. + ;; We still cannot optimize $A: while $A has no subtypes, the descriptor + ;; $A.desc has a subtype. We could optimize this TODO + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + (drop + (ref.cast (ref $B) + (local.get $any) + ) + ) + ) +) + +;; As above, but without subtyping between $A.desc and $B.desc. 
+(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + + ;; CHECK: (type $B (sub (descriptor $B.desc (struct)))) + (type $B (sub (descriptor $B.desc (struct)))) + ;; CHECK: (type $B.desc (sub (describes $B (struct)))) + (type $B.desc (sub (describes $B (struct)))) + ) + + ;; CHECK: (type $4 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) + (global $B.desc (ref $B.desc) (struct.new $B.desc)) + + ;; CHECK: (func $test (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $A.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $B.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We can fully optimize these two independent cases. (drop (ref.cast (ref $A) (local.get $any) From 8bebd6ba3f4adb0316a1f68168707b9a75188894 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 18 Sep 2025 10:46:03 -0700 Subject: [PATCH 05/12] test --- test/lit/passes/gsi-desc.wast | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/test/lit/passes/gsi-desc.wast b/test/lit/passes/gsi-desc.wast index 482f495e1a0..325592b1f48 100644 --- a/test/lit/passes/gsi-desc.wast +++ b/test/lit/passes/gsi-desc.wast @@ -342,3 +342,38 @@ ) ) +;; Two descriptor instances in globals. 
+(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + ) + + ;; CHECK: (type $2 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (global $A.desc2 (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc2 (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (func $test (type $2) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We do not optimize here. TODO: we could with a select + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + ) +) + From 4475e505f1169d7cea0674be47b244dd16ccc1d2 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 18 Sep 2025 10:46:42 -0700 Subject: [PATCH 06/12] test --- test/lit/passes/gsi-desc.wast | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/test/lit/passes/gsi-desc.wast b/test/lit/passes/gsi-desc.wast index 325592b1f48..1cfe3916b36 100644 --- a/test/lit/passes/gsi-desc.wast +++ b/test/lit/passes/gsi-desc.wast @@ -366,6 +366,14 @@ ;; CHECK-NEXT: (local.get $any) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block ;; (replaces unreachable RefCast we can't emit) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) (func $test (param $any anyref) ;; We do not optimize here. TODO: we could with a select @@ -374,6 +382,12 @@ (local.get $any) ) ) + ;; We do not error on unreachable casts. 
+ (drop + (ref.cast (ref $A) + (unreachable) + ) + ) ) ) From 891f616cd06ccdd631c11755e799ef60264e9f07 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 18 Sep 2025 10:47:59 -0700 Subject: [PATCH 07/12] test --- test/lit/passes/gsi-desc.wast | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/test/lit/passes/gsi-desc.wast b/test/lit/passes/gsi-desc.wast index 1cfe3916b36..33c16f48c79 100644 --- a/test/lit/passes/gsi-desc.wast +++ b/test/lit/passes/gsi-desc.wast @@ -391,3 +391,26 @@ ) ) +(module + ;; CHECK: (type $0 (func (param anyref))) + + ;; CHECK: (type $A (sub (struct))) + (type $A (sub (struct))) + + ;; CHECK: (func $test (type $0) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We do not handle casts to types without descriptors. + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + ) +) + From c3d3c5aa7a6ffe2bc0dfca3c4f39f91f41c48d39 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 18 Sep 2025 10:51:33 -0700 Subject: [PATCH 08/12] test --- test/lit/passes/gsi-desc.wast | 63 +++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/test/lit/passes/gsi-desc.wast b/test/lit/passes/gsi-desc.wast index 33c16f48c79..dfbc520962c 100644 --- a/test/lit/passes/gsi-desc.wast +++ b/test/lit/passes/gsi-desc.wast @@ -342,6 +342,35 @@ ) ) +;; Zero descriptor instances in globals. 
+(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + ) + + ;; CHECK: (type $2 (func (param anyref))) + + ;; CHECK: (func $test (type $2) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We do not optimize here. TODO: we could make this trap + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + ) +) + ;; Two descriptor instances in globals. (module (rec @@ -414,3 +443,37 @@ ) ) +;; Nullable cast. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + ) + + ;; CHECK: (type $2 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (func $test (type $2) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref null $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $A.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; The cast is nullable, which we can still optimize: null will succeed as + ;; expected. 
+ (drop + (ref.cast (ref null $A) + (local.get $any) + ) + ) + ) +) + From 46b37d348c4025c7e618dd4507d96b569df5f5ea Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Thu, 18 Sep 2025 11:01:30 -0700 Subject: [PATCH 09/12] test --- src/passes/GlobalStructInference.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/passes/GlobalStructInference.cpp b/src/passes/GlobalStructInference.cpp index 4183405fac4..879c9e05dc4 100644 --- a/src/passes/GlobalStructInference.cpp +++ b/src/passes/GlobalStructInference.cpp @@ -557,7 +557,7 @@ struct GlobalStructInference : public Pass { } // Check if the type has no subtypes, as a ref.cast_desc will find - // precisely that type and nothing else. + // precisely that type and nothing else. TODO: exact types too if (!parent.subTypes->getStrictSubTypes(heapType).empty()) { return; } @@ -580,8 +580,6 @@ struct GlobalStructInference : public Pass { builder.makeGlobalGet(global, wasm.getGlobal(global)->type); auto* castDesc = builder.makeRefCast(curr->ref, getGlobal, curr->type); replaceCurrent(castDesc); - - // TODO nullable cast? } void visitFunction(Function* func) { From 9a9c1221e9372ffd94f0839d328a795d85652a9a Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Fri, 19 Sep 2025 09:44:49 -0700 Subject: [PATCH 10/12] TODO for exact casts --- src/passes/GlobalStructInference.cpp | 16 ++++++++-------- test/lit/passes/gsi-desc.wast | 28 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 8 deletions(-) diff --git a/src/passes/GlobalStructInference.cpp b/src/passes/GlobalStructInference.cpp index 879c9e05dc4..6f3514275b4 100644 --- a/src/passes/GlobalStructInference.cpp +++ b/src/passes/GlobalStructInference.cpp @@ -538,11 +538,10 @@ struct GlobalStructInference : public Pass { void visitRefCast(RefCast* curr) { // When we see (ref.cast $T), and the type has a descriptor, and that - // desceriptor only has a single global, then we can do (ref.cast_desc) - // using the descriptor. 
Descriptor XXX - // casts are usually more efficient than normal ones (and even more so - // if we get lucky and are in a loop, where the global.get of the - // descriptor can be hoisted). + // descriptor only has a single global, then we can do (ref.cast_desc) + // using the descriptor. Descriptor casts are usually more efficient + // than normal ones (and even more so if we get lucky and are in a loop, + // where the global.get of the descriptor can be hoisted). // TODO: only do this when shrinkLevel == 0? // Check if we have a descriptor. @@ -556,9 +555,10 @@ struct GlobalStructInference : public Pass { return; } - // Check if the type has no subtypes, as a ref.cast_desc will find - // precisely that type and nothing else. TODO: exact types too - if (!parent.subTypes->getStrictSubTypes(heapType).empty()) { + // Check if the type has no (relevant) subtypes, as a ref.cast_desc will + // find precisely that type and nothing else. + if (!type.isExact() && + !parent.subTypes->getStrictSubTypes(heapType).empty()) { return; } diff --git a/test/lit/passes/gsi-desc.wast b/test/lit/passes/gsi-desc.wast index dfbc520962c..c109bd5ca5e 100644 --- a/test/lit/passes/gsi-desc.wast +++ b/test/lit/passes/gsi-desc.wast @@ -236,6 +236,34 @@ ) ) ) + + ;; CHECK: (func $test-exact (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref (exact $A)) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $B.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test-exact (param $any anyref) + ;; When using exact casts, we can optimize both. TODO: atm we do not + ;; optimize $A, as we propagate on |typeGlobals|. + (drop + (ref.cast (ref (exact $A)) + (local.get $any) + ) + ) + (drop + (ref.cast (ref (exact $B)) + (local.get $any) + ) + ) + ) ) ;; As above, but without subtyping between $A and $B. 
From 04f76fd5893f1a82e421327c3af0b3cc284dee0c Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 13 Jan 2026 13:32:37 -0800 Subject: [PATCH 11/12] test --- src/passes/GlobalStructInference.cpp | 18 +- test/lit/passes/gsi-desc.wast | 319 ------------------------- test/lit/passes/gsi-to-desc-cast.wast | 326 ++++++++++++++++++++++++++ 3 files changed, 341 insertions(+), 322 deletions(-) create mode 100644 test/lit/passes/gsi-to-desc-cast.wast diff --git a/src/passes/GlobalStructInference.cpp b/src/passes/GlobalStructInference.cpp index 9a7f5ee0fa8..ae2d0cb0557 100644 --- a/src/passes/GlobalStructInference.cpp +++ b/src/passes/GlobalStructInference.cpp @@ -54,6 +54,13 @@ // // TODO: Only do the case with a select when shrinkLevel == 0? // +// --pass-arg=gsi-desc-casts +// +// Optimize casts to descriptor casts when possible. If the cast target has no +// relevant subtypes, and its descriptor is held in exactly one global, then we +// can do a ref.cast_desc using that global instead, which can be faster (but +// is larger, so this is not on by default yet). +// #include @@ -89,6 +96,8 @@ struct GlobalStructInference : public Pass { // type-based inference, and this remains empty. std::unordered_map> typeGlobals; + bool optimizeToDescCasts; + std::unique_ptr subTypes; void run(Module* module) override { @@ -96,9 +105,9 @@ struct GlobalStructInference : public Pass { return; } - // When CD is enabled, we can optimize to ref.get_desc, depending on the - // presence of subtypes. - if (module->features.hasCustomDescriptors()) { + optimizeToDescCasts = hasArgument("gsi-desc-casts"); + if (optimizeToDescCasts) { + // We need subtypes to know when to optimize to a desc cast. subTypes = std::make_unique(*module); } @@ -513,6 +522,9 @@ struct GlobalStructInference : public Pass { // than normal ones (and even more so if we get lucky and are in a loop, // where the global.get of the descriptor can be hoisted). // TODO: only do this when shrinkLevel == 0? 
+ if (!parent.optimizeToDescCasts) { + return; + } // Check if we have a descriptor. auto type = curr->type; diff --git a/test/lit/passes/gsi-desc.wast b/test/lit/passes/gsi-desc.wast index 414f84e8fc1..32b42d999a5 100644 --- a/test/lit/passes/gsi-desc.wast +++ b/test/lit/passes/gsi-desc.wast @@ -186,322 +186,3 @@ ) ) -;; Two types with descriptors and subtyping between them. -(module - (rec - ;; CHECK: (rec - ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) - (type $A (sub (descriptor $A.desc (struct)))) - ;; CHECK: (type $A.desc (sub (describes $A (struct)))) - (type $A.desc (sub (describes $A (struct)))) - - ;; CHECK: (type $B (sub $A (descriptor $B.desc (struct)))) - (type $B (sub $A (descriptor $B.desc (struct)))) - ;; CHECK: (type $B.desc (sub $A.desc (describes $B (struct)))) - (type $B.desc (sub $A.desc (describes $B (struct)))) - ) - - ;; CHECK: (type $4 (func (param anyref))) - - ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) - (global $A.desc (ref $A.desc) (struct.new $A.desc)) - - ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) - (global $B.desc (ref $B.desc) (struct.new $B.desc)) - - ;; CHECK: (func $test (type $4) (param $any anyref) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast (ref $A) - ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast_desc (ref $B) - ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: (global.get $B.desc) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $test (param $any anyref) - ;; The second cast here is optimizable: it can only be to a single type with - ;; no subtypes, so we can use ref.cast_desc. 
- (drop - (ref.cast (ref $A) - (local.get $any) - ) - ) - (drop - (ref.cast (ref $B) - (local.get $any) - ) - ) - ) - - ;; CHECK: (func $test-exact (type $4) (param $any anyref) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast (ref (exact $A)) - ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast_desc (ref $B) - ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: (global.get $B.desc) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $test-exact (param $any anyref) - ;; When using exact casts, we can optimize both. TODO: atm we do not - ;; optimize $A, as we propagate on |typeGlobals|. - (drop - (ref.cast (ref (exact $A)) - (local.get $any) - ) - ) - (drop - (ref.cast (ref (exact $B)) - (local.get $any) - ) - ) - ) -) - -;; As above, but without subtyping between $A and $B. -(module - (rec - ;; CHECK: (rec - ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) - (type $A (sub (descriptor $A.desc (struct)))) - ;; CHECK: (type $A.desc (sub (describes $A (struct)))) - (type $A.desc (sub (describes $A (struct)))) - - ;; CHECK: (type $B (sub (descriptor $B.desc (struct)))) - (type $B (sub (descriptor $B.desc (struct)))) - ;; CHECK: (type $B.desc (sub $A.desc (describes $B (struct)))) - (type $B.desc (sub $A.desc (describes $B (struct)))) - ) - - ;; CHECK: (type $4 (func (param anyref))) - - ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) - (global $A.desc (ref $A.desc) (struct.new $A.desc)) - - ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) - (global $B.desc (ref $B.desc) (struct.new $B.desc)) - - ;; CHECK: (func $test (type $4) (param $any anyref) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast (ref $A) - ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast_desc (ref $B) - ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: (global.get $B.desc) - ;; CHECK-NEXT: ) - ;; 
CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $test (param $any anyref) - ;; We still cannot optimize $A: while $A has no subtypes, the descriptor - ;; $A.desc has a subtype. We could optimize this TODO - (drop - (ref.cast (ref $A) - (local.get $any) - ) - ) - (drop - (ref.cast (ref $B) - (local.get $any) - ) - ) - ) -) - -;; As above, but without subtyping between $A.desc and $B.desc. -(module - (rec - ;; CHECK: (rec - ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) - (type $A (sub (descriptor $A.desc (struct)))) - ;; CHECK: (type $A.desc (sub (describes $A (struct)))) - (type $A.desc (sub (describes $A (struct)))) - - ;; CHECK: (type $B (sub (descriptor $B.desc (struct)))) - (type $B (sub (descriptor $B.desc (struct)))) - ;; CHECK: (type $B.desc (sub (describes $B (struct)))) - (type $B.desc (sub (describes $B (struct)))) - ) - - ;; CHECK: (type $4 (func (param anyref))) - - ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) - (global $A.desc (ref $A.desc) (struct.new $A.desc)) - - ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) - (global $B.desc (ref $B.desc) (struct.new $B.desc)) - - ;; CHECK: (func $test (type $4) (param $any anyref) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast_desc (ref $A) - ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: (global.get $A.desc) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast_desc (ref $B) - ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: (global.get $B.desc) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $test (param $any anyref) - ;; We can fully optimize these two independent cases. - (drop - (ref.cast (ref $A) - (local.get $any) - ) - ) - (drop - (ref.cast (ref $B) - (local.get $any) - ) - ) - ) -) - -;; Zero descriptor instances in globals. 
-(module - (rec - ;; CHECK: (rec - ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) - (type $A (sub (descriptor $A.desc (struct)))) - ;; CHECK: (type $A.desc (sub (describes $A (struct)))) - (type $A.desc (sub (describes $A (struct)))) - ) - - ;; CHECK: (type $2 (func (param anyref))) - - ;; CHECK: (func $test (type $2) (param $any anyref) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast (ref $A) - ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $test (param $any anyref) - ;; We do not optimize here. TODO: we could make this trap - (drop - (ref.cast (ref $A) - (local.get $any) - ) - ) - ) -) - -;; Two descriptor instances in globals. -(module - (rec - ;; CHECK: (rec - ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) - (type $A (sub (descriptor $A.desc (struct)))) - ;; CHECK: (type $A.desc (sub (describes $A (struct)))) - (type $A.desc (sub (describes $A (struct)))) - ) - - ;; CHECK: (type $2 (func (param anyref))) - - ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) - (global $A.desc (ref $A.desc) (struct.new $A.desc)) - - ;; CHECK: (global $A.desc2 (ref $A.desc) (struct.new_default $A.desc)) - (global $A.desc2 (ref $A.desc) (struct.new $A.desc)) - - ;; CHECK: (func $test (type $2) (param $any anyref) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast (ref $A) - ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (block ;; (replaces unreachable RefCast we can't emit) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (unreachable) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (unreachable) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $test (param $any anyref) - ;; We do not optimize here. TODO: we could with a select - (drop - (ref.cast (ref $A) - (local.get $any) - ) - ) - ;; We do not error on unreachable casts. 
- (drop - (ref.cast (ref $A) - (unreachable) - ) - ) - ) -) - -(module - ;; CHECK: (type $0 (func (param anyref))) - - ;; CHECK: (type $A (sub (struct))) - (type $A (sub (struct))) - - ;; CHECK: (func $test (type $0) (param $any anyref) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast (ref $A) - ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $test (param $any anyref) - ;; We do not handle casts to types without descriptors. - (drop - (ref.cast (ref $A) - (local.get $any) - ) - ) - ) -) - -;; Nullable cast. -(module - (rec - ;; CHECK: (rec - ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) - (type $A (sub (descriptor $A.desc (struct)))) - ;; CHECK: (type $A.desc (sub (describes $A (struct)))) - (type $A.desc (sub (describes $A (struct)))) - ) - - ;; CHECK: (type $2 (func (param anyref))) - - ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) - (global $A.desc (ref $A.desc) (struct.new $A.desc)) - - ;; CHECK: (func $test (type $2) (param $any anyref) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast_desc (ref null $A) - ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: (global.get $A.desc) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $test (param $any anyref) - ;; The cast is nullable, which we can still optimize: null will succeed as - ;; expected. - (drop - (ref.cast (ref null $A) - (local.get $any) - ) - ) - ) -) - diff --git a/test/lit/passes/gsi-to-desc-cast.wast b/test/lit/passes/gsi-to-desc-cast.wast new file mode 100644 index 00000000000..c99de511f60 --- /dev/null +++ b/test/lit/passes/gsi-to-desc-cast.wast @@ -0,0 +1,326 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. + +;; We should optimize casts to desc casts only when the flag is set. 
+ +;; RUN: foreach %s %t wasm-opt --gsi -all --closed-world -S -o - | filecheck %s +;; RUN: foreach %s %t wasm-opt --gsi --pass-arg=gsi-desc-casts -all --closed-world -S -o - | filecheck %s --check-prefix=DESCC + +;; Two types with descriptors and subtyping between them. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + + ;; CHECK: (type $B (sub $A (descriptor $B.desc (struct)))) + (type $B (sub $A (descriptor $B.desc (struct)))) + ;; CHECK: (type $B.desc (sub $A.desc (describes $B (struct)))) + (type $B.desc (sub $A.desc (describes $B (struct)))) + ) + + ;; CHECK: (type $4 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) + (global $B.desc (ref $B.desc) (struct.new $B.desc)) + + ;; CHECK: (func $test (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $B.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; The second cast here is optimizable: it can only be to a single type with + ;; no subtypes, so we can use ref.cast_desc. 
+ (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + (drop + (ref.cast (ref $B) + (local.get $any) + ) + ) + ) + + ;; CHECK: (func $test-exact (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref (exact $A)) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $B.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test-exact (param $any anyref) + ;; When using exact casts, we can optimize both. TODO: atm we do not + ;; optimize $A, as we propagate on |typeGlobals|. + (drop + (ref.cast (ref (exact $A)) + (local.get $any) + ) + ) + (drop + (ref.cast (ref (exact $B)) + (local.get $any) + ) + ) + ) +) + +;; As above, but without subtyping between $A and $B. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + + ;; CHECK: (type $B (sub (descriptor $B.desc (struct)))) + (type $B (sub (descriptor $B.desc (struct)))) + ;; CHECK: (type $B.desc (sub $A.desc (describes $B (struct)))) + (type $B.desc (sub $A.desc (describes $B (struct)))) + ) + + ;; CHECK: (type $4 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) + (global $B.desc (ref $B.desc) (struct.new $B.desc)) + + ;; CHECK: (func $test (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $B.desc) + ;; CHECK-NEXT: ) + ;; 
CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We still cannot optimize $A: while $A has no subtypes, the descriptor + ;; $A.desc has a subtype. We could optimize this TODO + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + (drop + (ref.cast (ref $B) + (local.get $any) + ) + ) + ) +) + +;; As above, but without subtyping between $A.desc and $B.desc. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + + ;; CHECK: (type $B (sub (descriptor $B.desc (struct)))) + (type $B (sub (descriptor $B.desc (struct)))) + ;; CHECK: (type $B.desc (sub (describes $B (struct)))) + (type $B.desc (sub (describes $B (struct)))) + ) + + ;; CHECK: (type $4 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) + (global $B.desc (ref $B.desc) (struct.new $B.desc)) + + ;; CHECK: (func $test (type $4) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $A.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $B.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We can fully optimize these two independent cases. + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + (drop + (ref.cast (ref $B) + (local.get $any) + ) + ) + ) +) + +;; Zero descriptor instances in globals. 
+(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + ) + + ;; CHECK: (type $2 (func (param anyref))) + + ;; CHECK: (func $test (type $2) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We do not optimize here. TODO: we could make this trap + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + ) +) + +;; Two descriptor instances in globals. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + ) + + ;; CHECK: (type $2 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (global $A.desc2 (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc2 (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (func $test (type $2) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (block ;; (replaces unreachable RefCast we can't emit) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (unreachable) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We do not optimize here. TODO: we could with a select + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + ;; We do not error on unreachable casts. 
+ (drop + (ref.cast (ref $A) + (unreachable) + ) + ) + ) +) + +(module + ;; CHECK: (type $0 (func (param anyref))) + + ;; CHECK: (type $A (sub (struct))) + (type $A (sub (struct))) + + ;; CHECK: (func $test (type $0) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast (ref $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; We do not handle casts to types without descriptors. + (drop + (ref.cast (ref $A) + (local.get $any) + ) + ) + ) +) + +;; Nullable cast. +(module + (rec + ;; CHECK: (rec + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) + (type $A (sub (descriptor $A.desc (struct)))) + ;; CHECK: (type $A.desc (sub (describes $A (struct)))) + (type $A.desc (sub (describes $A (struct)))) + ) + + ;; CHECK: (type $2 (func (param anyref))) + + ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + (global $A.desc (ref $A.desc) (struct.new $A.desc)) + + ;; CHECK: (func $test (type $2) (param $any anyref) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (ref.cast_desc (ref null $A) + ;; CHECK-NEXT: (local.get $any) + ;; CHECK-NEXT: (global.get $A.desc) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $test (param $any anyref) + ;; The cast is nullable, which we can still optimize: null will succeed as + ;; expected. 
+ (drop + (ref.cast (ref null $A) + (local.get $any) + ) + ) + ) +) + From 34b1ddfc8c6fb1ba441e237c31b0d3fba08041dc Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Tue, 13 Jan 2026 13:36:13 -0800 Subject: [PATCH 12/12] work --- test/lit/passes/gsi-to-desc-cast.wast | 243 ++++++++++++++++---------- 1 file changed, 151 insertions(+), 92 deletions(-) diff --git a/test/lit/passes/gsi-to-desc-cast.wast b/test/lit/passes/gsi-to-desc-cast.wast index c99de511f60..73467efa2e1 100644 --- a/test/lit/passes/gsi-to-desc-cast.wast +++ b/test/lit/passes/gsi-to-desc-cast.wast @@ -9,23 +9,32 @@ (module (rec ;; CHECK: (rec - ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) - (type $A (sub (descriptor $A.desc (struct)))) - ;; CHECK: (type $A.desc (sub (describes $A (struct)))) - (type $A.desc (sub (describes $A (struct)))) - - ;; CHECK: (type $B (sub $A (descriptor $B.desc (struct)))) - (type $B (sub $A (descriptor $B.desc (struct)))) - ;; CHECK: (type $B.desc (sub $A.desc (describes $B (struct)))) - (type $B.desc (sub $A.desc (describes $B (struct)))) + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc) (struct))) + ;; DESCC: (rec + ;; DESCC-NEXT: (type $A (sub (descriptor $A.desc) (struct))) + (type $A (sub (descriptor $A.desc) (struct))) + ;; CHECK: (type $A.desc (sub (describes $A) (struct))) + ;; DESCC: (type $A.desc (sub (describes $A) (struct))) + (type $A.desc (sub (describes $A) (struct))) + + ;; CHECK: (type $B (sub $A (descriptor $B.desc) (struct))) + ;; DESCC: (type $B (sub $A (descriptor $B.desc) (struct))) + (type $B (sub $A (descriptor $B.desc) (struct))) + ;; CHECK: (type $B.desc (sub $A.desc (describes $B) (struct))) + ;; DESCC: (type $B.desc (sub $A.desc (describes $B) (struct))) + (type $B.desc (sub $A.desc (describes $B) (struct))) ) ;; CHECK: (type $4 (func (param anyref))) ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + ;; DESCC: (type $4 (func (param anyref))) + + ;; DESCC: (global $A.desc (ref $A.desc) (struct.new_default 
$A.desc)) (global $A.desc (ref $A.desc) (struct.new $A.desc)) ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) + ;; DESCC: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) (global $B.desc (ref $B.desc) (struct.new $B.desc)) ;; CHECK: (func $test (type $4) (param $any anyref) @@ -35,12 +44,24 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (ref.cast (ref $B) ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: (global.get $B.desc) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; DESCC: (func $test (type $4) (param $any anyref) + ;; DESCC-NEXT: (drop + ;; DESCC-NEXT: (ref.cast (ref $A) + ;; DESCC-NEXT: (local.get $any) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: (drop + ;; DESCC-NEXT: (ref.cast_desc (ref $B) + ;; DESCC-NEXT: (local.get $any) + ;; DESCC-NEXT: (global.get $B.desc) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) (func $test (param $any anyref) ;; The second cast here is optimizable: it can only be to a single type with ;; no subtypes, so we can use ref.cast_desc. @@ -63,12 +84,24 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (ref.cast (ref (exact $B)) ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: (global.get $B.desc) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; DESCC: (func $test-exact (type $4) (param $any anyref) + ;; DESCC-NEXT: (drop + ;; DESCC-NEXT: (ref.cast (ref (exact $A)) + ;; DESCC-NEXT: (local.get $any) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: (drop + ;; DESCC-NEXT: (ref.cast_desc (ref $B) + ;; DESCC-NEXT: (local.get $any) + ;; DESCC-NEXT: (global.get $B.desc) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) (func $test-exact (param $any anyref) ;; When using exact casts, we can optimize both. TODO: atm we do not ;; optimize $A, as we propagate on |typeGlobals|. 
@@ -85,95 +118,64 @@ ) ) -;; As above, but without subtyping between $A and $B. -(module - (rec - ;; CHECK: (rec - ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) - (type $A (sub (descriptor $A.desc (struct)))) - ;; CHECK: (type $A.desc (sub (describes $A (struct)))) - (type $A.desc (sub (describes $A (struct)))) - - ;; CHECK: (type $B (sub (descriptor $B.desc (struct)))) - (type $B (sub (descriptor $B.desc (struct)))) - ;; CHECK: (type $B.desc (sub $A.desc (describes $B (struct)))) - (type $B.desc (sub $A.desc (describes $B (struct)))) - ) - - ;; CHECK: (type $4 (func (param anyref))) - - ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) - (global $A.desc (ref $A.desc) (struct.new $A.desc)) - - ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) - (global $B.desc (ref $B.desc) (struct.new $B.desc)) - - ;; CHECK: (func $test (type $4) (param $any anyref) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast (ref $A) - ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast_desc (ref $B) - ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: (global.get $B.desc) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - ;; CHECK-NEXT: ) - (func $test (param $any anyref) - ;; We still cannot optimize $A: while $A has no subtypes, the descriptor - ;; $A.desc has a subtype. We could optimize this TODO - (drop - (ref.cast (ref $A) - (local.get $any) - ) - ) - (drop - (ref.cast (ref $B) - (local.get $any) - ) - ) - ) -) - ;; As above, but without subtyping between $A.desc and $B.desc. 
(module (rec ;; CHECK: (rec - ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) - (type $A (sub (descriptor $A.desc (struct)))) - ;; CHECK: (type $A.desc (sub (describes $A (struct)))) - (type $A.desc (sub (describes $A (struct)))) - - ;; CHECK: (type $B (sub (descriptor $B.desc (struct)))) - (type $B (sub (descriptor $B.desc (struct)))) - ;; CHECK: (type $B.desc (sub (describes $B (struct)))) - (type $B.desc (sub (describes $B (struct)))) + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc) (struct))) + ;; DESCC: (rec + ;; DESCC-NEXT: (type $A (sub (descriptor $A.desc) (struct))) + (type $A (sub (descriptor $A.desc) (struct))) + ;; CHECK: (type $A.desc (sub (describes $A) (struct))) + ;; DESCC: (type $A.desc (sub (describes $A) (struct))) + (type $A.desc (sub (describes $A) (struct))) + + ;; CHECK: (type $B (sub (descriptor $B.desc) (struct))) + ;; DESCC: (type $B (sub (descriptor $B.desc) (struct))) + (type $B (sub (descriptor $B.desc) (struct))) + ;; CHECK: (type $B.desc (sub (describes $B) (struct))) + ;; DESCC: (type $B.desc (sub (describes $B) (struct))) + (type $B.desc (sub (describes $B) (struct))) ) ;; CHECK: (type $4 (func (param anyref))) ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + ;; DESCC: (type $4 (func (param anyref))) + + ;; DESCC: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) (global $A.desc (ref $A.desc) (struct.new $A.desc)) ;; CHECK: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) + ;; DESCC: (global $B.desc (ref $B.desc) (struct.new_default $B.desc)) (global $B.desc (ref $B.desc) (struct.new $B.desc)) ;; CHECK: (func $test (type $4) (param $any anyref) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast_desc (ref $A) + ;; CHECK-NEXT: (ref.cast (ref $A) ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: (global.get $A.desc) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast_desc (ref $B) + ;; CHECK-NEXT: (ref.cast (ref $B) ;; CHECK-NEXT: (local.get $any) - 
;; CHECK-NEXT: (global.get $B.desc) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; DESCC: (func $test (type $4) (param $any anyref) + ;; DESCC-NEXT: (drop + ;; DESCC-NEXT: (ref.cast_desc (ref $A) + ;; DESCC-NEXT: (local.get $any) + ;; DESCC-NEXT: (global.get $A.desc) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: (drop + ;; DESCC-NEXT: (ref.cast_desc (ref $B) + ;; DESCC-NEXT: (local.get $any) + ;; DESCC-NEXT: (global.get $B.desc) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) (func $test (param $any anyref) ;; We can fully optimize these two independent cases. (drop @@ -193,10 +195,13 @@ (module (rec ;; CHECK: (rec - ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) - (type $A (sub (descriptor $A.desc (struct)))) - ;; CHECK: (type $A.desc (sub (describes $A (struct)))) - (type $A.desc (sub (describes $A (struct)))) + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc) (struct))) + ;; DESCC: (rec + ;; DESCC-NEXT: (type $A (sub (descriptor $A.desc) (struct))) + (type $A (sub (descriptor $A.desc) (struct))) + ;; CHECK: (type $A.desc (sub (describes $A) (struct))) + ;; DESCC: (type $A.desc (sub (describes $A) (struct))) + (type $A.desc (sub (describes $A) (struct))) ) ;; CHECK: (type $2 (func (param anyref))) @@ -208,6 +213,15 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; DESCC: (type $2 (func (param anyref))) + + ;; DESCC: (func $test (type $2) (param $any anyref) + ;; DESCC-NEXT: (drop + ;; DESCC-NEXT: (ref.cast (ref $A) + ;; DESCC-NEXT: (local.get $any) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) (func $test (param $any anyref) ;; We do not optimize here. 
TODO: we could make this trap (drop @@ -222,18 +236,25 @@ (module (rec ;; CHECK: (rec - ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) - (type $A (sub (descriptor $A.desc (struct)))) - ;; CHECK: (type $A.desc (sub (describes $A (struct)))) - (type $A.desc (sub (describes $A (struct)))) + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc) (struct))) + ;; DESCC: (rec + ;; DESCC-NEXT: (type $A (sub (descriptor $A.desc) (struct))) + (type $A (sub (descriptor $A.desc) (struct))) + ;; CHECK: (type $A.desc (sub (describes $A) (struct))) + ;; DESCC: (type $A.desc (sub (describes $A) (struct))) + (type $A.desc (sub (describes $A) (struct))) ) ;; CHECK: (type $2 (func (param anyref))) ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + ;; DESCC: (type $2 (func (param anyref))) + + ;; DESCC: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) (global $A.desc (ref $A.desc) (struct.new $A.desc)) ;; CHECK: (global $A.desc2 (ref $A.desc) (struct.new_default $A.desc)) + ;; DESCC: (global $A.desc2 (ref $A.desc) (struct.new_default $A.desc)) (global $A.desc2 (ref $A.desc) (struct.new $A.desc)) ;; CHECK: (func $test (type $2) (param $any anyref) @@ -251,6 +272,21 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; DESCC: (func $test (type $2) (param $any anyref) + ;; DESCC-NEXT: (drop + ;; DESCC-NEXT: (ref.cast (ref $A) + ;; DESCC-NEXT: (local.get $any) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: (drop + ;; DESCC-NEXT: (block ;; (replaces unreachable RefCast we can't emit) + ;; DESCC-NEXT: (drop + ;; DESCC-NEXT: (unreachable) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: (unreachable) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) (func $test (param $any anyref) ;; We do not optimize here. 
TODO: we could with a select (drop @@ -271,6 +307,9 @@ ;; CHECK: (type $0 (func (param anyref))) ;; CHECK: (type $A (sub (struct))) + ;; DESCC: (type $0 (func (param anyref))) + + ;; DESCC: (type $A (sub (struct))) (type $A (sub (struct))) ;; CHECK: (func $test (type $0) (param $any anyref) @@ -280,6 +319,13 @@ ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; DESCC: (func $test (type $0) (param $any anyref) + ;; DESCC-NEXT: (drop + ;; DESCC-NEXT: (ref.cast (ref $A) + ;; DESCC-NEXT: (local.get $any) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) (func $test (param $any anyref) ;; We do not handle casts to types without descriptors. (drop @@ -294,25 +340,38 @@ (module (rec ;; CHECK: (rec - ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc (struct)))) - (type $A (sub (descriptor $A.desc (struct)))) - ;; CHECK: (type $A.desc (sub (describes $A (struct)))) - (type $A.desc (sub (describes $A (struct)))) + ;; CHECK-NEXT: (type $A (sub (descriptor $A.desc) (struct))) + ;; DESCC: (rec + ;; DESCC-NEXT: (type $A (sub (descriptor $A.desc) (struct))) + (type $A (sub (descriptor $A.desc) (struct))) + ;; CHECK: (type $A.desc (sub (describes $A) (struct))) + ;; DESCC: (type $A.desc (sub (describes $A) (struct))) + (type $A.desc (sub (describes $A) (struct))) ) ;; CHECK: (type $2 (func (param anyref))) ;; CHECK: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) + ;; DESCC: (type $2 (func (param anyref))) + + ;; DESCC: (global $A.desc (ref $A.desc) (struct.new_default $A.desc)) (global $A.desc (ref $A.desc) (struct.new $A.desc)) ;; CHECK: (func $test (type $2) (param $any anyref) ;; CHECK-NEXT: (drop - ;; CHECK-NEXT: (ref.cast_desc (ref null $A) + ;; CHECK-NEXT: (ref.cast (ref null $A) ;; CHECK-NEXT: (local.get $any) - ;; CHECK-NEXT: (global.get $A.desc) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) ;; CHECK-NEXT: ) + ;; DESCC: (func $test (type $2) (param $any anyref) + ;; DESCC-NEXT: (drop + ;; DESCC-NEXT: (ref.cast_desc (ref null $A) + ;; DESCC-NEXT: (local.get 
$any) + ;; DESCC-NEXT: (global.get $A.desc) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) + ;; DESCC-NEXT: ) (func $test (param $any anyref) ;; The cast is nullable, which we can still optimize: null will succeed as ;; expected.