I want to collect all types that appear in a Program.
So, after parsing some source code (only the prelude in the example below), I get a Program
object from the compiler
Initialising the compiler
require "compiler/crystal/**"
# Empty source (both arguments are ""; presumably filename and code — confirm),
# so only the prelude ends up being parsed.
src = Crystal::Compiler::Source.new("", "")
compiler = Crystal::Compiler.new
# Presumably skips code generation, since only type information is needed here — confirm.
compiler.no_codegen = true
I have a class that traverses the types.
# Walks a type hierarchy and accumulates every type it visits into `types`.
class TypeCollector
  # Every type recorded so far.
  getter types = Set(Crystal::Type).new

  # Records a generic type, then descends into each of its instantiations
  # that is not `unbound?`, and finally into the types nested inside it.
  def collect(type : Crystal::GenericType)
    @types << type
    type.generic_types.values.each do |instance|
      # Unbound instantiations are deliberately skipped.
      next if instance.unbound?
      collect instance.as(Crystal::GenericInstanceType)
    end
    collect_nested type
  end

  # Records a named type or a generic instance, then descends into the
  # types nested inside it.
  def collect(type : Crystal::NamedType | Crystal::GenericInstanceType)
    @types << type
    collect_nested type
  end

  # Collects every type nested in *owner*; does nothing when `types?` is nil.
  private def collect_nested(owner)
    nested = owner.types?
    return if nested.nil?
    nested.values.each { |child| collect child }
  end
end
# Compile the source and keep the Program from the result
# (second argument: presumably the output filename — confirm).
program = compiler.compile(src, "").program
tc = TypeCollector.new
# Begin the traversal at the Program itself.
tc.collect(program)
I assumed this should in theory aggregate all types into one big set. While it does so for most types, there is an issue with generic modules.
To check whether I really got all the types, I decided to check whether the parents of each type in the set are themselves in the set. It turns out that's not the case, so I aggregated all “uncollected” types into another set.
# Every type reached by the collector.
all_types = tc.types

# Parents referenced by a collected type that were never collected themselves.
uncollected = Set(Crystal::Type).new
all_types.each do |type|
  # `parents` can be nil, hence the `try`.
  type.parents.try &.each do |parent|
    uncollected << parent unless all_types.includes?(parent)
  end
end

# Report each missed type next to the types held by its type variables.
uncollected.each do |uncollected_type|
  # The cast assumes `type_vars` is a Hash here (true for generic instance types,
  # which is what every uncollected type turned out to be).
  type_args = uncollected_type.type_vars.as(Hash).map do |label, var|
    {label, var.type}.as Tuple(String, Crystal::Type)
  end
  # Actually print the line: building the string alone produces no output.
  puts "#{uncollected_type.to_s.rjust(35)} -- #{type_args}"
end
The output surprised me a little:
Output
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Comparable(Pointer(T)) -- [{"T", Pointer(T)}]
Comparable(Pointer(Pointer(UInt8))) -- [{"T", Pointer(Pointer(UInt8))}]
Indexable(T) -- [{"T", T}]
Indexable(T) -- [{"T", T}]
Indexable(T | U) -- [{"T", (T | U)}]
Iterator(Array(T)) -- [{"T", Array(T)}]
Iterator(Array(T)) -- [{"T", Array(T)}]
Iterator(Array(T)) -- [{"T", Array(T)}]
Iterable(Tuple(K, V)) -- [{"T", Tuple(K, V)}]
Enumerable(Tuple(K, V)) -- [{"T", Tuple(K, V)}]
Iterator(Tuple(K, V)) -- [{"T", Tuple(K, V)}]
Iterator(K) -- [{"T", K}]
Iterator(V) -- [{"T", V}]
Iterable(B) -- [{"T", B}]
Enumerable(B) -- [{"T", B}]
Iterable(Int) -- [{"T", Int+}]
Enumerable(Int) -- [{"T", Int+}]
Iterable(Float) -- [{"T", Float+}]
Enumerable(Float) -- [{"T", Float+}]
Iterator(B) -- [{"T", B}]
Iterator(E) -- [{"T", E}]
Iterator(B) -- [{"T", B}]
Enumerable(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(U) -- [{"T", U}]
Iterator(Array(T)) -- [{"T", Array(T)}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(U) -- [{"T", U}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(Array(T)) -- [{"T", Array(T)}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(Tuple(T, Int32)) -- [{"T", Tuple(T, Int32)}]
Iterator(Tuple(T, O)) -- [{"T", Tuple(T, O)}]
Iterator(Tuple(T1, T2)) -- [{"T", Tuple(T1, T2)}]
Iterator(Tuple(U, Array(T))) -- [{"T", Tuple(U, Array(T))}]
Iterator(Array(T)) -- [{"T", Array(T)}]
Iterator(Array(T)) -- [{"T", Array(T)}]
Iterator(Array(T)) -- [{"T", Array(T)}]
Enumerable(T) -- [{"T", T}]
Iterable(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Iterator(T) -- [{"T", T}]
Channel(T) -- [{"T", T}]
Channel(T) -- [{"T", T}]
Indexable(T) -- [{"T", T}]
Iterable(T) -- [{"T", T}]
Enumerable(T) -- [{"T", T}]
Indexable(T) -- [{"T", T}]
I also printed `uncollected_type.class`, and they’re all `GenericInstanceType`s, i.e. `GenericClassInstanceType` or `GenericModuleInstanceType`.
In particular, I don’t understand the following:
- `Iterable(T)`, `Iterator(T)`, `Indexable(T)` appear many times, despite it being a set. I figured it’s probably because the `T`s are bound to different things, but they appear to be bound to some type `T` in `type_vars`.
- `Iterable(Int)`, `Iterable(Float)` and `Comparable(Pointer(Pointer(UInt8)))` appear there. These are fully instantiated types. The last one seems particularly weird to me.
- And most importantly, why are there “uncollected” types in the first place?