In case anyone wants to try, all that is needed to implement dynamic Symbols is the ability to query the number of predefined Symbols:
# src/compiler/crystal/codegen/primitives.cr
# Code generation for compiler primitives (excerpt; the other `when` branches
# are elided below).
class Crystal::CodeGenVisitor
# Generates code for the primitive named by `node.name` and stores the
# generated value in `@last`. Raises for a primitive name with no branch.
def codegen_primitive(call, node, target_def, call_args)
# Record the location of the call's name (if there is a call) while the
# primitive is being generated.
@call_location = call.try &.name_location
@last = case node.name
# ...
when "symbol_predefined_count"
# New primitive: the number of entries in `@symbols`, passed through `int`.
# NOTE(review): presumably `@symbols` is the compiler's table of symbol
# literals and `int` builds an integer constant — confirm.
int(@symbols.size)
else
raise "BUG: unhandled primitive in codegen: #{node.name}"
end
# Reset the recorded call location once the primitive has been generated.
@call_location = nil
end
end
# src/primitives.cr
# Primitive declarations for `Symbol`. The method bodies are intentionally
# empty: each is tagged with a `Primitive` annotation naming the compiler
# primitive that implements it.
struct Symbol
# Returns the number of predefined symbols.
# Backed by the `symbol_predefined_count` primitive.
@[Primitive(:symbol_predefined_count)]
def self.predefined_count : Int32
end
# renamed from `#to_s`
# Returns the name of a symbol as a `String` via the `symbol_to_s` primitive.
# Declared `protected`, so it is not part of the public `Symbol` API.
@[Primitive(:symbol_to_s)]
protected def to_s_primitive : String
end
end
# src/symbol.cr
require "string_pool"
# Runtime symbol table: starts out holding every predefined symbol and grows
# whenever `Symbol.new` is given a name it has not seen before.
struct Symbol
  # Pool that interns names, so equal names share a single `String` instance.
  @@strings = StringPool.new(Math.pw2ceil(predefined_count))

  # Interned name => symbol. Keys are compared by identity, which is why every
  # key must come out of `@@strings`.
  @@s_to_sym = Hash(String, Symbol).new(initial_capacity: predefined_count).compare_by_identity

  # Symbol value (used as an index) => interned name.
  @@i_to_s = Array(String).new(predefined_count)

  # Registers *name* under the next free index and returns the new symbol.
  # *name* must already be interned in `@@strings`.
  private def self.add_symbol(name : String) : self
    symbol = @@i_to_s.size.unsafe_as(Symbol)
    @@i_to_s.push(name)
    @@s_to_sym[name] = symbol
  end

  # Seeds the tables with the predefined symbols, in index order, so that each
  # predefined symbol keeps its original integer value.
  private def self.init_string_pool
    (0...predefined_count).each do |index|
      predefined_name = index.unsafe_as(Symbol).to_s_primitive
      add_symbol(@@strings.get(predefined_name))
    end
  end

  init_string_pool

  # Returns the symbol named *str*, creating it if it does not exist yet.
  def self.new(str : String)
    interned = @@strings.get(str)
    @@s_to_sym[interned]? || add_symbol(interned)
  end

  # Returns the name of this symbol, looked up in the runtime table.
  def to_s : String
    @@i_to_s[to_i]
  end

  # Yields every symbol registered at the time of the call, in index order.
  def self.each(& : Symbol ->)
    (0...@@i_to_s.size).each do |index|
      yield index.unsafe_as(Symbol)
    end
  end

  # Returns a new array containing every registered symbol, in index order.
  def self.all_symbols : Array(Symbol)
    Array(Symbol).new(@@i_to_s.size) { |index| index.unsafe_as(Symbol) }
  end

  # Returns `self`; mirrors `String#to_sym`.
  def to_sym : self
    self
  end
end
class String
# Returns the `Symbol` named by this string by calling `Symbol.new(self)`.
def to_sym : Symbol
Symbol.new(self)
end
end
# Usage example. Initially only the predefined symbols are present (indices 0..9).
Symbol.all_symbols # => [:sequentially_consistent, :xchg, :skip, :none, :unchecked, :add, :active, :done, :to_s, :file]

# A name that is already predefined resolves to the existing symbol.
a = "xchg".to_sym
b = "xchg".to_sym
a.to_i # => 1
b.to_i # => 1
:xchg.to_i # => 1

# Two distinct String instances with the same content ("foo") produce the same
# new symbol, which takes the next free index. The second string is assembled
# from "f" + "oo" so that it is equal to, but not the same object as, the first.
a = String.build(&.<< "foo").to_sym
b = String.build(&.<<("f").<< "oo").to_sym
a.to_i # => 10
b.to_i # => 10

# The newly created symbol is now part of the table.
Symbol.all_symbols # => [:sequentially_consistent, :xchg, :skip, :none, :unchecked, :add, :active, :done, :to_s, :file, :foo]
The counterargument is that if Symbols are used in this capacity, then one can simply use Strings and a StringPool directly without pulling in all the predefined Symbol constants, thereby avoiding global state, and the memory usage and performance will be the same. One key difference, however, is that an entire standalone StringPool can be garbage-collected, even though its elements cannot be removed individually, whereas the global Symbol table lives for the whole program. In fact I did this in a custom JSON (de)serializer to reduce the generated document’s size (with a hard limit on the number of distinct strings).
We tackled the operator token issue recently, and we are now pretty close to removing all Symbol-typed variables in the standard library and the compiler; what remains would require API changes. Those are #11775, #11020, and the following:
# src/time/location.cr
class Time::Location
struct Zone
# Prints `#offset` to *io* in the format `+HH:mm:ss`.
# When *with_colon* is `false`, the format is `+HHmmss`.
#
# When *with_seconds* is `false`, seconds are omitted; when `:auto`, seconds
# are omitted if `0`.
#
# NOTE: *with_seconds* carries no type restriction, and its default value is
# the symbol `:auto`.
def format(io : IO, with_colon = true, with_seconds = :auto)
# ...
end
# Returns the `#offset` formatted as `+HH:mm:ss`.
# When *with_colon* is `false`, the format is `+HHmmss`.
#
# When *with_seconds* is `false`, seconds are omitted; when `:auto`, seconds
# are omitted if `0`.
#
# Builds the string by forwarding both options unchanged to the `IO` overload.
def format(with_colon = true, with_seconds = :auto)
String.build do |io|
format(io, with_colon: with_colon, with_seconds: with_seconds)
end
end
end
end
`with_seconds` has no type restriction, but the method body expects it to be one of `true`, `false`, or `:auto`. If we deprecate these as well, then symbols can only appear in compile-time contexts, e.g. as arguments to `#responds_to?`; I think these are fine, because those symbols emphasize their compile-time aspect.