Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce a bitstype just for IANA Variable TimeZones #335

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/TimeZones.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ function __init__()
)

global ISOZonedDateTimeFormat = DateFormat("yyyy-mm-ddTHH:MM:SS.ssszzz")
init_IANA_NAMES!()
end

include("compat.jl")
Expand All @@ -58,6 +59,7 @@ include("utcoffset.jl")
include(joinpath("types", "timezone.jl"))
include(joinpath("types", "fixedtimezone.jl"))
include(joinpath("types", "variabletimezone.jl"))
include(joinpath("types", "ianatimezone.jl"))
include(joinpath("types", "zoneddatetime.jl"))
include("exceptions.jl")
include(joinpath("tzdata", "TZData.jl"))
Expand Down
4 changes: 2 additions & 2 deletions src/arithmetic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ function broadcasted(::typeof(+), r::StepRange{ZonedDateTime}, p::DatePeriod)
# non-existent and ambiguous dates.

tz = timezone(start)
if isa(tz, VariableTimeZone)
if isa(tz, AbstractVariableTimeZone)
start = first_valid(DateTime(start) + p, tz, step)
else
start = start + p
end

tz = timezone(stop)
if isa(tz, VariableTimeZone)
if isa(tz, AbstractVariableTimeZone)
stop = last_valid(DateTime(stop) + p, tz, step)
else
stop = stop + p
Expand Down
2 changes: 2 additions & 0 deletions src/conversions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ Converts a `ZonedDateTime` from its current `TimeZone` into the specified `TimeZ
"""
function astimezone end

astimezone(zdt::ZonedDateTime, tz::IANATimeZone) = _do_and_rewrap(astimezone, zdt, tz)

function astimezone(zdt::ZonedDateTime, tz::VariableTimeZone)
i = searchsortedlast(
tz.transitions, zdt.utc_datetime,
Expand Down
11 changes: 6 additions & 5 deletions src/discovery.jl
Original file line number Diff line number Diff line change
Expand Up @@ -154,24 +154,25 @@ next_transition_instant

function next_transition_instant(zdt::ZonedDateTime)
tz = zdt.timezone
tz isa VariableTimeZone || return nothing
tz isa AbstractVariableTimeZone || return nothing

transits = transitions(tz)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be premature optimization to pull this out to be done before.
Idea is to avoid relooking up the backing_timezone for IANATimeZone multiple times

# Determine the index of the transition which occurs after the UTC datetime specified
index = searchsortedfirst(
tz.transitions, DateTime(zdt, UTC),
transits, DateTime(zdt, UTC),
by=el -> isa(el, TimeZones.Transition) ? el.utc_datetime : el,
)

index <= length(tz.transitions) || return nothing
index <= length(transits) || return nothing

# Use the UTC datetime of the transition and the offset information prior to the
# transition to create a `ZonedDateTime` which cannot be constructed with the high-level
# constructors. The instant constructed is equivalent to the first instant after the
# transition but visually appears to be before the transition. For example in a
# transition where the clock changes from 01:59 → 03:00 we would return 02:00 where
# the UTC datetime of 02:00 == 03:00.
utc_datetime = tz.transitions[index].utc_datetime
prev_zone = tz.transitions[index - 1].zone
utc_datetime = transits[index].utc_datetime
prev_zone = transits[index - 1].zone
ZonedDateTime(utc_datetime, tz, prev_zone)
end

Expand Down
4 changes: 4 additions & 0 deletions src/interpret.jl
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,7 @@ function last_valid(local_dt::DateTime, tz::VariableTimeZone)
possible = interpret(local_dt, tz, Local)
return isempty(possible) ? first(shift_gap(local_dt, tz)) : last(possible)
end


first_valid(dt::DateTime, tz::IANATimeZone, args...) = _do_and_rewrap(first_valid, dt, tz, args...)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probnably _do_and_rewrap needs to be renamed

last_valid(dt::DateTime, tz::IANATimeZone, args...) = _do_and_rewrap(last_valid, dt, tz, args...)
159 changes: 159 additions & 0 deletions src/types/ianatimezone.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
const IANA_TABLE_SIZE = 1680

perfect_hash(tz::VariableTimeZone) = perfect_hash(tz.name)
function perfect_hash(str::AbstractString)
# This was generated via `gperf` and translated by hand.
# in case of collisions from new keys being added you must *not* regenerate it or
# change the assoc table, as that will change existing hashes and break any serialized
# data. Instead add special cases from the exact string via adding special cases. e.g.
#`name=="New/Timezone" && return 1681` (after adjusting `IANA_TABLE_SIZE`)
# The code below will (for all timezones in the 2021a release of tzdata) generate a
# value between 29 and 1680 inclusive. So any new keys that need to be added
# manually to resolve collisions can freely use anything outside that range.

asso_values = (
1681, 1681, 1681, 1681, 1681, 1681, 1681, 1681, 1681,
1681, 1681, 1681, 1681, 1681, 1681, 1681, 1681, 1681, 1681,
1681, 1681, 1681, 1681, 1681, 1681, 1681, 1681, 1681, 1681,
1681, 1681, 1681, 1681, 1681, 1681, 1681, 1681, 1681, 1681,
1681, 1681, 1681, 4, 1681, 1, 3, 3, 241, 48,
1, 22, 10, 9, 61, 18, 74, 9, 30, 1681,
1681, 1681, 1681, 1681, 1681, 4, 2, 18, 167, 48,
547, 691, 532, 446, 574, 466, 211, 452, 8, 387,
110, 475, 601, 262, 83, 124, 580, 536, 60, 1,
719, 2, 1681, 1681, 1681, 291, 1, 1, 27, 424,
13, 5, 43, 3, 355, 12, 168, 148, 90, 179,
4, 1, 315, 3, 3, 3, 2, 37, 5, 65,
22, 320, 245, 1, 1681, 1681, 1681,
)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How should i rewrap this?
10 per line?
Up to the line limit?
1 per line?


units = codeunits(str)
len = length(units)
hval = len

len >= 19 && (hval += asso_values[units[19]])
len >= 12 && (hval += asso_values[units[12]])
len >= 11 && (hval += asso_values[units[11]])
len >= 9 && (hval += asso_values[units[9] + 1])
len >= 8 && (hval += asso_values[units[8]])
len >= 6 && (hval += asso_values[units[6] + 1])
len >= 4 && (hval += asso_values[units[4]])
len >= 2 && (hval += asso_values[units[2] + 1])
len >= 1 && (hval += asso_values[units[1]])
len > 0 && (hval += asso_values[units[end]]) # add the last
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
len >= 19 && (hval += asso_values[units[19]])
len >= 12 && (hval += asso_values[units[12]])
len >= 11 && (hval += asso_values[units[11]])
len >= 9 && (hval += asso_values[units[9] + 1])
len >= 8 && (hval += asso_values[units[8]])
len >= 6 && (hval += asso_values[units[6] + 1])
len >= 4 && (hval += asso_values[units[4]])
len >= 2 && (hval += asso_values[units[2] + 1])
len >= 1 && (hval += asso_values[units[1]])
len > 0 && (hval += asso_values[units[end]]) # add the last
@inbounds begin
len >= 19 && (hval += asso_values[units[19]])
len >= 12 && (hval += asso_values[units[12]])
len >= 11 && (hval += asso_values[units[11]])
len >= 9 && (hval += asso_values[units[9] + 1])
len >= 8 && (hval += asso_values[units[8]])
len >= 6 && (hval += asso_values[units[6] + 1])
len >= 4 && (hval += asso_values[units[4]])
len >= 2 && (hval += asso_values[units[2] + 1])
len >= 1 && (hval += asso_values[units[1]])
len > 0 && (hval += asso_values[units[end]]) # add the last
end

This saves some time and simplifies the native code but relies on each code unit being in the range 1:125.

Might be worth it just to pad the array of asso_values to cover the domain of UInt8

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This saves some time and simplifies the native code but relies on each code unit being in the range 1:125

I tried this and didn't notice any improvement.
How were you benchmarking it?

Might be worth it just to pad the array of asso_values to cover the domain of UInt8

I tried this and it made it way worse. I think because the tuple gets too lange for julia to be willing to do some optimizations.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

julia> @btime for x in ["America/Winnipeg", "Etc/UTC", "Canada/Central"]
           perfect_hash(x)
       end
  71.438 ns (1 allocation: 112 bytes)

julia> @btime for x in ["America/Winnipeg", "Etc/UTC", "Canada/Central"]
           perfect_hash_inbounds(x)
       end
  66.137 ns (1 allocation: 112 bytes)

Copy link
Contributor Author

@oxinabox oxinabox Apr 16, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

running @btime on the whole testset in test/types/ianatimezone.jl
With the benchmark

@btime for x in ("America/Winnipeg", "Etc/UTC", "Canada/Central")
                TimeZones.perfect_hash(x)
             end

Noting that i got rid of the vector in the benchmark code since that was dominating.
In julia 1.6

  • current code without the inbounds annotation|: 38.097 ns (0 allocations: 0 bytes)
  • with inbounds (not safe): 33.217 ns (0 allocations: 0 bytes)
  • with inbounds + make assoc_values bigger: 79.305 ns (0 allocations: 0 bytes)

Copy link
Contributor Author

@oxinabox oxinabox Apr 16, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • with inbounds and assoc to 255: 44.585 ns (0 allocations: 0 bytes)

growing assoc to 127 seems to help: fikkiwubg wutgh tgat:

  • with just assoc to 127 : 28.334 ns (0 allocations: 0 bytes)
  • with inbounds ((Not safe): 23.236 ns (0 allocations: 0 bytes)
  • with inbounds and checking based on bitmask: 47.035 ns (0 allocations: 0 bytes)
  • with inbounds and checking based in <: 36.359 ns (0 allocations: 0 bytes)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm i don't trust my benchmarks
Timing for 127 + checking based on <: sometimes is 36ns sometimes is 48ns.

I think we should do that though, and i will push that code

return hval
end


const IANA_TIMEZONES = Vector{VariableTimeZone}(undef, IANA_TABLE_SIZE)

# TODO: maybe fill this during build(), probably by generating a julia file.
Copy link
Contributor Author

@oxinabox oxinabox Apr 15, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or maybe it is fine, since it only queries what files exist, using timezone_names rather than reading them.
Maybe timezone_names (or __init__) should do the validation that TimeZones.jl has build properly and has the TZDATA.COMPILED_DIR.
Which would maybe clean up the TimeZone constructor a bit.

# That way we can avoid actually instantitating every timezone til it is needed.
oxinabox marked this conversation as resolved.
Show resolved Hide resolved
const IANA_NAMES = Vector{String}(undef, IANA_TABLE_SIZE)
function init_IANA_NAMES!() # this is run by __init__ (at least for now)
for name in timezone_names()
# TODO: we should workout how to filter out FixedTimeZones here
oxinabox marked this conversation as resolved.
Show resolved Hide resolved
id = perfect_hash(name)
# Important: Make sure our hash is perfect (even module the table size)
oxinabox marked this conversation as resolved.
Show resolved Hide resolved
isassigned(IANA_NAMES, id) && error("hash collision for $tz, at $id")
IANA_NAMES[id] = name
end
return IANA_NAMES
end

function is_standard_iana(str::AbstractString)
id = perfect_hash(str)
return isassigned(IANA_NAMES, id) && IANA_NAMES[id] == str
end

function get_iana_timezone!(str::AbstractString)
id = perfect_hash(str)
if isassigned(IANA_TIMEZONES, id)
IANA_TIMEZONES[id]
else
tz_path = joinpath(TZData.COMPILED_DIR, split(str, "/")...)
tz, class = deserialize(tz_path)
if tz isa VariableTimeZone
IANA_TIMEZONES[id] = tz
return IANATimeZone(perfect_hash(str))
else
# it is a FixedTimeZone, we are not going to use a IANATimeZone
return tz
end
end
end

function get_iana_timezone!(id::UInt)
if isassigned(IANA_NAMES, id)
name = IANA_NAMES[id]
return get_iana_timezone!(name)
else
error(
"$id does not correspond to any known IANA timezone. " *
"Check you are using the right version of the IANA database.",
)
end
end


"""
IANATimeZone(::AbstractString) <: AbstractVariableTimeZone

A type for representing a standard variable IANA TimeZome from the tzdata.
Under-the-hood it stores only a unique integer identifier.
"""
struct IANATimeZone <: TimeZone
# id must be a prefect_hash of the corresponding timezone name
id::UInt
end

function IANATimeZone(name::AbstractString)
return IANATimeZone(perfect_hash(name))
end

backing_timezone(itz::IANATimeZone) = get_iana_timezone!(itz.id)::VariableTimeZone

Base.:(==)(a::IANATimeZone, b::IANATimeZone) = a.id == b.id
Base.:(==)(a::IANATimeZone, b::TimeZone) = backing_timezone(a) == b
Base.:(==)(b::TimeZone, a::IANATimeZone) = backing_timezone(a) == b

# TODO: we have the hash, it seems like we should be able to use that to get seeded hash
oxinabox marked this conversation as resolved.
Show resolved Hide resolved
Base.hash(a::IANATimeZone, seed::UInt) = hash(backing_timezone(a), seed)

name(a::IANATimeZone) = name(backing_timezone(a))
transitions(tz::IANATimeZone) = transitions(backing_timezone(tz))

# TODO: should i just make this check the fields of VariableTimeZone and just delegate all?
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this should be addressed

function Base.getproperty(tz::IANATimeZone, s::Symbol)
if s === :name
return name(tz)
elseif s == :transitions
return transitions(tz)
else
return getfield(tz, s)
end
end
function Base.hasproperty(tz::IANATimeZone, s::Symbol)
return s === :name || s === :transitions || hasfield(IANATimeZone, s)
end
oxinabox marked this conversation as resolved.
Show resolved Hide resolved




""""
_do_and_rewrap(f, arg1, tz::IANATimeZone, args...; kwargs...)

Run the function `f(arg1, backing_timezone(tz), args...; kwargs...)`
which must return a `ZonedDateTime`, with the backing timezone.
Replace the timezone field with `tz` (which should be equivalent).
"""
function _do_and_rewrap(f, arg1, tz::IANATimeZone, args...; kwargs...)
backed_tz = backing_timezone(tz)
backed_zdt::ZonedDateTime = f(arg1, backing_timezone(tz), args...; kwargs...)
# make it store tz rather than the equiv backing timezone, other fields the same
return ZonedDateTime(backed_zdt.utc_datetime, tz, backed_zdt.zone)
end


Base.show(io::IO, tz::IANATimeZone) = show(io, backing_timezone(tz))
Base.print(io::IO, tz::IANATimeZone) = print(io, backing_timezone(tz))
5 changes: 4 additions & 1 deletion src/types/timezone.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ function TimeZone(str::AbstractString, mask::Class=Class(:DEFAULT))
tz, class = get!(TIME_ZONE_CACHE, str) do
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@omus input on the best way to rewrite this function would apprecated.

tz_path = joinpath(TZData.COMPILED_DIR, split(str, "/")...)

if isfile(tz_path)
if mask == Class(:DEFAULT) && is_standard_iana(str)
# TODO: idk if this as sensible way to handle class and mask
get_iana_timezone!(str), Class(:DEFAULT)
elseif isfile(tz_path)
open(deserialize, tz_path, "r")
elseif occursin(FIXED_TIME_ZONE_REGEX, str)
FixedTimeZone(str), Class(:FIXED)
Expand Down
19 changes: 18 additions & 1 deletion src/types/variabletimezone.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,27 @@ end

Base.isless(a::Transition, b::Transition) = isless(a.utc_datetime, b.utc_datetime)

# TODO: define and document an actual API for this
# Seems to need: (list may be incomplete)
# - transitions(tz)
# - name(tz)
# - first_valid(tz)
# - last_valid(tz)
# - some constructors for ZonedDateTime
#
# As well as ones that are common to TimeZone
# - astimezone(zdt, tz)
# - show(io, tz)
# - print(io, tz)
# - `==` and `hash`
abstract type AbstractVariableTimeZone <: TimeZone end

"""
VariableTimeZone

A `TimeZone` with an offset that changes over time.
"""
struct VariableTimeZone <: TimeZone
struct VariableTimeZone <: AbstractVariableTimeZone
name::String
transitions::Vector{Transition}
cutoff::Union{DateTime,Nothing}
Expand All @@ -20,6 +35,8 @@ struct VariableTimeZone <: TimeZone
end
end

transitions(tz::VariableTimeZone) = tz.transitions

name(tz::VariableTimeZone) = tz.name

function rename(tz::VariableTimeZone, name::AbstractString)
Expand Down
10 changes: 9 additions & 1 deletion src/types/zoneddatetime.jl
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,14 @@ function ZonedDateTime(dt::DateTime, tz::VariableTimeZone, is_dst::Bool)
end
end

function ZonedDateTime(dt::DateTime, tz::IANATimeZone, occ_or_dst::Integer)
return _do_and_rewrap(ZonedDateTime, dt, tz, occ_or_dst)
end

function ZonedDateTime(dt::DateTime, tz::IANATimeZone; kwargs...)
return _do_and_rewrap(ZonedDateTime, dt, tz; kwargs...)
end

# Convenience constructors
@doc """
ZonedDateTime(y, [m, d, h, mi, s, ms], tz, [amb]) -> DateTime
Expand All @@ -113,7 +121,7 @@ Construct a `ZonedDateTime` type by parts. Arguments `y, m, ..., ms` must be con
`TimeZone` then `amb` can be supplied to resolve ambiguity.
""" ZonedDateTime

@optional function ZonedDateTime(y::Integer, m::Integer=1, d::Integer=1, h::Integer=0, mi::Integer=0, s::Integer=0, ms::Integer=0, tz::VariableTimeZone, amb::Union{Integer,Bool})
@optional function ZonedDateTime(y::Integer, m::Integer=1, d::Integer=1, h::Integer=0, mi::Integer=0, s::Integer=0, ms::Integer=0, tz::AbstractVariableTimeZone, amb::Union{Integer,Bool})
ZonedDateTime(DateTime(y,m,d,h,mi,s,ms), tz, amb)
end

Expand Down
1 change: 1 addition & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ include("helpers.jl")
include(joinpath("types", "fixedtimezone.jl"))
include(joinpath("types", "variabletimezone.jl"))
include(joinpath("types", "zoneddatetime.jl"))
include(joinpath("types", "ianatimezone.jl"))
include("exceptions.jl")
include("interpret.jl")
include("accessors.jl")
Expand Down
Loading