You would benefit from extracting some of the argument validation and conversion code to helper routines.
Add PEP484 type hints.
Unlike in C#, in Python variables like `resultC` should be formatted in snake case, as `result_c`.
Add unit tests.
I don't think it's useful to call into the system's UUID code just to get 16 bytes of random data. Instead, simply call `randbytes`. Also, for the purposes of test reproducibility, it's important to optionally have control over the random state, or even to pass in a different generator, such as the system random generator, if the UUID needs to be more secure.
I don't think the function name is very clear. I propose something like `uuid_v7` instead.
Add unit tests.
This first pass demonstrates an implementation that preserves your bit-fiddling:
import datetime
import random
import time
import uuid
def dt_to_unix_ms(timestamp: datetime.datetime | None) -> int:
if timestamp is None:
unix_ts = time.time() # Use current UTC time
elif isinstance(timestamp, datetime.datetime):
unix_ts = timestamp.timestamp() # User-provided time
else:
raise TypeError('timestamp must be a datetime or None')
unix_ts_ms = int(unix_ts * 1e3) # Convert to Unix time in milliseconds
# Version 7 UUIDs are defined only for non-negative timestamps
if unix_ts_ms < 0:
raise ValueError('timestamp cannot be negative')
return unix_ts_ms
def rand_basis(rand: random.Random | None) -> bytes:
if rand is None:
randbytes = random.randbytes # Use library default state
elif isinstance(rand, random.Random):
randbytes = rand.randbytes # User-provided state
else:
raise TypeError('rand must be Random or None')
# This gives us 128 bits of randomness to work from
return randbytes(n=16)
def uuid_v7(
    timestamp: datetime.datetime | None = None,
    rand: random.Random | None = None,
) -> uuid.UUID:
    """
    Create a Version 7 UUID (UUIDv7, RFC 9562) from a timestamp and random data.

    The 16 output bytes are laid out as:

    - bytes 0-5: the low 48 bits of the Unix time in milliseconds, big-endian
    - bytes 6-7: the version nibble (7) followed by 12 random bits
    - byte 8: the variant bits (binary 10) followed by 6 random bits
    - bytes 9-14: 6 random bytes
    - byte 15: always zero, mirroring the C# code this was ported from

    :param timestamp: An optional datetime used to derive the Unix timestamp.
        If None, the current time is used.
    :param rand: An optional random.Random instance supplying the random bits.
        If None, the random module's default state is used.
    :return: A uuid.UUID object representing a Version 7 UUID.
    :raises TypeError: If timestamp or rand has an unsupported type.
    :raises ValueError: If the timestamp corresponds to a time before the Unix epoch.
    """
    unix_ts_ms = dt_to_unix_ms(timestamp)
    entropy = rand_basis(rand)

    # 48-bit millisecond timestamp; the mask keeps the same low bits the
    # original 32-bit + 16-bit split retained
    ts_field = (unix_ts_ms & 0xFFFF_FFFF_FFFF).to_bytes(6, byteorder='big')

    # Random bytes 4 and 5 form a 16-bit word whose top nibble is replaced by the version
    rand_word = (entropy[4] << 8) | entropy[5]
    version_field = ((rand_word & 0x0FFF) | 0x7000).to_bytes(2, byteorder='big')

    # Random byte 8 has its top two bits replaced by the variant marker (10xx xxxx)
    variant_byte = (entropy[8] & 0x3F) | 0x80

    # Six more random bytes follow; the final byte is left as zero
    tail = bytes([variant_byte]) + entropy[9:15] + b'\x00'

    return uuid.UUID(bytes=ts_field + version_field + tail)
def demo() -> None:
    """Print one UUIDv7 for the current time and one for a fixed UTC timestamp."""
    # Default call: current time, default random state
    print(f'Generated UUIDv7: {uuid_v7()}')

    # Same call with an explicit, timezone-aware moment
    fixed_moment = datetime.datetime(
        year=2024, month=12, day=14, tzinfo=datetime.timezone.utc,
    )
    print('Generated UUIDv7 with specific timestamp:', uuid_v7(fixed_moment))
def test() -> None:
    """
    Self-checks for uuid_v7: uniqueness, reproducibility and a fixed regression value.

    All explicit timestamps are timezone-aware (UTC). A naive datetime is
    interpreted in the machine's local timezone by datetime.timestamp(), which
    would make the regression value depend on where the test runs.
    """
    ua = uuid_v7()
    ub = uuid_v7()
    assert ua != ub, 'Two consecutive UUIDs must not collide'

    now = datetime.datetime.now(tz=datetime.timezone.utc)
    ua = uuid_v7(timestamp=now, rand=random.Random(0))
    ub = uuid_v7(timestamp=now, rand=random.Random(0))
    assert ua == ub, 'Same random state and time must produce matching IDs'

    # 2020-01-01T00:00:00Z is 1577836800000 ms, i.e. 0x016f5e66e800
    ui = uuid_v7(
        timestamp=datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc),
        rand=random.Random(0),
    )
    assert ui == uuid.UUID('016f5e66-e800-7e6f-ac4c-09c28206e700'), 'Simple regression test'


# Running the module directly executes the self-checks
if __name__ == '__main__':
    test()
But bit-fiddling isn't necessary. I prefer declarative serialisation over imperative serialisation. That means making a structural definition and then passing data into its fields, without having to worry about shift operations and the like. This is possible with `ctypes`. This version of the demonstration should be bit-for-bit compatible with your use of an indexed random array:
import ctypes
import datetime
import random
import time
import typing
import uuid
class BinUUID(ctypes.BigEndianStructure):
_pack_ = 1
_fields_ = (
# 'a' and 'b'
('unix_ms', ctypes.c_uint64, 48),
# first bits of 'c'
('version', ctypes.c_uint64, 4),
# remaining bits of 'c'
('rand_c', ctypes.c_uint64, 12),
# first bits of 'd'
('variant', ctypes.c_uint8, 2),
# leftover bits of first 'd' byte
('rand_d0', ctypes.c_uint8, 6),
# remaining bytes of 'd'
('rand_d1', ctypes.c_uint8*7),
)
@classmethod
def new(cls, unix_ms: int, rand: bytes) -> typing.Self:
return cls(
unix_ms=unix_ms,
version=7,
rand_c=((rand[4] & 0x0F) << 8) | rand[5],
variant=2,
rand_d0=rand[8],
rand_d1=tuple(rand[9:15]),
)
def uuid_v7(
    timestamp: datetime.datetime | None = None,
    rand: random.Random | None = None,
) -> uuid.UUID:
    """
    Create a Version 7 UUID (UUIDv7, RFC 9562).

    The Unix timestamp in milliseconds fills the most significant bits and
    random data fills the rest, so UUIDs generated over time sort in
    increasing order. The byte layout is delegated to BinUUID.

    :param timestamp: An optional datetime used to derive the Unix timestamp.
        If None, the current time is used.
    :param rand: An optional random.Random instance supplying the random bits.
        If None, the random module's default state is used.
    :return: A uuid.UUID object representing a Version 7 UUID.
    :raises TypeError: If timestamp or rand has an unsupported type.
    :raises ValueError: If the timestamp corresponds to a time before the Unix epoch.
    """
    millis = dt_to_unix_ms(timestamp)
    entropy = rand_basis(rand)
    packed = BinUUID.new(unix_ms=millis, rand=entropy)
    # bytes() on the structure yields its 16 serialised bytes
    return uuid.UUID(bytes=bytes(packed))
Even simpler is to use a mix of `randrange` and `randbytes`:
import ctypes
import datetime
import random
import time
import types
import typing
import uuid
# A source of random values: either the ``random`` module itself (shared
# default state) or a caller-supplied ``random.Random`` instance.
type RandomSource = types.ModuleType | random.Random
def dt_to_unix_ms(timestamp: datetime.datetime | None) -> int:
if timestamp is None:
unix_ts = time.time() # Use current UTC time
elif isinstance(timestamp, datetime.datetime):
unix_ts = timestamp.timestamp() # User-provided time
else:
raise TypeError('timestamp must be a datetime or None')
unix_ts_ms = int(unix_ts * 1e3) # Convert to Unix time in milliseconds
# Version 7 UUIDs are defined only for non-negative timestamps
if unix_ts_ms < 0:
raise ValueError('timestamp cannot be negative')
return unix_ts_ms
def rand_basis(rand: random.Random | None) -> RandomSource:
    """
    Validate and resolve the object that random values will be drawn from.

    :param rand: A random.Random instance, or None to use the random module's
        shared default state.
    :return: The random module when rand is None, otherwise rand itself.
    :raises TypeError: If rand is neither a random.Random nor None.
    """
    if rand is None:
        # Fall back to the module-level functions and their shared state
        return random
    if not isinstance(rand, random.Random):
        raise TypeError('rand must be Random or None')
    return rand
class BinUUID(ctypes.BigEndianStructure):
_pack_ = 1
_fields_ = (
# 'a' and 'b'
('unix_ms', ctypes.c_uint64, 8*6),
# first bits of 'c'
('version', ctypes.c_uint64, 4),
# remaining bits of 'c'
('rand_c', ctypes.c_uint64, 12),
# first bits of 'd'
('variant', ctypes.c_uint8, 2),
# leftover bits of first 'd' byte
('rand_d0', ctypes.c_uint8, 6),
# remaining bytes of 'd'
('rand_d1', ctypes.c_uint8*7),
)
@classmethod
def new(cls, unix_ms: int, rand: 'RandomSource') -> typing.Self:
return cls(
unix_ms=unix_ms,
version=7,
rand_c=rand.randrange(0x1000),
variant=2,
rand_d0=rand.randrange(0x40),
rand_d1=tuple(rand.randbytes(7)),
)
def uuid_v7(
    timestamp: datetime.datetime | None = None,
    rand: random.Random | None = None,
) -> uuid.UUID:
    """
    Create a Version 7 UUID (UUIDv7, RFC 9562).

    The Unix timestamp in milliseconds fills the most significant bits and
    random data fills the rest, so UUIDs generated over time sort in
    increasing order. The byte layout is delegated to BinUUID.

    :param timestamp: An optional datetime used to derive the Unix timestamp.
        If None, the current time is used.
    :param rand: An optional random.Random instance to draw random bits from.
        If None, the random module's default state is used.
    :return: A uuid.UUID object representing a Version 7 UUID.
    :raises TypeError: If timestamp or rand has an unsupported type.
    :raises ValueError: If the timestamp corresponds to a time before the Unix epoch.
    """
    millis = dt_to_unix_ms(timestamp)
    source = rand_basis(rand)
    packed = BinUUID.new(unix_ms=millis, rand=source)
    # bytes() on the structure yields its 16 serialised bytes
    return uuid.UUID(bytes=bytes(packed))
def demo() -> None:
    """Print one UUIDv7 for the current time and one for a fixed UTC timestamp."""
    # Default call: current time, default random state
    print(f'Generated UUIDv7: {uuid_v7()}')

    # Same call with an explicit, timezone-aware moment
    fixed_moment = datetime.datetime(
        year=2024, month=12, day=14, tzinfo=datetime.timezone.utc,
    )
    print('Generated UUIDv7 with specific timestamp:', uuid_v7(fixed_moment))
def test() -> None:
    """
    Self-checks for uuid_v7: a fixed regression value, uniqueness and reproducibility.

    All explicit timestamps are timezone-aware (UTC). A naive datetime is
    interpreted in the machine's local timezone by datetime.timestamp(), which
    would make the regression value depend on where the test runs.
    """
    # 2020-01-01T00:00:00Z is 1577836800000 ms, i.e. 0x016f5e66e800
    ui = uuid_v7(
        timestamp=datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc),
        rand=random.Random(0),
    )
    assert ui == uuid.UUID('016f5e66-e800-7c53-b534-2f5d0a5e4842'), 'Simple regression test'

    ua = uuid_v7()
    ub = uuid_v7()
    assert ua != ub, 'Two consecutive UUIDs must not collide'

    now = datetime.datetime.now(tz=datetime.timezone.utc)
    ua = uuid_v7(timestamp=now, rand=random.Random(0))
    ub = uuid_v7(timestamp=now, rand=random.Random(0))
    assert ua == ub, 'Same random state and time must produce matching IDs'


# Running the module directly executes the self-checks
if __name__ == '__main__':
    test()