Hy guys!
Maybe it will be useful to someone: Lambda method sim with
1. Thread parallelism
2. Batch memory access
3. Precomputed jump tables for preventing sync (3 jump‑tables, d=3)
4. Interleaved steps
5. Cuckoo for traps
6. Distinguished points
7. Windowed method
Speed up to 50000 times than original Pollard Rho method.
And I'm lazy guy. Do it with real computing without any chances to solve 135 is not for me. But like a POC - why not.
E:\Python\The_game>python The_game.py -h
usage: The_game.py [-h] [--W W] [--T T] [--K K] [--trials TRIALS] [--max_iters MAX_ITERS] [--threads THREADS] [--speed SPEED] [--mem_latency MEM_LATENCY]
Simulate Pollard ρ with multiple kangaroos and throughput estimate.
optional arguments:
-h, --help show this help message and exit
--W W Range size W
--T T Number of traps T
--K K Jump table size K
--trials TRIALS Number of trials
--max_iters MAX_ITERS
Max steps per trial
--threads THREADS Number of kangaroos per trial
--speed SPEED Predicted total CPU point‑addition throughput (p‑adds/s)
--mem_latency MEM_LATENCY
DDR RAM latency per random access (s)
#!/usr/bin/env python3
import argparse
import random
import math
from tqdm import tqdm
from typing import List, Optional
def simulate_kangaroos(W: int, T: int, K: int, trials: int, max_iters: int,
threads: int, speed: Optional[float], mem_latency: float):
L = W/ T
base_jump = math.isqrt(L)
batch_size = 1024
cf_mem_bytes = 256 * 1024**3
bytes_per_entry = cf_mem_bytes T
bits_per_entry = bytes_per_entry * 8
fp_rate = 2 ** (-bits_per_entry)
print(f"W = {W:,}, T = {T:,}, L = {L:,}, sqrt(L) = {base_jump:,}")
print(f"p = T/W = {T/W:.3e}, expected steps = {W/T:,.0f}")
print(f"Threads: {threads} (split into 3 jump tables)")
print(f"Cuckoo filter @256 GiB → {bytes_per_entry:.3f} B/entry (~{bits_per_entry:.2f} bits), FP ≈ {fp_rate:.2%}\n")
if speed is not None:
print(f"Theoretical total CPU p‑adds/s: {speed:,.0f}")
print(f"Implied per‑thread p‑adds/s : {speed/threads:,.0f}")
print(f"DDR RAM latency : {mem_latency*1e9:.1f} ns")
print(f"Batch memory access : {batch_size} keys per access\n")
else:
print()
successes = 0
hits_per_trial: List[Optional[int]] = []
for t in range(1, trials + 1):
jump_tables = [
[random.randint(1, base_jump) for _ in range(K)]
for _ in range(3)
]
X = [random.randrange(W) for _ in range(threads)]
hit_step: Optional[int] = None
hit_trap: Optional[int] = None
hit_thread: Optional[int] = None
with tqdm(total=max_iters, desc=f"Trial {t}", unit="step") as bar:
for step in range(1, max_iters + 1):
for i in range(threads):
if hit_step is not None:
break
tbl = jump_tables[i % 3]
idx = X[i] % K
X[i] = (X[i] + tbl[idx]) % W
if X[i] % L == 0:
hit_step = step
hit_thread = i + 1
hit_trap = X[i]/ L
bar.update(1)
if hit_step is not None:
break
if hit_step is not None:
print(f" Kangaroo {hit_thread}: HIT at step {hit_step:,} (trap idx = {hit_trap:,})\n")
successes += 1
else:
print(f" MISS after {max_iters:,} steps\n")
hits_per_trial.append(hit_step)
print(f"Total hits: {successes} {trials}")
hit_steps = [h for h in hits_per_trial if h is not None]
if hit_steps:
mn = min(hit_steps)
mx = max(hit_steps)
avg = sum(hit_steps) len(hit_steps)
print(f"Steps to first hit: min={mn:,}, max={mx:,}, avg={avg:,.1f}")
if speed is not None and hit_steps:
comp_cap = speed
mem_cap = threads * batch_size mem_latency
pred_adds = min(comp_cap, mem_cap)
wins_per_sec = pred_adds avg
wins_floor = math.floor(wins_per_sec * 10) 10
pts_per_sec = wins_per_sec * W
if pts_per_sec >= 1e21:
human = f"{pts_per_sec/1e21:.3f} Zpps"
elif pts_per_sec >= 1e18:
human = f"{pts_per_sec/1e18:.3f} Epps"
else:
human = f"{pts_per_sec:.3e} pts/s"
full_pts = f"{pts_per_sec:,.0f} pts/s"
overhead = 0.20
eff_adds = pred_adds * (1 - overhead)
eff_wins = eff_adds avg
eff_wins_floor = math.floor(eff_wins * 10) 10
eff_pts = eff_wins * W
if eff_pts >= 1e21:
human_eff = f"{eff_pts/1e21:.3f} Zpps"
elif eff_pts >= 1e18:
human_eff = f"{eff_pts/1e18:.3f} Epps"
else:
human_eff = f"{eff_pts:.3e} pts/s"
full_eff_pts = f"{eff_pts:,.0f} pts/s"
nohit_pts_per_sec = (speed max_iters) * W
eff_nohit = nohit_pts_per_sec * (1 - overhead)
if eff_nohit >= 1e21:
human_nohit = f"{eff_nohit/1e21:.3f} Zpps"
elif eff_nohit >= 1e18:
human_nohit = f"{eff_nohit/1e18:.3f} Epps"
else:
human_nohit = f"{eff_nohit:.3e} pts/s"
full_nohit = f"{eff_nohit:,.0f} pts/s"
print("\n--- Theoretical throughput estimate ---")
print(f"Compute‑bound : {comp_cap:,.0f} p‑adds/s")
print(f"Memory‑bound : {mem_cap:,.0f} p‑adds/s")
print(f"Predicted p‑adds : {pred_adds:,.0f} p‑adds/s")
print(f"Predicted windows: {wins_floor:.1f} win/s")
print(f"Predicted points : {full_pts} ({human})")
print(f"After ~20% overhead (thread sync, atomic ops, etc):")
print(f" Effective windows: {eff_wins_floor:.1f} win/s")
print(f" Effective points : {full_eff_pts} ({human_eff})")
print(f" Effective no‑hit scanning: {full_nohit} ({human_nohit})")
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Simulate Pollard ρ with multiple kangaroos and throughput estimate.")
parser.add_argument("--W", type=int, default=10**8, help="Range size W")
parser.add_argument("--T", type=int, default=10**4, help="Number of traps T")
parser.add_argument("--K", type=int, default=64, help="Jump table size K")
parser.add_argument("--trials", type=int, default=5, help="Number of trials")
parser.add_argument("--max_iters", type=int, default=None, help="Max steps per trial")
parser.add_argument("--threads", type=int, default=2, help="Number of kangaroos per trial")
parser.add_argument("--speed", type=float, default=None, help="Predicted total CPU point‑addition throughput (p‑adds/s)")
parser.add_argument("--mem_latency", type=float, default=100e-9, help="DDR RAM latency per random access (s)")
args = parser.parse_args()
W = args.W
T = args.T
default_iters = int((W T) * 2)
max_iters = args.max_iters if args.max_iters is not None else default_iters
simulate_kangaroos(W, T, args.K, args.trials, max_iters, args.threads, args.speed, args.mem_latency)
And results
E:\Python\The_game>python The_game.py --W 1000000000000000000 --T 500000000000 --K 1414 --trials 10 --max_iters 200000 --threads 32 --speed 150000000
W = 1,000,000,000,000,000,000, T = 500,000,000,000, L = 2,000,000, sqrt(L) = 1,414
p = T/W = 5.000e-07, expected steps = 2,000,000
Threads: 32 (split into 3 jump tables)
Cuckoo filter @256 GiB → 0.550 B/entry (~4.40 bits), FP ≈ 4.74%
Theoretical total CPU p‑adds/s: 150,000,000
Implied per‑thread p‑adds/s : 4,687,500
DDR RAM latency : 100.0 ns
Batch memory access : 1024 keys per access
Trial 1: 5%|████████▊ | 10296/200000 [00:00<00:01, 161824.75step/s]
Kangaroo 9: HIT at step 10,296 (trap idx = 335,485,616,764)
Trial 2: 4%|███████▍ | 8597/200000 [00:00<00:01, 149881.87step/s]
Kangaroo 2: HIT at step 8,597 (trap idx = 291,414,423,644)
Trial 3: 7%|███████████▏ | 13113/200000 [00:00<00:01, 156095.86step/s]
Kangaroo 1: HIT at step 13,113 (trap idx = 371,862,088,299)
Trial 4: 49%|████████████████████████████████████████████████████████████████████████████████████▍ | 98742/200000 [00:00<00:00, 155103.27step/s]
Kangaroo 21: HIT at step 98,742 (trap idx = 209,929,031,829)
Trial 5: 7%|███████████▉ | 13926/200000 [00:00<00:01, 158877.92step/s]
Kangaroo 2: HIT at step 13,926 (trap idx = 69,456,967,361)
Trial 6: 3%|█████▊ | 6828/200000 [00:00<00:01, 152403.27step/s]
Kangaroo 4: HIT at step 6,828 (trap idx = 5,824,479,995)
Trial 7: 67%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉ | 134098/200000 [00:00<00:00, 155850.07step/s]
Kangaroo 31: HIT at step 134,098 (trap idx = 140,162,118,610)
Trial 8: 18%|██████████████████████████████▋ | 35918/200000 [00:00<00:01, 158432.59step/s]
Kangaroo 23: HIT at step 35,918 (trap idx = 340,417,309,332)
Trial 9: 12%|████████████████████▌ | 24030/200000 [00:00<00:01, 159903.80step/s]
Kangaroo 24: HIT at step 24,030 (trap idx = 332,023,294,905)
Trial 10: 77%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌ | 153355/200000 [00:00<00:00, 159300.14step/s]
Kangaroo 29: HIT at step 153,355 (trap idx = 269,327,468,331)
Total hits: 10 10
Steps to first hit: min=6,828, max=153,355, avg=49,890.3
--- Theoretical throughput estimate ---
Compute‑bound : 150,000,000 p‑adds/s
Memory‑bound : 327,680,000,000 p‑adds/s
Predicted p‑adds : 150,000,000 p‑adds/s
Predicted windows: 3006.5 win/s
Predicted points : 3,006,596,472,661,018,148,864 pts/s (3.007 Zpps)
After ~20% overhead (thread sync, atomic ops, etc):
Effective windows: 2405.2 win/s
Effective points : 2,405,277,178,128,814,309,376 pts/s (2.405 Zpps)
Effective no‑hit scanning: 600,000,000,000,000,000,000 pts/s (600.000 Epps)