Thursday, May 7, 2026

Speed-Optimized Python 3.14t on Debian Forky: A Clang-19 Build Guide (Assisted by Google AI)

Building multi-threaded Python 3.14+ from source on Debian Forky using Clang 19.1 enables high-performance, free-threaded execution (no GIL). Using clang-19 with optimized flags (-O3, -flto) and linking against libatomic1 (Debian/Ubuntu) ensures maximum performance and thread safety, crucial for taking advantage of modern multi-core architectures.

Build Configuration Highlights:
Compiler: Clang 19.1 (optimized for Debian Forky).
Interpreter Logic: Enabled "--with-tail-call-interp" for ~3-5% baseline improvement.
Threading: --disable-gil for multi-core parallelism (3.14t).
Optimization Pipeline: Full PGO + LTO cycle, which is essential for the tail-call interpreter to reach peak performance.

Set up packages for the build:
 
Step 1
 
$ sudo apt update && sudo apt install -y  make build-essential libssl-dev zlib1g-dev  libbz2-dev libreadline-dev libsqlite3-dev  wget curl llvm clang-19 libncurses-dev  xz-utils tk-dev libxml2-dev libxmlsec1-dev  libffi-dev liblzma-dev git gcc   libclang-19-dev  libdb5.3-dev  libgdbm-dev


 Step 2
 
$ sudo apt update && sudo apt install -y binutils libc6-dev libstdc++-14-dev
 
Step 3
 
Because Forky is a testing branch, package dependencies are sometimes in flux. For a stable build environment, if you plan to build frequently, you should ensure clang-19 is the active version by registering it with update-alternatives.
  
$ sudo update-alternatives --install /usr/bin/clang clang /usr/bin/clang-19 100
 
Verify compiler setup

$  clang-19 --version
$   vi test.c
#include <stdio.h>
int main() { printf("Works\n"); return 0; }
:wq
$ clang-19 test.c -o test && ./test
Works
 
=================================================
Now configure && build from source with clang-19 pre-installed
=================================================
$  tar -xf Python-3.14.4.tar.xz
$ cd Python-3.14.4
$ CC=clang-19 ./configure --enable-optimizations --with-lto --disable-gil --with-tail-call-interp
$ make -j10
$ sudo make altinstall 
 
Create a working directory and install the required packages into a Python virtual environment:
 
$ mkdir WORKMTH
$ cd   WORKMTH
$ python3.14t -m venv .env
$ source .env/bin/activate
$ pip install aqtinstall
$ pip install --upgrade pip
$ pip install numpy matplotlib cxroots
$ pip install seaborn
 
Several code samples

$ cat meromorphPlotting01.py
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import fsolve
from concurrent.futures import ThreadPoolExecutor
import csv

# 1. Function Definitions
def f_num(z):
    """Numerator of the meromorphic test function.

    Every term carries at least a factor z**9, and sinh(pi*z) also vanishes
    at z = 0, so the origin is a multiple root of this expression.

    Works on scalars and on NumPy arrays (evaluated element-wise).
    """
    polynomial_part = z**16 + 7*z**11
    hyperbolic_part = 6*z**9*np.sinh(np.pi*z)
    return polynomial_part + hyperbolic_part

def f_den(z):
    """Denominator of the meromorphic test function.

    The lowest-order term is z**2, so the origin is a double root.
    Works on scalars and on NumPy arrays (evaluated element-wise).
    """
    higher_terms = z**12 + 3*z**5
    return higher_terms + z**2

def f(z):
    """Evaluate the meromorphic function f_num(z) / f_den(z)."""
    numerator = f_num(z)
    denominator = f_den(z)
    return numerator / denominator

# 2. Multiplicity and Root Hunting
def get_multiplicity(z0, is_numerator):
    """Estimate the order of the root at ``z0`` with a local winding integral.

    The selected function (``f_num`` when ``is_numerator`` is true, otherwise
    ``f_den``) is sampled on a circle of radius 1e-5 around ``z0``; the total
    change of its unwrapped phase, divided by 2*pi, is rounded to an integer.

    Returns:
        The rounded winding count, but never less than 1.
    """
    radius = 1e-5
    angles = np.linspace(0, 2*np.pi, 1000)
    loop = z0 + radius * np.exp(1j * angles)

    target = f_num if is_numerator else f_den
    continuous_phase = np.unwrap(np.angle(target(loop)))

    # Summing the phase increments gives the net phase change around the loop.
    turns = np.sum(np.diff(continuous_phase)) / (2 * np.pi)
    order = int(round(turns))

    # The point is assumed to be a root already, so the result is floored at 1.
    return order if order > 1 else 1

def find_root_worker(is_numerator, seed):
    """Run one ``fsolve`` root search starting from ``seed``.

    Args:
        is_numerator: True to search for roots of ``f_num``, False for ``f_den``.
        seed: Two-element starting point ``(real, imag)``.

    Returns:
        The converged root as a ``complex``, or ``None`` when ``fsolve`` does
        not report success (status flag other than 1).
    """
    target = f_num if is_numerator else f_den

    def residual(xy):
        # fsolve works on real vectors, so split the complex value into
        # its real and imaginary components.
        value = target(complex(xy[0], xy[1]))
        return [value.real, value.imag]

    solution, _info, status, _message = fsolve(
        residual, seed, full_output=True, xtol=1e-10
    )
    if status != 1:
        return None
    return complex(solution[0], solution[1])

def get_unique_roots_parallel(is_numerator, n_attempts=500):
    """Find the distinct roots of ``f_num`` or ``f_den`` and their multiplicities.

    Random seeds drawn uniformly from the square [-2.5, 2.5] x [-2.5, 2.5] are
    handed to ``find_root_worker`` on a thread pool. Converged results are
    de-duplicated with an absolute tolerance of 1e-4, and each surviving root
    gets its multiplicity from ``get_multiplicity``.

    Args:
        is_numerator: True to analyse ``f_num``, False to analyse ``f_den``.
        n_attempts: Number of random starting points to try.

    Returns:
        A pair ``(all_roots, unique_data)``. ``unique_data`` is a list of
        ``(root, multiplicity)`` tuples sorted by rounded real part, then
        rounded imaginary part. ``all_roots`` repeats each root
        ``multiplicity`` times.
    """
    seeds = [np.random.uniform(-2.5, 2.5, 2) for _ in range(n_attempts)]
    with ThreadPoolExecutor() as executor:
        results = list(executor.map(lambda s: find_root_worker(is_numerator, s), seeds))

    # The origin is a known root and is included manually so the solver cannot
    # miss it. It goes FIRST so that it becomes the representative of its
    # cluster: a slightly-off solver result near 0 would otherwise be kept
    # instead, and the 1e-5 winding circle drawn around that inexact point can
    # fail to enclose the true root, under-reporting its multiplicity.
    candidates = [0.0 + 0.0j]
    candidates.extend(r for r in results if r is not None)

    unique_data = []
    for r in candidates:
        if any(np.isclose(r, u[0], atol=1e-4) for u in unique_data):
            continue
        unique_data.append((r, get_multiplicity(r, is_numerator)))

    unique_data.sort(key=lambda x: (round(x[0].real, 4), round(x[0].imag, 4)))
    all_roots = [r for r, m in unique_data for _ in range(m)]
    return all_roots, unique_data

# 3. Execution and Processing
R_contour = 2.0
zeros_all, zeros_unique = get_unique_roots_parallel(True)
poles_all, poles_unique = get_unique_roots_parallel(False)

# Numerical Winding Calculation: track the phase of f along the circle
# |z| = R_contour, evaluating the samples in 8 chunks on a thread pool.
phi_vals = np.linspace(0, 2*np.pi, 300000)


def get_p(p):
    """Return the phase of f at the contour points with angles ``p``."""
    return np.angle(f(R_contour * np.exp(1j * p)))


with ThreadPoolExecutor() as executor:
    phases = np.concatenate(list(executor.map(get_p, np.array_split(phi_vals, 8))))
unwrapped_phases = np.unwrap(phases)  # unwrap once and reuse both endpoints
winding_num = int(round((unwrapped_phases[-1] - unwrapped_phases[0]) / (2 * np.pi)))

# The argument principle relates the winding number to the zeros and poles
# INSIDE the contour, so roots found at or beyond |z| = R_contour must not
# enter the Z - P comparison.
zeros_inside = sum(m for r, m in zeros_unique if abs(r) < R_contour)
poles_inside = sum(m for p, m in poles_unique if abs(p) < R_contour)

# 4. Final Printing
print(f"{'TYPE':<8} | {'REAL':<10} | {'IMAG':<10} | {'MULTIPLICITY':<12} | {'MAGNITUDE'}")
print("-" * 65)
for r, m in zeros_unique:
    print(f"{'ZERO':<8} | {r.real:10.5f} | {r.imag:10.5f} | {m:<12} | {np.abs(r):.4f}")
for p, m in poles_unique:
    print(f"{'POLE':<8} | {p.real:10.5f} | {p.imag:10.5f} | {m:<12} | {np.abs(p):.4f}")

print(f"\n--- SUMMARY ---")
print(f"Total Z (with mult): {len(zeros_all)}")
print(f"Total P (with mult): {len(poles_all)}")
print(f"Winding (Z-P): {zeros_inside - poles_inside} | Integral: {winding_num}")

# 5. Phase Portrait Plotting
plt.figure(figsize=(11, 9))
res = 500
lim = 2.5
x, y = np.linspace(-lim, lim, res), np.linspace(-lim, lim, res)
X, Y = np.meshgrid(x, y)
# Domain coloring background
plt.pcolormesh(X, Y, np.angle(f(X + 1j*Y)), cmap='hsv', alpha=0.4, shading='gouraud')

# Overlay Roots - markers grow with multiplicity.
# Only the first marker of each kind carries a legend label, so the legend
# shows a single entry per kind.
for idx, (r, m) in enumerate(zeros_unique):
    plt.scatter(r.real, r.imag, s=50*m, edgecolors='blue', facecolors='none', lw=1.5,
                label='Zeros' if idx == 0 else "")
for idx, (p, m) in enumerate(poles_unique):
    plt.scatter(p.real, p.imag, s=50*m, marker='*', color='black',
                label='Poles' if idx == 0 else "")

# Contour and Title
plt.plot(R_contour*np.cos(phi_vals), R_contour*np.sin(phi_vals), 'k--', label=f'Contour R={R_contour}')
plt.title(fr"$\Delta \arg f(z) = 2\pi(Z - P)$ | Winding = {winding_num}")
plt.axis('equal')
plt.grid(True, alpha=0.2)
plt.legend(loc='upper right')
plt.show()
 
$ cat plotMulti3DSeaborn01.py
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.tri import Triangulation
from concurrent.futures import ProcessPoolExecutor
# --- Parallel Worker Functions ---

def calc_paraboloid():
    """Build the surface z = x**2 + y**2 on a 100 x 100 grid over [-5, 5]^2."""
    X, Y = np.meshgrid(np.linspace(-5, 5, 100), np.linspace(-5, 5, 100))
    Z = X**2 + Y**2
    # Return (plot_type, data, palette, title)
    return ('surface', (X, Y, Z), 'rocket', 'Paraboloid')


def calc_sine_tri():
    """Build a triangulated z = sin(sqrt(x**2 + y**2)) on a 50 x 50 grid."""
    X, Y = np.meshgrid(np.linspace(-5, 5, 50), np.linspace(-5, 5, 50))
    Z = np.sin(np.sqrt(X**2 + Y**2))
    triang = Triangulation(X.flatten(), Y.flatten())
    return ('trisurf', (triang, Z.flatten()), 'viridis', 'Sine Triangulation')


def calc_mobius():
    """Build the parametric coordinates of a Möbius strip."""
    theta, w = np.meshgrid(np.linspace(0, 2*np.pi, 100), np.linspace(-0.5, 0.5, 10))
    r = 1 + w * np.cos(theta / 2)
    x, y, z = r * np.cos(theta), r * np.sin(theta), w * np.sin(theta / 2)
    return ('surface', (x, y, z), 'mako', 'Möbius Strip')


# Everything below stays under the main guard: `results` only exists here, and
# worker processes may re-import this module, which must not trigger rendering.
if __name__ == "__main__":
    # Apply Seaborn dashboard styling
    sns.set_theme(style="white")

    # 1. Execute calculations in parallel
    with ProcessPoolExecutor() as executor:
        futures = [
            executor.submit(calc_paraboloid),
            executor.submit(calc_sine_tri),
            executor.submit(calc_mobius)
        ]
        results = [f.result() for f in futures]

    # 2. Main Thread Rendering: Combined Dashboard
    fig = plt.figure(figsize=(18, 6))
    for i, (plot_type, data, palette, title) in enumerate(results, 1):
        ax = fig.add_subplot(1, 3, i, projection='3d')
        # Pull the Seaborn color palette
        cmap = sns.color_palette(palette, as_cmap=True)

        if plot_type == 'surface':
            ax.plot_surface(*data, cmap=cmap, edgecolor='none', alpha=0.9)
        else:
            ax.plot_trisurf(*data, cmap=cmap, edgecolor='none')

        ax.set_title(title, fontsize=14, fontweight='bold', pad=20)
        ax.xaxis.set_pane_color((1.0, 1.0, 1.0, 0.0))

    plt.tight_layout()
    plt.show()


 










 

$ cat complexThreaded09.py
import os
import numpy as np
import matplotlib.pyplot as plt
from cxroots import Circle
from concurrent.futures import ThreadPoolExecutor

# Silence Qt warnings: the rule string switches off every "*.debug" logging
# category and the "qt.qpa.fonts" warning category.
os.environ["QT_LOGGING_RULES"] = "*.debug=false;qt.qpa.fonts.warning=false"

def count_zeros_task(f, df, contour_points):
  """Count zeros enclosed by a sampled contour via the Argument Principle.

  Approximates (1 / (2*pi*i)) * integral of df/f dz as a discrete sum over
  the contour samples and rounds the real part to the nearest integer.

  Args:
    f: Callable evaluated on the array of contour samples.
    df: Callable returning the derivative of ``f`` at the same samples.
    contour_points: Complex samples along the contour; the step after the
      last sample wraps back to the first one.

  Returns:
    The rounded count as a Python ``int``.
  """
  values = f(contour_points)
  derivatives = df(contour_points)
  log_derivative = derivatives / values

  # Appending the first sample makes the final step return to the start.
  steps = np.diff(contour_points, append=contour_points[0])
  contour_integral = np.sum(log_derivative * steps)

  winding = contour_integral / (2j * np.pi)
  return int(np.round(winding.real))

def find_roots_task(contour, f, df):
  """Locate the roots of ``f`` using the contour's own ``roots`` method.

  Args:
    contour: Object exposing ``roots(f, df)`` (a cxroots contour in this script).
    f: Function whose roots are sought.
    df: Derivative of ``f``.

  Returns:
    Whatever ``contour.roots(f, df)`` returns.
  """
  located = contour.roots(f, df)
  return located

# 1. Setup Data
# One radius drives both the sampled circle and the cxroots contour, so the
# two representations of the contour cannot drift apart.
CONTOUR_RADIUS = 4


def f(z):
  """Polynomial under study: 4z^5 + 4z^3 - 4z + 9."""
  return 4*z**5 + 4*z**3 - 4*z + 9


def df(z):
  """Derivative of ``f``: 20z^4 + 12z^2 - 4."""
  return 20*z**4 + 12*z**2 - 4


t = np.linspace(0, 2 * np.pi, 1000)
circle_pts = CONTOUR_RADIUS * np.exp(1j * t)
C = Circle(0, CONTOUR_RADIUS)

print("Starting concurrent calculations...")

# 2. Parallel Execution
with ThreadPoolExecutor() as executor:
  # Submit both tasks to run simultaneously
  future_count = executor.submit(count_zeros_task, f, df, circle_pts)
  future_roots = executor.submit(find_roots_task, C, f, df)

  # Retrieve results (this waits for each to finish)
  zero_count = future_count.result()
  roots_result = future_roots.result()

# 3. Output and Visualization
print(f"\nVerification (Argument Principle): {zero_count} zeros found.")
print(f"Detailed Root Analysis:\n{roots_result}")

# Plotting must happen on the main thread
roots_result.show()
plt.show()

No comments:

Post a Comment