-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_example_thumbnails.py
More file actions
executable file
·513 lines (438 loc) · 23 KB
/
Copy pathgenerate_example_thumbnails.py
File metadata and controls
executable file
·513 lines (438 loc) · 23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
#!/usr/bin/env python3
"""
Renders a real PNG thumbnail for every CppMode example sketch by actually
running each one in a headless browser for a moment and capturing what it
draws. These are used by the examples/index.html gallery grid (built by
regen.py) instead of any third-party screenshots.
Requires (on whichever machine actually runs this):
pip install playwright pillow --break-system-packages
playwright install chromium
Usage:
python3 generate_example_thumbnails.py
Reads every example .js file from:
/home/pep/Projects/processing-cpp.github.io/assets/examples_js/Basics/<category>/<Example_Name>/<Example_Name>.js
Writes one PNG per example to:
/home/pep/Projects/processing-cpp.github.io/assets/examples_thumbs/<category>/<example-slug>.png
Each sketch renders at its OWN actual size (read from its createCanvas/size
call, same convention regen.py already uses), not forced into a square --
forcing non-square sketches into THUMB_SIZE x THUMB_SIZE distorted their
framing and cropped content out. The full-size capture is then scaled down
so that its longest side is at most THUMB_SIZE pixels, keeping the sketch's
own aspect ratio with no padding or letterboxing added, so the entire
original drawing is always visible in the thumbnail, never cropped or squished.
Sketches are given RENDER_DELAY_MS to actually draw a few frames (important
for noise/animation-based sketches that build up visual complexity over
time rather than looking right on frame 0), then the canvas is captured
and the browser page is closed before moving to the next sketch.
Sketches that call loadImage/loadFont/loadModel have their asset paths
rewritten (see fix_asset_paths()) to point at a small local HTTP server
that serves the repo's assets/ folder straight from disk, so they render
with their real images instead of being skipped. Sketches that load actual
*data* (loadStrings/loadJSON/loadTable/loadXML/requestImage) still get a
placeholder SVG tile, since there's nothing meaningfully visual to
screenshot for those in this standalone context. Sketches that call
noCanvas() are captured with a page-level screenshot instead of a canvas
read, and sketches that merely use fetch() are rendered normally.
"""
import os
import re
import json
import base64
import socket
import random
import threading
import http.server
import functools
from playwright.sync_api import sync_playwright
from PIL import Image
import io
# Absolute path of the site checkout; every other path below derives from it.
REPO_ROOT = "/home/pep/Projects/processing-cpp.github.io"
# Input tree walked by discover_examples(): <category>/<Example_Name>/<Example_Name>.js
BASE_JS = os.path.join(REPO_ROOT, "assets/examples_js/Basics")
# Output tree: one thumbnail (or placeholder) per example, plus manifest.json.
OUT_DIR = os.path.join(REPO_ROOT, "assets/examples_thumbs")
# Directory handed to the local asset server started in main().
ASSETS_DIR = os.path.join(REPO_ROOT, "assets")
THUMB_SIZE = 220 # px -- max length of the final thumbnail's longest side (aspect ratio preserved, no padding/cropping); also the width/height of placeholder SVG tiles
MAX_RENDER_DIM = 800 # px -- cap on the native width/height actually rendered, for speed
RENDER_DELAY_MS = 1200 # base settle time (ms): the wait before simulated input is a third of this, and the final wait before capture is the full value
PAGE_TIMEOUT_MS = 8000 # ms -- timeout passed to page.set_content() and page.screenshot(), in case something hangs
# Sketches that load actual *data* (not images) -- nothing meaningfully
# visual to screenshot for these in a standalone context, so they get a
# placeholder tile instead of being attempted. Note: fetch() alone is NOT
# included here -- several real examples (e.g. Shape_Vertices) use fetch()
# to load SVG path data while still drawing normally to a real canvas, so
# excluding on fetch() alone would wrongly skip a renderable sketch. Only
# noCanvas() reliably means there's no canvas to capture at all, which is
# handled separately below via a page-level (not canvas) screenshot.
#
# This is a plain substring search over the whole source (no word
# boundaries, no comment stripping), so a loader name that appears anywhere
# in the file -- including inside a comment -- counts as a match.
_data_loading_re = re.compile(
r"loadStrings|loadJSON|loadTable|requestImage|loadXML"
)
# Matches an actual call, i.e. the identifier noCanvas followed by "(".
_no_canvas_re = re.compile(r"\bnoCanvas\s*\(")
# loadImage/loadFont/loadModel ARE attempted (not skipped) -- their asset
# paths just need rewriting to a real reachable URL first, same general
# idea as fix_asset_paths() in regen.py, except this points at a LOCAL
# server (see start_local_asset_server below) reading straight from disk,
# not the live deployed site. Using the live URL would mean thumbnails
# silently 404 for any asset that's been added/changed locally but not
# yet pushed -- exactly what happened with rocket.obj. Reading from disk
# means thumbnails always reflect what's actually on disk right now.
#
# Groups: 1 = Image|Font|Model, 2 = the quoted path, 3 = any trailing
# ", ..." arguments (kept verbatim). The leading \b stops the pattern from
# firing inside longer identifiers such as preloadImage(.
_asset_load_re = re.compile(
    r"""\bload(Image|Font|Model)\s*\(\s*["']([^"']+)["']\s*((?:,[^)]*)?)\)"""
)

# Paths that already say where they live -- anything with a URL scheme
# (http:, https:, data:, blob:, ...) or a protocol-relative "//host/..." --
# must not get the asset base URL glued in front of them.
_absolute_url_re = re.compile(r"^(?:[A-Za-z][A-Za-z0-9+.\-]*:|//)")


def fix_asset_paths(js_code, asset_base_url):
    """Rewrite relative loadImage/loadFont/loadModel filenames to a real
    reachable asset URL, so sketches that load their own images actually
    have something to load when rendered standalone (outside the real
    example page, which normally handles this rewrite itself).

    Args:
        js_code: The sketch's JavaScript source.
        asset_base_url: Prefix prepended to each relative filename; it is
            concatenated as-is, so it should end with "/".

    Returns:
        The source with each matching call's first string argument
        prefixed. The rewritten path is always emitted in double quotes.
        Calls whose path is already absolute (has a scheme, or starts with
        "//") are returned untouched, as is any extra argument list.
    """
    def replacer(m):
        kind, path, extra_args = m.groups()
        if _absolute_url_re.match(path):
            # Already a full URL / data URI: prefixing would break it.
            return m.group(0)
        return f'load{kind}("{asset_base_url}{path}"{extra_args})'

    return _asset_load_re.sub(replacer, js_code)
class _CORSRequestHandler(http.server.SimpleHTTPRequestHandler):
    """Static-file request handler that adds a permissive CORS header.

    Behaves like SimpleHTTPRequestHandler, except that every response
    carries ``Access-Control-Allow-Origin: *`` and the per-request access
    log is silenced. The stock handler emits no CORS headers, which is
    enough for plain <img>/<script> tag loads but not for fetch() or for
    images requested with crossOrigin set -- both of which, according to
    the original author's notes, real example sketches rely on (SVG-loading
    shape examples call fetch(), and p5's loadImage() sets
    crossOrigin='anonymous'). Those notes also state that the deployed
    GitHub Pages site sends the same header, so this mirrors it locally.
    """

    def end_headers(self):
        # Append the CORS header just before the base class closes the
        # header block, so it lands on every response type.
        self.send_header("Access-Control-Allow-Origin", "*")
        super().end_headers()

    def log_message(self, format, *args):
        # Deliberately a no-op: the access-log lines are just noise here.
        return None
def start_local_asset_server(directory, preferred_port=8765):
    """Serve `directory` over HTTP on 127.0.0.1 from a background thread.

    Ports are probed one at a time, beginning at `preferred_port` and
    moving upward, for at most 20 attempts; the first one that binds is
    used. Requests are handled by _CORSRequestHandler, so every response
    carries Access-Control-Allow-Origin: *.

    Args:
        directory: Folder to serve (main() passes the repo's assets/ dir).
        preferred_port: First port number to try.

    Returns:
        A (base_url, httpd) tuple. base_url looks like
        "http://127.0.0.1:8765/data/" -- the trailing "data/" is already
        included so it can be used directly as the asset base URL. The
        caller is responsible for calling httpd.shutdown() when finished.

    Raises:
        RuntimeError: If none of the 20 candidate ports could be bound.
    """
    make_handler = functools.partial(_CORSRequestHandler, directory=directory)

    for candidate in range(preferred_port, preferred_port + 20):
        try:
            httpd = http.server.ThreadingHTTPServer(("127.0.0.1", candidate), make_handler)
        except OSError:
            # Port unavailable (typically already in use) -- try the next one.
            continue
        port = candidate
        break
    else:
        raise RuntimeError("Could not find a free port for the local asset server.")

    # Daemon thread: it must never keep the process alive on its own.
    worker = threading.Thread(target=httpd.serve_forever, daemon=True)
    worker.start()

    return f"http://127.0.0.1:{port}/data/", httpd
# Catches files that are actually still Processing/Java (.pde) source sitting
# in the examples_js tree by mistake -- e.g. "int xvals[640];" or
# "void setup() {" -- rather than real JavaScript. Attempting to run these
# as JS produces a confusing "Unexpected identifier" page error; this lets
# us flag it clearly as a content/data bug instead.
#
# Two alternatives, both anchored at the start of a line (re.M):
#   1. a Java-style typed declaration: <type> <name>, optionally followed by
#      an array suffix like [640], then "=", ";" or "(";
#   2. a "void setup() {" / "void draw() {" function header.
_non_js_source_re = re.compile(
r"^\s*(int|float|boolean|void|String|char)\s+\w+(\[\d*\])?\s*(=|;|\()"
r"|^\s*void\s+(setup|draw)\s*\(\s*\)\s*\{",
re.M,
)
# First createCanvas(w, h) / size(w, h) call with literal integer arguments.
# The leading \b keeps calls such as img.resize(50, 50) from being mistaken
# for a size(...) call.
_canvas_size_detect_re = re.compile(
    r"\b(?:createCanvas|size)\s*\(\s*(\d+)\s*,\s*(\d+)"
)


def detect_canvas_size(js_code, max_dim=None):
    """Read the sketch's own requested width/height (without rewriting
    it) so it can be rendered at its real native size and aspect ratio,
    instead of being forced into a square that distorts framing and
    crops content out.

    Args:
        js_code: The sketch's JavaScript source.
        max_dim: Upper bound for the longer side of the returned size.
            Defaults to the module-level MAX_RENDER_DIM.

    Returns:
        A (width, height) tuple of positive ints. Sizes whose longer side
        exceeds `max_dim` are scaled down proportionally (each side at
        least 1). Falls back to 640x360 (the common Processing default) if
        no createCanvas/size call with literal integers is found, or if
        the call asks for a 0x0 canvas.
    """
    if max_dim is None:
        max_dim = MAX_RENDER_DIM

    m = _canvas_size_detect_re.search(js_code)
    if m is None:
        return 640, 360

    w, h = int(m.group(1)), int(m.group(2))
    longest = max(w, h)
    if longest == 0:
        # createCanvas(0, 0): no usable size, and dividing by it would raise.
        return 640, 360

    # Cap render size for speed/sanity; the thumbnail is downscaled
    # afterward regardless, so a huge native canvas doesn't need to be
    # rendered at full resolution to produce a good thumbnail.
    scale = min(1.0, max_dim / longest)
    return max(1, int(w * scale)), max(1, int(h * scale))
def scale_down(png_bytes, max_dim):
    """Decode PNG bytes and shrink the image to fit a max_dim x max_dim box.

    The image is converted to RGBA and resized in place with LANCZOS
    resampling via Image.thumbnail(), which keeps the aspect ratio. Nothing
    is padded or letterboxed: a 640x360 capture stays roughly 16:9 rather
    than becoming a square with bars.

    Args:
        png_bytes: Encoded image data (as produced by a page/canvas capture).
        max_dim: Largest allowed width and height, in pixels.

    Returns:
        The resized PIL image (not yet saved anywhere).
    """
    picture = Image.open(io.BytesIO(png_bytes))
    picture = picture.convert("RGBA")
    bounding_box = (max_dim, max_dim)
    picture.thumbnail(bounding_box, Image.LANCZOS)
    return picture
def strip_comment(code):
    """Trim surrounding whitespace and drop one leading ``/** ... */`` block.

    Only a comment that opens with ``/**`` at the very start (after
    trimming) is removed, up to and including the first ``*/``. If that
    terminator is missing, the trimmed text is returned unchanged.
    """
    trimmed = code.strip()
    if not trimmed.startswith("/**"):
        return trimmed

    _, terminator, remainder = trimmed.partition("*/")
    if not terminator:
        # Unterminated header comment: leave the source as it is.
        return trimmed
    return remainder.strip()
def discover_examples(base_dir=None):
    """Walk the examples tree and list every sketch found in it.

    The expected layout is <base_dir>/<category>/<Example_Name>/<Example_Name>.js.
    Categories and examples are visited in sorted order. Entries that are
    not directories, and example directories without a same-named .js
    file, are skipped.

    Args:
        base_dir: Root of the examples tree. Defaults to the module-level
            BASE_JS.

    Returns:
        A list of dicts with keys "category" (directory name), "slug"
        (example name lower-cased with "_" -> "-"), "name" (example name
        with "_" -> " ") and "js_path" (path of the .js file). Empty if
        `base_dir` does not exist (an error line is printed in that case).
    """
    if base_dir is None:
        base_dir = BASE_JS

    examples = []
    if not os.path.isdir(base_dir):
        print(f"ERROR: {base_dir} not found.")
        return examples

    for cat in sorted(os.listdir(base_dir)):
        cat_path = os.path.join(base_dir, cat)
        if not os.path.isdir(cat_path):
            continue
        for example in sorted(os.listdir(cat_path)):
            ex_path = os.path.join(cat_path, example)
            if not os.path.isdir(ex_path):
                continue
            js_file = os.path.join(ex_path, example + ".js")
            # isfile, not exists: a directory that happens to be called
            # "<Example>.js" is not a sketch and could not be opened later.
            if not os.path.isfile(js_file):
                continue
            examples.append({
                "category": cat,
                "slug": example.replace("_", "-").lower(),
                "name": example.replace("_", " "),
                "js_path": js_file,
            })
    return examples
def make_placeholder_svg(label, size):
    """Build a plain, neutral placeholder tile as an SVG document string.

    Used for sketches that are deliberately not rendered, so the grid
    still has a tile instead of a broken image, while staying visually
    distinct from a real thumbnail.

    Args:
        label: Text shown in the middle of the tile. Labels longer than 18
            characters are cut to 18 and suffixed with an ellipsis.
        size: Width and height of the square tile, in pixels.

    Returns:
        The SVG markup. The label is XML-escaped, so names containing
        "&", "<" or ">" still produce a well-formed document.
    """
    from html import escape  # local import: only this function needs it

    short_label = (label[:18] + "…") if len(label) > 18 else label
    # Escape AFTER truncating so an entity such as "&amp;" is never cut in half.
    safe_label = escape(short_label, quote=False)
    return (
        f'<svg xmlns="http://www.w3.org/2000/svg" width="{size}" height="{size}">'
        f'<rect width="100%" height="100%" fill="#1a1a1a"/>'
        f'<text x="50%" y="50%" fill="#666" font-family="monospace" font-size="13" '
        f'text-anchor="middle" dominant-baseline="middle">{safe_label}</text>'
        f"</svg>"
    )
def _build_sketch_html(js_code):
    """Wrap sketch source in a minimal standalone page that loads p5.js."""
    # The sketch is dropped verbatim into an inline <script> element, so the
    # only sequence that needs neutralizing is a closing script tag (matched
    # case-insensitively, as HTML does). Backticks must NOT be escaped: the
    # code is not inside a JS template literal, and turning ` into \` makes
    # any sketch that uses template literals a syntax error.
    safe = re.sub(
        r"</(script)",
        lambda m: "<\\/" + m.group(1),
        js_code,
        flags=re.IGNORECASE,
    )
    return f"""<!DOCTYPE html>
<html><head><style>*{{margin:0;padding:0;}}body{{overflow:hidden;background:#000;}}</style>
<script src="https://cdnjs.cloudflare.com/ajax/libs/p5.js/1.9.0/p5.min.js"></script>
</head><body><script>{safe}</script></body></html>"""


def _simulate_input(page, render_w, render_h):
    """Best-effort mouse wander, click and key presses over the sketch."""
    # Resting point: near the center, offset randomly each run so repeated
    # runs don't all freeze on the same pixel while the subject still stays
    # roughly mid-frame.
    center_x, center_y = render_w / 2, render_h / 2
    rest_x = center_x + random.uniform(-render_w * 0.12, render_w * 0.12)
    rest_y = center_y + random.uniform(-render_h * 0.12, render_h * 0.12)

    # A longer, smoother mouse wander across the canvas, so animated/
    # easing/trailing sketches have a chance to build up motion.
    try:
        waypoints = [
            (render_w * 0.15, render_h * 0.85),
            (render_w * 0.35, render_h * 0.25),
            (render_w * 0.65, render_h * 0.75),
            (render_w * 0.85, render_h * 0.35),
            (render_w * 0.30, render_h * 0.60),
            (render_w * 0.70, render_h * 0.20),
            (render_w * 0.45, render_h * 0.50),
            (rest_x, rest_y),
        ]
        for x, y in waypoints:
            page.mouse.move(x, y, steps=10)
            page.wait_for_timeout(120)
        # A press-and-release near the resting point, for
        # mousePressed()/mouseReleased()-driven sketches.
        page.mouse.down()
        page.wait_for_timeout(90)
        page.mouse.up()
        page.wait_for_timeout(120)
    except Exception:
        pass  # input simulation is best-effort; never let it abort the render

    # Repeated key presses for keyPressed()-driven sketches; repeating gives
    # accumulating-effect sketches a chance to visibly move.
    try:
        key_sequence = [
            "ArrowRight", "ArrowRight", "ArrowUp",
            "ArrowDown", "ArrowLeft", "Space",
            "ArrowRight", "Space",
        ]
        for key in key_sequence:
            page.keyboard.press(key)
            page.wait_for_timeout(90)
    except Exception:
        pass  # best-effort, as above

    # A little more mouse motion, ending on the same resting point, so the
    # sketch settles somewhere reasonable before the capture.
    try:
        page.mouse.move(rest_x - render_w * 0.05, rest_y + render_h * 0.05, steps=8)
        page.wait_for_timeout(150)
        page.mouse.move(rest_x, rest_y, steps=8)
    except Exception:
        pass  # best-effort, as above


def _capture_png(page, is_no_canvas, debug_label, console_errors):
    """Return the rendered sketch as PNG bytes, or None if capture failed."""
    if is_no_canvas:
        # No <canvas> element exists -- the sketch draws straight into the
        # page (e.g. injecting an <svg>). Capture the viewport itself.
        try:
            return page.screenshot(timeout=PAGE_TIMEOUT_MS)
        except Exception as e:
            print(f" [{debug_label}] page screenshot failed: {e}")
            if console_errors:
                print(f" [{debug_label}] console errors: {console_errors[:3]}")
            return None

    if page.query_selector("canvas") is None:
        if console_errors:
            print(f" [{debug_label}] no canvas; console errors: {console_errors[:3]}")
        return None

    # Capture via canvas.toDataURL() inside the page rather than
    # Playwright's element.screenshot(). Per the original author's notes,
    # the latter waits for the element to count as "visible", which WebGL
    # canvases could fail or be very slow to satisfy in headless mode;
    # reading pixels directly has no such precondition.
    try:
        data_url = page.evaluate(
            "() => document.querySelector('canvas').toDataURL('image/png')"
        )
    except Exception as e:
        print(f" [{debug_label}] toDataURL capture failed: {e}")
        return None

    if not data_url or not data_url.startswith("data:image/png;base64,"):
        print(f" [{debug_label}] toDataURL returned unexpected content (possibly a tainted "
              f"canvas from cross-origin content).")
        return None
    return base64.b64decode(data_url.split(",", 1)[1])


def render_thumbnail(browser, js_code, out_path, asset_base_url, debug_label="", is_no_canvas=False):
    """Render one sketch in a fresh page and save a scaled-down PNG of it.

    A brand-new page is opened for every sketch and closed afterward; the
    original author's notes explain that reusing a page let one sketch's
    global p5 state leak into the next. The viewport matches the sketch's
    own canvas size (see detect_canvas_size). After a short settle time,
    some mouse and keyboard input is simulated so input-driven sketches
    don't look frozen, then the result is captured and scaled so its
    longest side is at most THUMB_SIZE (aspect ratio kept, no padding).

    Args:
        browser: Playwright browser used to open the page.
        js_code: The sketch's JavaScript source.
        out_path: Where the PNG thumbnail is written.
        asset_base_url: Prefix for relative loadImage/loadFont/loadModel
            paths (see fix_asset_paths).
        debug_label: Tag used in diagnostic output lines.
        is_no_canvas: Set for sketches that call noCanvas() and draw into
            the page itself; these are captured with a viewport screenshot
            instead of canvas.toDataURL().

    Returns:
        True if a thumbnail was written, False if nothing could be
        captured (a diagnostic line is printed for most failure causes).
    """
    render_w, render_h = detect_canvas_size(js_code)
    html = _build_sketch_html(fix_asset_paths(js_code, asset_base_url))

    page = browser.new_page(viewport={"width": render_w, "height": render_h})
    console_errors = []
    page.on("console", lambda msg: console_errors.append(msg.text) if msg.type == "error" else None)
    page.on("pageerror", lambda exc: console_errors.append(f"pageerror: {exc}"))
    try:
        page.set_content(html, timeout=PAGE_TIMEOUT_MS)
        # Let the sketch initialize and draw its first few frames before
        # any simulated input.
        page.wait_for_timeout(RENDER_DELAY_MS // 3)
        _simulate_input(page, render_w, render_h)
        # Final settle time before the capture.
        page.wait_for_timeout(RENDER_DELAY_MS)

        png_bytes = _capture_png(page, is_no_canvas, debug_label, console_errors)
        if png_bytes is None:
            return False
        scale_down(png_bytes, THUMB_SIZE).save(out_path)
        return True
    finally:
        page.close()
def _write_placeholder_tile(ex, out_path):
    """Write the SVG placeholder for `ex` alongside where its PNG would go."""
    # splitext only swaps the final extension; str.replace would also
    # rewrite any other ".png" that happens to occur in the path.
    svg_path = os.path.splitext(out_path)[0] + ".svg"
    # Explicit UTF-8: the label may contain "…", which a non-UTF-8 locale
    # default encoding cannot always represent.
    with open(svg_path, "w", encoding="utf-8") as f:
        f.write(make_placeholder_svg(ex["name"], THUMB_SIZE))


def main():
    """Generate a thumbnail (or placeholder) for every example sketch.

    Discovers the examples, starts the local asset server, renders each
    sketch in headless Chromium, and finally writes manifest.json to
    OUT_DIR with one entry per rendered or placeholder tile. Sketches whose
    render fails are listed in the summary but get no manifest entry.
    """
    examples = discover_examples()
    print(f"Found {len(examples)} example sketches.")
    os.makedirs(OUT_DIR, exist_ok=True)
    if not os.path.isdir(ASSETS_DIR):
        print(f"ERROR: {ASSETS_DIR} not found -- can't serve local assets.")
        return
    asset_base_url, asset_httpd = start_local_asset_server(ASSETS_DIR)
    print(f"Serving {ASSETS_DIR} locally at {asset_base_url} "
          f"(reads straight from disk -- no dependency on what's been pushed live).")
    manifest = []
    rendered = 0
    skipped = 0
    failed = []  # "category/slug" labels; slugs alone can repeat across categories
    try:
        with sync_playwright() as p:
            browser = p.chromium.launch()
            try:
                for ex in examples:
                    label = f"{ex['category']}/{ex['slug']}"
                    cat_out_dir = os.path.join(OUT_DIR, ex["category"])
                    os.makedirs(cat_out_dir, exist_ok=True)
                    out_path = os.path.join(cat_out_dir, ex["slug"] + ".png")
                    with open(ex["js_path"], encoding="utf-8", errors="replace") as f:
                        js_code = strip_comment(f.read())

                    if _non_js_source_re.search(js_code):
                        # This .js file looks like it's actually still
                        # Processing/Java (.pde) source -- e.g.
                        # "int xvals[640];" -- not real JavaScript. That is a
                        # content bug in examples_js, not something to paper
                        # over silently.
                        print(f" WARNING: {ex['category']}/{ex['slug']} -- {ex['js_path']} looks like "
                              f"unconverted Processing/Java source, not JavaScript. Skipping with a "
                              f"placeholder; fix the .js file at the source to get a real thumbnail.")
                        _write_placeholder_tile(ex, out_path)
                        manifest.append({**ex, "thumb": ex["slug"] + ".svg", "placeholder": True, "reason": "non_js_source"})
                        skipped += 1
                        continue

                    if _data_loading_re.search(js_code):
                        # Don't attempt to render -- write a placeholder so
                        # the grid tile exists, but flag it in the manifest.
                        _write_placeholder_tile(ex, out_path)
                        manifest.append({**ex, "thumb": ex["slug"] + ".svg", "placeholder": True})
                        skipped += 1
                        continue

                    is_no_canvas = bool(_no_canvas_re.search(js_code))
                    try:
                        ok = render_thumbnail(
                            browser, js_code, out_path, asset_base_url,
                            debug_label=label,
                            is_no_canvas=is_no_canvas,
                        )
                        if ok:
                            manifest.append({**ex, "thumb": ex["slug"] + ".png", "placeholder": False})
                            rendered += 1
                            print(f" rendered: {ex['category']}/{ex['slug']}" + (" (page capture, no canvas)" if is_no_canvas else ""))
                        else:
                            failed.append(label)
                            reason = "page screenshot failed" if is_no_canvas else "no canvas found"
                            print(f" FAILED ({reason}): {ex['category']}/{ex['slug']}")
                    except Exception as e:
                        # One broken sketch must not abort the whole run.
                        failed.append(label)
                        print(f" FAILED ({e}): {ex['category']}/{ex['slug']}")
            finally:
                browser.close()
    finally:
        # shutdown() only stops serve_forever(); server_close() is what
        # actually releases the listening socket.
        asset_httpd.shutdown()
        asset_httpd.server_close()

    manifest_path = os.path.join(OUT_DIR, "manifest.json")
    with open(manifest_path, "w", encoding="utf-8") as f:
        json.dump(manifest, f, indent=2)
    print()
    print(f"Rendered: {rendered}")
    print(f"Skipped (external-asset placeholder): {skipped}")
    print(f"Failed: {len(failed)}")
    for label in failed:
        print(f" - {label}")
    print(f"Manifest written to {manifest_path}")
    print("Run regen.py next to rebuild examples/index.html using these thumbnails.")


if __name__ == "__main__":
    main()