Initial Commit

This commit is contained in:
2026-05-14 10:23:47 +02:00
commit a655feff9e
8 changed files with 4357 additions and 0 deletions

View File

@@ -0,0 +1,262 @@
import os
import sys
import requests
import time
import json
import re
import subprocess
from langchain_community.llms import Ollama
# Filesystem location of a local stable-diffusion-webui checkout.
# NOTE(review): machine-specific absolute path; nothing in the visible code reads it.
SD_WEBUI_PATH = "/Users/giers/Tools/stable-diffusion-webui"
def is_sd_webui_running():
    """Report whether something answers HTTP on the local SD WebUI API port.

    Returns:
        True if a GET to the txt2img endpoint yields any HTTP response (the
        status code is ignored), False if the request itself fails.
    """
    try:
        # The endpoint expects POST, so an error status is the normal answer
        # to this GET; receiving any response at all is what counts.
        requests.get("http://127.0.0.1:7860/sdapi/v1/txt2img", timeout=3)
    except requests.RequestException:
        return False
    return True
def start_sd_webui_headless(webui_dir):
    """Spawn ``launch.py`` from *webui_dir* as a background process.

    The child's stdout and stderr are discarded. The function does not wait
    for the process; it returns the ``subprocess.Popen`` handle right away.
    """
    # Flag notes are the original author's intent, not verified here.
    command = ["python3", "launch.py"]
    command.extend([
        "--nowebui",              # do not launch the browser UI
        "--headless",             # no local UI window
        "--api",                  # enable the HTTP API
        "--skip-torch-cuda-test",
        "--no-hashing",           # faster startup
        "--disable-nan-check",    # optional: faster
        "--xformers",
    ])
    return subprocess.Popen(
        command,
        cwd=webui_dir,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
def get_output_dir():
    """Return the ``output`` folder next to this script, creating it if absent."""
    script_dir = os.path.dirname(os.path.abspath(__file__))
    target = os.path.join(script_dir, "output")
    os.makedirs(target, exist_ok=True)
    return target
def get_next_available_filename(base, ext=".png"):
    """Return the first unused path ``<output dir>/<base>-<n><ext>``.

    Counting starts at ``n = 1`` and increases until a path is found that
    does not exist yet. The file itself is not created.
    """
    folder = get_output_dir()
    counter = 1
    candidate = os.path.join(folder, f"{base}-{counter}{ext}")
    while os.path.exists(candidate):
        counter += 1
        candidate = os.path.join(folder, f"{base}-{counter}{ext}")
    return candidate
def flush_print(*args, **kwargs):
    """Print like the built-in ``print`` and flush the stream written to.

    Accepts the same arguments as ``print``. The flush is applied to the
    ``file`` target when one is given, otherwise to ``sys.stdout``. A
    caller-supplied ``flush`` value is overridden, since flushing is the
    point of this helper.
    """
    kwargs["flush"] = True
    print(*args, **kwargs)
# Ollama text-generation endpoint used by stream_ollama().
OLLAMA_URL = "http://localhost:11434/api/generate"
#MODEL = "mistral-small3.1:24b"
# Model name sent to Ollama by stream_ollama() and get_json_from_llm().
MODEL = "mistral:latest"
# Stable Diffusion WebUI txt2img endpoint.
# NOTE(review): not read by the visible code, which renders through diffusers instead.
SD_URL = "http://127.0.0.1:7860/sdapi/v1/txt2img"
# Instruction template for the first LLM call; main() replaces every "{{OBJECT}}"
# with the object name given on the command line.
# NOTE(review): "Add 23 specific ..." in step 1 looks like a garbled "2-3" — confirm
# the intended wording before changing the prompt text.
META_PROMPT = """
You are an expert at writing concise, detailed Stable Diffusion prompts for 3D rendered objects as seen in professional game development. When I give you an object name, carefully follow these instructions step by step:
1. Subject ({{OBJECT}}):
- Start with the object name ({{OBJECT}}).
- Add 23 specific visual or material details (for example: shape, surface texture, design features like “chrome plating,” “organic armor,” “glowing eyes”).
2. Medium & Style:
- Add keywords for a 3D render and concept art look:
concept art, 3D render, game asset, professional game designer, digital sculpture, hyperrealistic, octane render, Unreal Engine 5, high poly.
3. Presentation:
- Explicitly state the object is shown fully in frame, not cropped, and completely visible from a neutral angle.
- No background: isolated on pure white background, or transparent background.
4. Lighting & Quality:
- Use studio lighting, no dramatic shadows, no depth of field, no blur.
- Emphasize sharp focus, ultra detailed, 8K, clean silhouette.
5. Artist Influence (optional):
- If fitting, add a well-known concept artist (example: by Beeple).
6. Negative Prompt:
- Add: Negative prompt: blurry, lowres, bad anatomy, distorted proportions, background, scenery, environment, artifacting, watermark, text, cropped, partial view, depth of field, out of frame, blur, vignette
Output Format:
- Line 1: All positive prompt keywords (subject, details, medium, style, lighting, quality, artist, presentation, background).
- Line 2: Start with Negative prompt: and then the negative keywords above.
Instructions to the LLM:
- The first line must always start with the object and its details.
- The object must be fully visible, not cropped, and entirely in the image frame.
- There must be no background, and the background must be pure white or transparent.
- Do NOT use blur, depth of field, vignette, or any visual effects that obscure details.
- Do NOT add any scenery or environment.
- Be concise and avoid repetition.
Now, generate a Stable Diffusion prompt for the object: {{OBJECT}}.
"""
# Instruction template for the second LLM call; main() replaces "{PROMPT}" with the
# Stable Diffusion prompt produced by the first call. The JSON keys named here
# ("positive prompt", "negative prompt", "dimensions") are the ones
# call_stable_diffusion() reads.
SECOND_PROMPT_TEMPLATE = """
You are a prompt-to-JSON converter for image generation tasks.
Given a Stable Diffusion prompt, extract the positive and negative prompts.
Also, analyze the subject:
- If it is a single character, creature, full object, or person standing/upright, set "dimensions" to [512,768] (vertical).
- If it is a landscape, wide object, multi-character group, or scene, set "dimensions" to [768,512] (horizontal).
- If it is something square or best seen as a square (e.g., shield, logo, face, emblem, single centered item), set "dimensions" to [768,768] (square).
Return a JSON object with these fields:
{
"positive prompt": "...",
"negative prompt": "...",
"dimensions": [W,H]
}
Respond only with the JSON.
Here is the Stable Diffusion prompt:
{PROMPT}
"""
# --- streaming LLM utility ---
def stream_ollama(prompt):
    """Stream a completion from Ollama, echoing it to stdout as it arrives.

    Args:
        prompt: Prompt text posted to ``OLLAMA_URL`` for model ``MODEL``.

    Returns:
        The concatenated response text with surrounding whitespace stripped.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    payload = {"model": MODEL, "prompt": prompt, "stream": True}
    pieces = []
    with requests.post(OLLAMA_URL, json=payload, stream=True, timeout=300) as r:
        r.raise_for_status()
        for line in r.iter_lines():
            if not line:
                continue
            text = line.decode("utf-8")
            try:
                chunk = json.loads(text)["response"]
            except (ValueError, KeyError, TypeError):
                # Not a JSON object carrying a "response" field: pass the raw
                # line through unchanged, as before.
                chunk = text
            print(chunk, end="", flush=True)
            pieces.append(chunk)
    print()
    # Joining once avoids rebuilding the whole string for every chunk.
    return "".join(pieces).strip()
def _extract_json_object(text):
    """Parse the span from the first ``{`` to the last ``}`` of *text*.

    Returns the decoded value, or None (after printing a diagnostic) when no
    such span exists or it is not valid JSON.
    """
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end < start:
        print("Failed to extract JSON!")
        return None
    try:
        return json.loads(text[start:end + 1])
    except json.JSONDecodeError as e:
        print("Error parsing JSON:", e)
        return None


def get_json_from_llm(prompt):
    """Stream *prompt* through Ollama and decode the JSON object in the reply.

    The reply is echoed to stdout while it streams. Text around the JSON
    object (such as code-fence markers) is ignored.

    Returns:
        The decoded JSON value, or None if none could be extracted.
    """
    llm = Ollama(model=MODEL, base_url="http://localhost:11434")
    pieces = []
    for chunk in llm.stream(prompt):
        print(chunk, end="", flush=True)
        pieces.append(chunk)
    print()
    return _extract_json_object("".join(pieces))
def sanitize_filename(s):
    """Turn free text into a lowercase token of ``a-z``, ``0-9`` and ``_``.

    Surrounding whitespace is trimmed, the text is lowercased, each space
    becomes an underscore, and every remaining character outside the allowed
    set is dropped.
    """
    lowered = s.strip().lower()
    underscored = "_".join(lowered.split(" "))
    return re.sub(r'[^a-z0-9_]+', '', underscored)
def call_stable_diffusion(prompt_json, user_input):
    """Render four images with a local diffusers pipeline and save them as PNGs.

    Args:
        prompt_json: Dict with "positive prompt" and "dimensions"
            ([width, height]); "negative prompt", "steps" and "cfg_scale"
            are optional.
        user_input: Object name; its sanitized form is the filename stem.

    Raises:
        KeyError: If "positive prompt" or "dimensions" is missing.
        ValueError: If "dimensions" is not a pair of usable integers.
    """
    # Imported here rather than at module top, as in the original; presumably
    # to keep the heavy ML stack off the import path until it is needed.
    from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
    import torch

    # *** Use your diffusers-converted path! ***
    model_path = "/Volumes/SD/ML-Models/diffusers/dreamshaper_8_diffusers"
    if torch.backends.mps.is_available():
        device = "mps"
        print("Using Apple Silicon MPS backend")
    else:
        device = "cpu"
        print("Warning: Running on CPU (slow)")

    print("Loading model, this may take a while the first time...", flush=True)
    pipe = StableDiffusionPipeline.from_pretrained(
        model_path,
        torch_dtype=torch.float32,
        safety_checker=None,
        local_files_only=True,
    )
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
    pipe = pipe.to(device)
    pipe.set_progress_bar_config(disable=True)
    pipe.enable_attention_slicing()

    # No manual slicing: the former [:75] cut at 75 *characters*, which threw
    # away most of the prompt. The pipeline's tokenizer already truncates
    # over-long prompts at the text encoder's token limit.
    prompt = prompt_json["positive prompt"]
    negative_prompt = prompt_json.get("negative prompt", "")

    # The values come from an LLM, so coerce them to ints and round down to a
    # multiple of 8, which the pipeline requires for width and height.
    width, height = (int(side) // 8 * 8 for side in prompt_json["dimensions"])
    if width <= 0 or height <= 0:
        raise ValueError(f"Unusable image dimensions: {prompt_json['dimensions']!r}")

    num_images = 4
    steps = prompt_json.get("steps", 30)
    guidance_scale = prompt_json.get("cfg_scale", 6.5)

    print(f"Generating {num_images} image(s)...", flush=True)
    images = pipe(
        prompt=[prompt] * num_images,
        negative_prompt=[negative_prompt] * num_images,
        width=width,
        height=height,
        num_inference_steps=steps,
        guidance_scale=guidance_scale,
    ).images

    img_base = sanitize_filename(user_input)
    for idx, img in enumerate(images, start=1):
        filename = get_next_available_filename(f"{img_base}-{idx}")
        img.save(filename)
        print(f"Saved: {filename}")
def main():
    """Command-line entry point: object name -> SD prompt -> JSON -> images.

    Reads the object name from ``sys.argv[1]``, asks the LLM for a Stable
    Diffusion prompt, converts it to JSON with a second LLM call, adds fixed
    generation settings and renders the images.

    Exits with status 1 when the argument is missing, when no valid JSON is
    obtained, or when any step raises.
    """
    if len(sys.argv) < 2:
        print("Usage: python 3d-model-image-prompt-generator.py \"object name\"", flush=True)
        sys.exit(1)
    object_name = sys.argv[1]

    try:
        print(f"\n--- Generating Stable Diffusion prompt for: {object_name} ---\n", flush=True)
        prompt = META_PROMPT.replace("{{OBJECT}}", object_name)

        # 1. SD prompt generation (streamed)
        sd_prompt = stream_ollama(prompt)
        print("\n--- End of Stable Diffusion prompt ---\n", flush=True)

        # 2. JSON conversion (streamed)
        print("--- Generating JSON for image generation ---\n", flush=True)
        second_prompt = SECOND_PROMPT_TEMPLATE.replace("{PROMPT}", sd_prompt)
        prompt_json = get_json_from_llm(second_prompt)
        if not prompt_json:
            print("Failed to get valid JSON. Exiting.", flush=True)
            sys.exit(1)

        # 3. Augment JSON with hard-coded SD settings
        prompt_json["checkpoint"] = "RealismPlus/dreamshaper_8.safetensors"
        prompt_json["vae"] = "Automatic"
        prompt_json["sampler"] = "DPM++ 2M Karras"
        prompt_json["steps"] = 30
        prompt_json["cfg_scale"] = 6.5
        print("\n--- Final prompt JSON ---\n", flush=True)
        print(json.dumps(prompt_json, indent=2, ensure_ascii=False), flush=True)

        # 4. Call local Stable Diffusion via diffusers
        call_stable_diffusion(prompt_json, object_name)
    except Exception as e:
        import traceback
        print("Exception in main():", e, flush=True)
        traceback.print_exc()
        # Report the failure through the exit status too; previously a failed
        # run still exited with 0.
        sys.exit(1)


if __name__ == "__main__":
    main()

142
generate_equirect.py Normal file
View File

@@ -0,0 +1,142 @@
#!/usr/bin/env python3
import argparse
import os
import subprocess
import torch
from PIL import Image, ImageDraw
import shutil
import tempfile
from diffusers import (
StableDiffusionPipeline,
DPMSolverMultistepScheduler,
StableDiffusionInpaintPipeline
)
def shift_image(img: Image.Image, shift: int) -> Image.Image:
    """Roll *img* left by *shift* pixels, wrapping the cut-off strip to the right.

    Returns a new RGB image of the same size; *img* is not modified.
    """
    width, height = img.size
    right_part = img.crop((shift, 0, width, height))
    left_part = img.crop((0, 0, shift, height))
    rolled = Image.new("RGB", (width, height))
    rolled.paste(right_part, (0, 0))
    rolled.paste(left_part, (width - shift, 0))
    return rolled
def create_mask(width: int, height: int, mask_w: int) -> Image.Image:
    """Build an inpainting mask: black canvas with a white, centred vertical band.

    Args:
        width: Mask width in pixels.
        height: Mask height in pixels.
        mask_w: Width of the white band; clamped to the range 0..width.

    Returns:
        An "L" mode image that is 255 inside the band and 0 elsewhere.
    """
    mask = Image.new("L", (width, height), 0)
    band_w = max(0, min(mask_w, width))
    if band_w == 0 or height <= 0:
        return mask
    left = (width - band_w) // 2
    # rectangle() includes both corner points, so subtract 1 to get a band
    # exactly band_w pixels wide that ends on the last row.
    ImageDraw.Draw(mask).rectangle(
        [left, 0, left + band_w - 1, height - 1], fill=255
    )
    return mask
def unshift_image(img: Image.Image, shift: int) -> Image.Image:
    """Roll *img* right by *shift* pixels, wrapping the overflow to the left.

    Returns a new RGB image of the same size; *img* is not modified.
    """
    width, height = img.size
    split = width - shift
    restored = Image.new("RGB", (width, height))
    pieces = (
        ((split, 0, width, height), 0),   # trailing strip goes to the left edge
        ((0, 0, split, height), shift),   # the rest follows after it
    )
    for box, x_offset in pieces:
        restored.paste(img.crop(box), (x_offset, 0))
    return restored
def main():
    """Generate a panorama, inpaint its wrap-around seam, upscale and save it.

    Command-line options:
        --prompt    Text prompt for generation and inpainting (required).
        --output    Path of the final upscaled image (required).
        --work-dir  Directory under which a temporary folder for the
                    intermediate files is created; defaults to this
                    script's directory.

    Raises:
        subprocess.CalledProcessError: If the Topaz CLI exits non-zero.
        SystemExit: With status 1 if Topaz produced no output file.
    """
    parser = argparse.ArgumentParser(
        description="Generate an equirectangular HDRI, make it seamless, and upscale it with Topaz Photo AI CLI."
    )
    parser.add_argument("--prompt", required=True,
                        help="Text prompt for generation and inpainting")
    parser.add_argument("--output", required=True,
                        help="Filename for the final upscaled image (e.g. seamless.png)")
    parser.add_argument("--work-dir", default=os.path.dirname(os.path.abspath(__file__)),
                        help="Working directory for intermediates and final outputs")
    args = parser.parse_args()

    output_abs = os.path.abspath(args.output)
    # Create the target folders up front so a missing directory fails now and
    # not after minutes of generation.
    os.makedirs(os.path.dirname(output_abs), exist_ok=True)
    os.makedirs(args.work_dir, exist_ok=True)

    # Intermediate files live in their own temporary folder.
    with tempfile.TemporaryDirectory(dir=args.work_dir) as tempdir:
        print(f"→ Using tempdir: {tempdir}")
        model_path = "/Volumes/SD/ML-Models/diffusers/hdri-panorama-v1-diffusers"
        topaz_cli = "/Applications/Topaz Photo AI.app/Contents/MacOS/Topaz Photo AI"
        steps = 20
        scale = 7.0
        width, height = 1024, 512

        if torch.backends.mps.is_available():
            device = "mps"
        elif torch.cuda.is_available():
            device = "cuda"
        else:
            device = "cpu"

        # 1) Generate base HDRI
        gen_pipe = StableDiffusionPipeline.from_pretrained(
            model_path,
            torch_dtype=torch.float32
        ).to(device)
        gen_pipe.scheduler = DPMSolverMultistepScheduler.from_config(gen_pipe.scheduler.config)
        gen_pipe.enable_attention_slicing()
        print("→ Generating equirectangular HDRI…")
        image = gen_pipe(
            prompt=args.prompt,
            num_inference_steps=steps,
            guidance_scale=scale - 1.5,
            width=width,
            height=height
        ).images[0]
        # Drop the reference so the generation pipeline can be freed before
        # the inpainting model is loaded.
        del gen_pipe
        gen_path = os.path.join(tempdir, f"base_{width}x{height}.png")
        image.save(gen_path)
        print(f"→ Saved initial image to {gen_path}")

        # 2) Make it seamless: roll the seam into the centre and inpaint it
        shift_amt = width // 2
        mask_w = width // 8
        shifted = shift_image(image, shift_amt)
        mask = create_mask(width, height, mask_w)
        inpaint_pipe = StableDiffusionInpaintPipeline.from_pretrained(
            "Lykon/dreamshaper-8-inpainting",
            torch_dtype=torch.float32
        ).to(device)
        inpaint_pipe.enable_attention_slicing()
        print("→ Inpainting seam for seamless tiling…")
        inpainted = inpaint_pipe(
            prompt=args.prompt,
            image=shifted,
            mask_image=mask,
            num_inference_steps=steps,
            guidance_scale=scale,
            width=width,
            height=height
        ).images[0]
        seamless_path = os.path.join(tempdir, os.path.basename(args.output))
        inpainted = unshift_image(inpainted, shift_amt)
        inpainted.save(seamless_path)
        print(f"→ Crafted seamless image: {seamless_path}")

        # 3) Upscale with Topaz Photo AI CLI
        print("→ Upscaling with Topaz Photo AI CLI…")
        # Topaz writes into a dedicated folder. If it shared the folder with
        # the intermediates, "newest file" could pick a non-upscaled image
        # and the empty-output check below could never trigger.
        upscaled_dir = os.path.join(tempdir, "upscaled")
        os.makedirs(upscaled_dir)
        subprocess.run(
            [topaz_cli, "--cli", seamless_path, "-o", upscaled_dir],
            check=True
        )

        # Topaz may append a suffix to the file name, so take the most
        # recently written file instead of guessing its name.
        candidates = [
            os.path.join(upscaled_dir, name)
            for name in os.listdir(upscaled_dir)
            if not name.startswith(".")
        ]
        candidates = [path for path in candidates if os.path.isfile(path)]
        if not candidates:
            print("→ No upscaled output found after Topaz run!")
            raise SystemExit(1)
        upscaled = max(candidates, key=os.path.getmtime)
        shutil.move(upscaled, output_abs)
        print(f"→ Upscaled image moved to {output_abs}")


if __name__ == "__main__":
    main()

218
image_to_3d.py Normal file
View File

@@ -0,0 +1,218 @@
#!/usr/bin/env python3
import base64
import json
import os
import subprocess
import sys
import tempfile
import time
from pathlib import Path
from typing import Any
import replicate
import requests
from PIL import Image
# Replicate model identifier passed to predictions.create() in run_replicate().
MODEL_NAME = "tencent/hunyuan-3d-3.1"
# Timeout handed to requests.get() when downloading the result
# (requests interprets the value in seconds).
TIMEOUT = 900
# How long run_replicate() keeps polling, measured with time.monotonic();
# its error message reports the value as minutes (10).
PREDICTION_TIMEOUT = 10 * 60
# Assigned to client.poll_interval and used as the sleep between status reloads.
POLL_INTERVAL = 2.0
# Size cap checked on a re-encoded image; the error message calls it
# Replicate's 6MB limit.
MAX_INPUT_BYTES = 6 * 1024 * 1024
# Files with a supported extension at or below this size are uploaded unchanged.
MAX_DATA_URI_BYTES = 1024 * 1024
# Longest side, in pixels, an image may have before _save_compact_image() shrinks it.
MAX_INPUT_SIDE = 2048
# File extensions eligible for the upload-unchanged path in prepare_input_image().
SUPPORTED_EXTENSIONS = {".jpg", ".jpeg", ".png", ".webp"}
def notify(title: str, message: str) -> None:
    """Show a desktop notification through ``osascript``; best effort.

    Does nothing when ``osascript`` cannot be started (for example when the
    program is missing), and ignores a non-zero exit status.
    """
    # json.dumps supplies the double-quoted, backslash-escaped literal form.
    # ensure_ascii=False keeps non-ASCII characters (e.g. in file names) as
    # they are instead of \uXXXX escapes, which AppleScript would display
    # literally.
    quoted_message = json.dumps(message, ensure_ascii=False)
    quoted_title = json.dumps(title, ensure_ascii=False)
    script = f"display notification {quoted_message} with title {quoted_title}"
    try:
        subprocess.run(["osascript", "-e", script], check=False)
    except OSError:
        pass
def _has_alpha(img: Image.Image) -> bool:
    """Tell whether *img* carries transparency.

    True for modes "RGBA" and "LA", and for palette mode "P" when the image
    info contains a "transparency" entry.
    """
    if img.mode in {"RGBA", "LA"}:
        return True
    return img.mode == "P" and "transparency" in img.info
def _save_compact_image(img: Image.Image, output_path: str) -> None:
    """Write *img* to *output_path*, shrinking it first if it is oversized.

    Images whose longest side exceeds ``MAX_INPUT_SIDE`` are downsized to fit.
    Images with transparency are saved as WEBP; all others are converted to
    RGB if necessary and saved as JPEG.
    """
    longest_side = max(img.size)
    if longest_side > MAX_INPUT_SIDE:
        # thumbnail() resizes in place, so shrink a copy and leave the
        # caller's image untouched.
        img = img.copy()
        img.thumbnail((MAX_INPUT_SIDE, MAX_INPUT_SIDE), Image.Resampling.LANCZOS)

    if not _has_alpha(img):
        rgb = img if img.mode == "RGB" else img.convert("RGB")
        rgb.save(output_path, "JPEG", quality=92, optimize=True)
        return

    img.save(output_path, "WEBP", quality=95, method=6)
def prepare_input_image(src_path: str, temp_paths: list[str]) -> str:
    """Return the path of an image suitable for upload, re-encoding if needed.

    A file with a supported extension and a size of at most
    ``MAX_DATA_URI_BYTES`` is returned unchanged. Anything else is re-encoded
    into a temporary file by ``_save_compact_image``.

    Args:
        src_path: Path of the source image.
        temp_paths: List that receives the path of any temporary file
            created here; the caller is responsible for deleting them.

    Returns:
        ``src_path`` itself or the path of the temporary re-encoded copy.

    Raises:
        RuntimeError: If the re-encoded file still exceeds ``MAX_INPUT_BYTES``.
    """
    ext = Path(src_path).suffix.lower()
    if ext in SUPPORTED_EXTENSIONS and os.path.getsize(src_path) <= MAX_DATA_URI_BYTES:
        return src_path

    # The context manager closes the source file handle once re-encoding is done.
    with Image.open(src_path) as img:
        suffix = ".webp" if _has_alpha(img) else ".jpg"
        fd, temp_path = tempfile.mkstemp(suffix=suffix)
        os.close(fd)
        # Register before writing so the caller's cleanup also covers a save
        # that fails half-way.
        temp_paths.append(temp_path)
        _save_compact_image(img, temp_path)

    if os.path.getsize(temp_path) > MAX_INPUT_BYTES:
        raise RuntimeError("Prepared image is still larger than Replicate's 6MB limit.")
    return temp_path
def run_replicate(image_path: str, api_token: str) -> Any:
    """Start a Replicate prediction for *image_path* and wait for its result.

    The image is uploaded with the base64 file-encoding strategy. The
    prediction status is reloaded every ``POLL_INTERVAL`` seconds and each
    status change is printed.

    Returns:
        The ``output`` attribute of the succeeded prediction.

    Raises:
        TimeoutError: If no final state is reached within
            ``PREDICTION_TIMEOUT`` seconds; the prediction is not cancelled.
        RuntimeError: If the prediction ends in any state but "succeeded".
    """
    client = replicate.Client(api_token=api_token)
    client.poll_interval = POLL_INTERVAL

    with open(image_path, "rb") as image_file:
        model_input = {
            "image": image_file,
            "generate_type": "Normal",
            "face_count": 500000,
            "enable_pbr": False,
        }
        prediction = client.models.predictions.create(
            model=MODEL_NAME,
            input=model_input,
            wait=False,
            file_encoding_strategy="base64",
        )
    print(f"Replicate prediction started: {prediction.id} ({prediction.status})")

    finished_states = {"succeeded", "failed", "canceled"}
    give_up_at = time.monotonic() + PREDICTION_TIMEOUT
    seen_status = prediction.status
    while True:
        if prediction.status in finished_states:
            break
        if time.monotonic() >= give_up_at:
            raise TimeoutError(
                f"Timed out waiting for Replicate prediction {prediction.id} "
                f"after {PREDICTION_TIMEOUT // 60} minutes. It may still be running."
            )
        time.sleep(client.poll_interval)
        prediction.reload()
        if prediction.status == seen_status:
            continue
        print(f"Replicate prediction {prediction.id}: {prediction.status}")
        seen_status = prediction.status

    if prediction.status == "succeeded":
        return prediction.output

    detail = prediction.error or prediction.logs or f"status={prediction.status}"
    raise RuntimeError(f"Replicate prediction {prediction.id} failed: {detail}")
def _extract_output_file(output: Any) -> Any:
    """Unwrap a Replicate output down to a single file-like value or URL.

    Lists and tuples are reduced to their first element. Dicts are searched
    for the first truthy value under "output", "model", "mesh", "glb" or
    "url", in that order. Unwrapping repeats until the value is neither a
    list/tuple nor a dict; any other value is returned unchanged.

    Raises:
        RuntimeError: For an empty list/tuple, or a dict with none of the
            known keys set.
    """
    if isinstance(output, (list, tuple)):
        if not output:
            raise RuntimeError("Replicate returned an empty output.")
        # Recurse so that a wrapped first element (e.g. a dict) is unwrapped
        # as well instead of being handed back as-is.
        return _extract_output_file(output[0])
    if isinstance(output, dict):
        for key in ("output", "model", "mesh", "glb", "url"):
            if output.get(key):
                return _extract_output_file(output[key])
        raise RuntimeError(f"Replicate returned an unsupported output shape: {output}")
    return output
def _bytes_from_url(url: str) -> bytes:
    """Return the bytes a URL refers to.

    ``data:`` URIs are decoded locally: base64 when the header ends with
    ``;base64``, percent-decoding otherwise. Any other URL is fetched with
    ``requests.get`` using ``TIMEOUT``.

    Raises:
        ValueError: If a ``data:`` URI has no comma separating header and data.
        requests.HTTPError: If the download answers with an error status.
    """
    if url.startswith("data:"):
        header, comma, payload = url.partition(",")
        if not comma:
            raise ValueError("Malformed data URI: missing ',' separator.")
        if header.lower().endswith(";base64"):
            return base64.b64decode(payload)
        # Without the ";base64" marker the data part is percent-encoded text.
        from urllib.parse import unquote_to_bytes
        return unquote_to_bytes(payload)

    response = requests.get(url, timeout=TIMEOUT)
    response.raise_for_status()
    return response.content
def write_output(output: Any, output_path: str) -> None:
    """Save the file contained in a Replicate *output* to *output_path*.

    The output is first unwrapped with ``_extract_output_file``. An object
    with ``read`` is read directly; otherwise an object with a ``url``
    attribute, or a plain string, is resolved through ``_bytes_from_url``.

    Raises:
        RuntimeError: If the unwrapped value is none of the supported kinds.
    """
    artifact = _extract_output_file(output)
    if hasattr(artifact, "read"):
        payload = artifact.read()
    else:
        if hasattr(artifact, "url"):
            location = str(artifact.url)
        elif isinstance(artifact, str):
            location = artifact
        else:
            raise RuntimeError(f"Replicate returned an unsupported output type: {type(artifact)!r}")
        payload = _bytes_from_url(location)

    with open(output_path, "wb") as sink:
        sink.write(payload)
def process_image(img_path: str, api_token: str | None = None) -> str | None:
    """Convert one image to a ``.glb`` model saved next to it.

    Args:
        img_path: Path of the source image.
        api_token: Replicate API token; when empty or None the
            ``REPLICATE_API_TOKEN`` environment variable is used.

    Returns:
        The path of the written ``.glb`` file, or None on any failure.
        Failures are printed and sent as a notification, never raised.
    """
    def _report_failure(text: str) -> None:
        print(text)
        notify("3D conversion error", text)

    api_token = (api_token or os.environ.get("REPLICATE_API_TOKEN", "")).strip()
    if not api_token:
        _report_failure("Missing Replicate API token. Add it in app settings or set REPLICATE_API_TOKEN.")
        return None

    img_path = os.path.abspath(img_path)
    if not os.path.isfile(img_path):
        _report_failure(f"Image not found: {img_path}")
        return None

    scratch_files: list[str] = []
    try:
        upload_path = prepare_input_image(img_path, scratch_files)
        folder, file_name = os.path.split(img_path)
        stem = os.path.splitext(file_name)[0]
        output_path = os.path.join(folder, f"{stem}.glb")

        print(f"Running {MODEL_NAME} on Replicate...")
        result = run_replicate(upload_path, api_token)
        write_output(result, output_path)

        done = f"3D model saved: {output_path}"
        print(done)
        notify("3D conversion complete", done)
        return output_path
    except Exception as e:
        _report_failure(f"3D conversion failed for {img_path}: {e}")
        return None
    finally:
        # Temporary re-encoded inputs are removed whether or not the run worked.
        for scratch in scratch_files:
            try:
                os.remove(scratch)
            except OSError:
                pass
def main() -> None:
    """Convert every image path given on the command line.

    Exits with status 1 when no path is given or when no image could be
    converted; otherwise sends a summary notification.
    """
    image_args = sys.argv[1:]
    if not image_args:
        msg = "Usage: python image_to_3d.py <image1> [image2 ...]"
        notify("3D conversion error", "No image files provided.")
        print(msg)
        sys.exit(1)

    outputs = []
    for img_path in image_args:
        print(f"\nProcessing: {img_path}")
        if result := process_image(img_path):
            outputs.append(result)

    if len(outputs) == 0:
        sys.exit(1)
    notify("3D conversion", f"Finished {len(outputs)} model(s).")


if __name__ == "__main__":
    main()

1241
main.py Normal file

File diff suppressed because it is too large Load Diff

12
requirements.txt Normal file
View File

@@ -0,0 +1,12 @@
# Pinned runtime dependencies installed by run.sh.
# NOTE(review): the prompt-generator script imports `langchain_community`, which is
# not listed here — confirm whether that script is meant to run from this environment.
numpy<2
accelerate==0.31.0
diffusers==0.27.2
huggingface-hub==0.23.4
Pillow==12.0.0
pywebview==5.4
replicate==1.0.7
requests==2.32.5
safetensors==0.7.0
torch==2.9.1
transformers==4.41.2

30
run.sh Executable file
View File

@@ -0,0 +1,30 @@
#!/usr/bin/env bash
# Abort on errors, on unset variables and on failures inside pipelines.
set -euo pipefail

# Work from the script's own directory so .venv, requirements.txt and main.py resolve.
cd "$(dirname "$0")"

# Keep the Hugging Face cache inside the project unless the caller set one already.
export HF_HOME="${HF_HOME:-$PWD/.hf_cache}"
export HUGGINGFACE_HUB_CACHE="${HUGGINGFACE_HUB_CACHE:-$HF_HOME/hub}"
mkdir -p "$HUGGINGFACE_HUB_CACHE"

# Interpreter used to create the virtual environment:
# $PYTHON if set, else python3.11, else python3.
if [ -n "${PYTHON:-}" ]; then
  python_bin="$PYTHON"
elif command -v python3.11 >/dev/null 2>&1; then
  python_bin="python3.11"
elif command -v python3 >/dev/null 2>&1; then
  python_bin="python3"
else
  echo "Could not find python3.11 or python3. Install Python 3.11, then run this script again." >&2
  exit 1
fi

# Create the virtual environment on first run only.
if [ ! -d ".venv" ]; then
  "$python_bin" -m venv .venv
fi

source .venv/bin/activate
python -m pip install --upgrade pip
python -m pip install -r requirements.txt

# Forward any command-line arguments to the application instead of dropping them.
exec python main.py "$@"

77
scene_setup.py Normal file
View File

@@ -0,0 +1,77 @@
import bpy
import sys
import os
# Scene setup script: imports a GLB model, lights the scene with an HDRI (or
# two sun lamps as a fallback) and switches 3D viewports to rendered shading.
# Arguments after "--" on the command line: [glb_path] [hdri_path].
argv = sys.argv
argv = argv[argv.index("--") + 1:] if "--" in argv else []
glb_path = argv[0] if len(argv) > 0 else None
hdri_path = argv[1] if len(argv) > 1 else None

#bpy.ops.wm.read_factory_settings(use_empty=True)
# Remove the object named "Cube" if the scene has one.
if "Cube" in bpy.data.objects:
    bpy.data.objects.remove(bpy.data.objects["Cube"], do_unlink=True)

# Import the GLB
if glb_path and os.path.isfile(glb_path):
    bpy.ops.import_scene.gltf(filepath=glb_path)
else:
    print("GLB file missing:", glb_path)

# HDRI environment lighting, or sun lamps as fallback
hdri_loaded = False
if hdri_path and os.path.isfile(hdri_path):
    try:
        world = bpy.data.worlds.new("World") if not bpy.data.worlds else bpy.data.worlds[0]
        bpy.context.scene.world = world
        world.use_nodes = True
        ntree = world.node_tree
        nodes = ntree.nodes
        # Iterate over a snapshot: removing from the collection while
        # iterating it directly can skip nodes.
        for node in list(nodes):
            nodes.remove(node)
        node_bg = nodes.new(type='ShaderNodeBackground')
        node_env = nodes.new(type='ShaderNodeTexEnvironment')
        node_out = nodes.new(type='ShaderNodeOutputWorld')
        node_env.image = bpy.data.images.load(hdri_path)
        node_env.location = (-300, 0)
        node_bg.location = (0, 0)
        node_out.location = (300, 0)
        # Environment texture -> Background -> World Output
        ntree.links.new(node_env.outputs['Color'], node_bg.inputs['Color'])
        ntree.links.new(node_bg.outputs['Background'], node_out.inputs['Surface'])
        hdri_loaded = True
    except Exception as e:
        print("Failed to load HDRI:", e)
        hdri_loaded = False

if not hdri_loaded:
    # Key light: a sun lamp shining from above at a slight angle
    light_data = bpy.data.lights.new(name="Sun", type='SUN')
    light_data.energy = 4.5
    light = bpy.data.objects.new(name="Sun", object_data=light_data)
    bpy.context.collection.objects.link(light)
    light.location = (4, 10, 10)
    light.rotation_euler = (0.8, 0.3, 0.1)  # slightly tilted

    # Optional fill light / soft ambient
    light_data2 = bpy.data.lights.new(name="Fill", type='SUN')
    light_data2.energy = 1.1
    light2 = bpy.data.objects.new(name="Fill", object_data=light_data2)
    bpy.context.collection.objects.link(light2)
    light2.location = (-8, -6, 4)
    light2.rotation_euler = (1.4, -0.8, -0.2)

# Switch every 3D viewport to rendered shading
for window in bpy.context.window_manager.windows:
    for area in window.screen.areas:
        if area.type == 'VIEW_3D':
            for space in area.spaces:
                if space.type == 'VIEW_3D':
                    space.shading.type = 'RENDERED'

# Unparent objects whose parent is the object named "world", then delete
# that object if it is an Empty.
for obj in bpy.data.objects:
    if obj.parent and obj.parent.name == "world":
        obj.parent = None
if "world" in bpy.data.objects and bpy.data.objects["world"].type == "EMPTY":
    bpy.data.objects.remove(bpy.data.objects["world"], do_unlink=True)

# Second pass: remove any object named "Cube" that exists at this point.
if "Cube" in bpy.data.objects:
    bpy.data.objects.remove(bpy.data.objects["Cube"], do_unlink=True)

2375
web/index.html Normal file

File diff suppressed because it is too large Load Diff