code icon Code

Generate Image with Reference Images

Generate images using 1 to 14 reference images for character consistency. Supplying more reference images generally improves consistency across views.

Source Code

import fs from "fs";
import path from "path";

// Gemini generateContent endpoint used for image generation.
const ENDPOINT =
  "https://generativelanguage.googleapis.com/v1beta/models/gemini-3-pro-image-preview:generateContent";

// Positional CLI arguments, in order: prompt, comma-separated image paths, output directory.
const cliArgs = process.argv.slice(2);

// A non-empty IMAGE_PROMPT environment variable takes precedence over the
// positional prompt, so prompts with special characters need no shell escaping.
let prompt = process.env.IMAGE_PROMPT || cliArgs[0];
let imagePaths = cliArgs[1];
let outputDir = cliArgs[2];

// Each row: the value to check followed by the lines to print when it is missing.
// Rows are checked in order and the first missing value terminates the process.
const requiredArguments = [
  [
    prompt,
    "ERROR: Missing required argument 'prompt'.",
    "The prompt describes what image to generate from the reference photos.",
  ],
  [
    imagePaths,
    "ERROR: Missing required argument 'imagePaths'.",
    "Provide comma-separated absolute paths to 1-14 reference images.",
  ],
  [
    outputDir,
    "ERROR: Missing required argument 'outputDir'.",
    "Specify the directory where the generated image should be saved.",
  ],
];

for (const [value, ...messages] of requiredArguments) {
  if (!value) {
    for (const message of messages) {
      console.error(message);
    }
    process.exit(1);
  }
}

// Split the comma-separated list, trimming whitespace and dropping empty entries.
const paths = imagePaths
  .split(",")
  .map((entry) => entry.trim())
  .filter(Boolean);

if (paths.length === 0) {
  console.error("ERROR: No reference images provided. At least 1 image path required.");
  console.error("Usage: Pass comma-separated absolute paths to reference images.");
  process.exit(1);
} else if (paths.length > 14) {
  console.error(`ERROR: Too many reference images (${paths.length}). Maximum 14 supported.`);
  console.error("Recommendation: Use 3-5 images showing different angles for best results.");
  process.exit(1);
}

/**
 * Resolves the image MIME type for a file from its extension.
 *
 * The extension is compared case-insensitively.
 *
 * @param {string} filePath - Path (or bare file name) of the image.
 * @returns {string|null} The MIME type for .png, .webp, .gif, .jpg and .jpeg
 *   files, or null for any other (or missing) extension.
 */
function getMimeType(filePath) {
  // Kept local so the function stays safe to call regardless of where it is
  // hoisted from; a Map avoids accidental hits on Object.prototype keys.
  const mimeTypesByExtension = new Map([
    [".png", "image/png"],
    [".webp", "image/webp"],
    [".gif", "image/gif"],
    [".jpg", "image/jpeg"],
    [".jpeg", "image/jpeg"],
  ]);

  const extension = path.extname(filePath).toLowerCase();
  return mimeTypesByExtension.has(extension) ? mimeTypesByExtension.get(extension) : null;
}

/**
 * Logs a retry notice and sleeps for an exponentially growing delay.
 *
 * The delay is 2^attempt seconds plus up to one second of random jitter.
 *
 * @param {string} reason - Short description of why the attempt failed.
 * @param {number} attempt - Zero-based index of the attempt that just failed.
 * @param {number} totalAttempts - Total number of attempts that will be made.
 * @returns {Promise<void>} Resolves once the delay has elapsed.
 */
async function waitBeforeRetry(reason, attempt, totalAttempts) {
  const delay = Math.pow(2, attempt) * 1000 + Math.random() * 1000;
  console.log(
    `${reason} (attempt ${attempt + 1}/${totalAttempts}). Retrying in ${Math.round(delay / 1000)}s...`
  );
  await new Promise((resolve) => setTimeout(resolve, delay));
}

/**
 * Performs a fetch and reads the body as text, retrying with exponential
 * backoff on network errors, HTTP 429 and HTTP 5xx responses.
 *
 * @param {string} url - Request URL.
 * @param {object} options - Options forwarded unchanged to `fetch`.
 * @param {number} [maxRetries=5] - Total number of attempts (not extra
 *   retries). Values below 1 or non-numeric values are treated as 1, so the
 *   function always performs at least one request and never resolves to
 *   undefined.
 * @returns {Promise<{response: Response, text: string}>} The last response and
 *   its body text. The response may be non-OK: non-retryable statuses are
 *   returned immediately, retryable ones after the final attempt.
 * @throws Rethrows the underlying error when the final attempt fails with a
 *   network error (or while reading the body).
 */
async function fetchWithRetry(url, options, maxRetries = 5) {
  const totalAttempts = maxRetries >= 1 ? maxRetries : 1;

  for (let attempt = 0; attempt < totalAttempts; attempt++) {
    const isLastAttempt = attempt >= totalAttempts - 1;

    let response;
    let text;
    try {
      response = await fetch(url, options);
      text = await response.text();
    } catch (err) {
      if (isLastAttempt) {
        throw err;
      }
      await waitBeforeRetry("Network error", attempt, totalAttempts);
      continue;
    }

    const isRateLimited = response.status === 429;
    const isRetryable = isRateLimited || response.status >= 500;
    if (response.ok || !isRetryable || isLastAttempt) {
      return { response, text };
    }

    // Report the real cause: 5xx responses are server errors, not rate limits.
    const reason = isRateLimited ? "Rate limit hit" : `Server error ${response.status}`;
    await waitBeforeRetry(reason, attempt, totalAttempts);
  }
}

/**
 * Picks the file extension for the generated image from the MIME type the API
 * reported for it.
 *
 * @param {string|undefined} mimeType - MIME type from the response part.
 * @returns {string} Extension including the leading dot; ".png" when the MIME
 *   type is missing or unrecognized.
 */
function extensionForGeneratedImage(mimeType) {
  switch (String(mimeType || "").toLowerCase()) {
    case "image/jpeg":
    case "image/jpg":
      return ".jpg";
    case "image/webp":
      return ".webp";
    case "image/gif":
      return ".gif";
    default:
      return ".png";
  }
}

/**
 * Script entry point: loads the reference images, sends them with the prompt
 * to the Gemini endpoint, saves the first returned image into `outputDir`,
 * appends an entry to `media-index.yaml` in the parent of `outputDir`, and
 * prints a JSON result line on stdout.
 *
 * Expected failures (missing file, unsupported format, unreadable file, non-OK
 * API response, unparseable response, no image in response) print an
 * explanation to stderr and terminate the process with exit code 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log(`Generating image with ${paths.length} reference images...`);

  // Request parts: every reference image first, the text prompt last.
  const parts = [];

  for (const imagePath of paths) {
    if (!fs.existsSync(imagePath)) {
      console.error(`ERROR: Reference image not found.`);
      console.error(`  Path: ${imagePath}`);
      console.error(`  Check that the file exists and the path is correct.`);
      process.exit(1);
    }

    const mimeType = getMimeType(imagePath);
    if (!mimeType) {
      const ext = path.extname(imagePath);
      console.error(`ERROR: Unsupported image format '${ext}'.`);
      console.error(`  File: ${imagePath}`);
      console.error(`  Supported formats: PNG, JPEG/JPG, WebP, GIF`);
      process.exit(1);
    }

    // Read the file and embed it as base64 inline data.
    try {
      const imageBuffer = fs.readFileSync(imagePath);
      const base64Image = imageBuffer.toString("base64");

      parts.push({
        inline_data: {
          mime_type: mimeType,
          data: base64Image,
        },
      });

      console.log(`  ✓ Loaded: ${path.basename(imagePath)} (${mimeType})`);
    } catch (err) {
      console.error(`ERROR: Failed to read image file.`);
      console.error(`  File: ${imagePath}`);
      console.error(`  Reason: ${err.message}`);
      process.exit(1);
    }
  }

  parts.push({ text: prompt });

  const body = {
    contents: [{ parts }],
    generationConfig: {
      responseModalities: ["TEXT", "IMAGE"],
    },
  };

  console.log(`Calling Gemini API with ${paths.length} reference images...`);

  const { response, text } = await fetchWithRetry(
    ENDPOINT,
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        // NOTE(review): literal placeholder value; presumably substituted with
        // a real API key before this script runs. Confirm with the deployer.
        "x-goog-api-key": "PLACEHOLDER_TOKEN",
      },
      body: JSON.stringify(body),
    },
    3 // Up to 3 attempts in total for multi-ref requests (larger payload)
  );

  if (!response.ok) {
    console.error(`ERROR: Gemini API request failed.`);
    console.error(`  Status: ${response.status}`);
    try {
      const errorData = JSON.parse(text);
      console.error(`  Message: ${errorData.error?.message || text}`);
    } catch {
      console.error(`  Response: ${text.slice(0, 500)}`);
    }
    process.exit(1);
  }

  // A 2xx response with a non-JSON body is reported explicitly rather than
  // surfacing as a generic "unexpected failure".
  let data;
  try {
    data = JSON.parse(text);
  } catch (err) {
    console.error("ERROR: Gemini API returned a response that is not valid JSON.");
    console.error(`  Reason: ${err.message}`);
    console.error(`  Response: ${text.slice(0, 500)}`);
    process.exit(1);
  }

  // The image part may use camelCase (inlineData) or snake_case (inline_data).
  const responseParts = data?.candidates?.[0]?.content?.parts || [];
  const imagePart = responseParts.find(
    (x) => (x.inlineData && x.inlineData.data) || (x.inline_data && x.inline_data.data)
  );

  if (!imagePart) {
    console.error("ERROR: No image returned from Gemini API.");
    console.error("  The API responded but did not include generated image data.");
    if (data?.candidates?.[0]?.finishReason) {
      console.error(`  Finish reason: ${data.candidates[0].finishReason}`);
    }
    console.error("  Try simplifying the prompt or using different reference images.");
    process.exit(1);
  }

  const inline = imagePart.inlineData || imagePart.inline_data;

  fs.mkdirSync(outputDir, { recursive: true });

  const timestamp = Date.now();
  const slug = prompt
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-|-$/g, "")
    .slice(0, 40)
    .replace(/-$/, ""); // truncation can leave a trailing dash behind
  const outputName = slug ? `${slug}-${timestamp}` : `multiref-${timestamp}`;
  // Name the file after the format actually returned instead of assuming PNG.
  const outputExtension = extensionForGeneratedImage(inline.mimeType || inline.mime_type);
  const outputPath = path.join(outputDir, `${outputName}${outputExtension}`);

  fs.writeFileSync(outputPath, Buffer.from(inline.data, "base64"));

  // Append an entry to the media index that lives next to the output directory.
  const indexPath = path.join(path.dirname(outputDir), "media-index.yaml");
  // The summary is written as a YAML double-quoted scalar: backslashes must be
  // escaped, double quotes are swapped for single quotes, and every kind of
  // line break is flattened to a space so the entry stays on one line.
  const promptSummary = prompt
    .slice(0, 100)
    .replace(/\\/g, "\\\\")
    .replace(/"/g, "'")
    .replace(/\r?\n|\r/g, " ");
  const relativePath = path.relative(path.dirname(outputDir), outputPath);
  const entry = `- path: ${relativePath}\n  type: image\n  prompt_summary: "${promptSummary}"\n  reference_count: ${paths.length}\n  created_at: ${new Date().toISOString()}\n`;
  try {
    fs.appendFileSync(indexPath, entry);
    console.log(`✓ Updated media index: ${indexPath}`);
  } catch (err) {
    // The index is best-effort: warn on stderr, but do not fail the run.
    console.warn(`WARNING: Could not update media index (${indexPath}): ${err.message}`);
  }

  console.log(`✓ Saved: ${outputPath}`);
  console.log(
    JSON.stringify({
      success: true,
      path: outputPath,
      prompt: prompt,
      referenceCount: paths.length,
    })
  );
}

/**
 * Last-resort handler for a rejection escaping main(): prints the error
 * message plus the first two stack frames (when a stack is available) to
 * stderr, then terminates the process with exit code 1.
 *
 * @param {Error} err - The rejection reason.
 */
function reportFatalError(err) {
  console.error("ERROR: Unexpected failure during image generation.");
  console.error(`  ${err.message}`);

  if (err.stack) {
    // Line 0 of the stack repeats the message; keep only the next two frames.
    const frames = err.stack.split('\n').slice(1, 3);
    console.error(`  Stack: ${frames.join('\n  ')}`);
  }

  process.exit(1);
}

main().catch(reportFatalError);