Extracting Images from PDF Documents with PDFium VCL in Delphi

PDF documents often contain valuable images—photographs, diagrams, charts, and graphics. The Extract Images demo shows how to extract all embedded images from a PDF document using PDFium VCL, saving them in optimal formats based on their characteristics.

Overview

This demo extracts embedded images (bitmaps) from PDF pages and saves them as separate image files. It includes image preview, format detection, and batch extraction with progress tracking.

Key Features

Extract All Images – Extract every embedded image from the PDF
Page Range Selection – Extract from specific pages only
Smart Format Detection – Automatically choose JPEG, PNG, or BMP based on image characteristics (as shown below, PNG output falls back to BMP unless a PNG library is added)
Image Preview – Preview extracted images before saving
Detailed Information – View dimensions, format, and file size
Batch Processing – Extract multiple images with progress tracking

PDFium DLL Requirements

Before running any PDFium VCL application, ensure the PDFium DLL files are installed:

pdfium32.dll / pdfium64.dll – Standard versions (~5-6 MB)
pdfium32v8.dll / pdfium64v8.dll – With V8 JavaScript engine (~23-27 MB)

Installation: Run PDFiumVCL\DLLs\CopyDlls.bat as Administrator to automatically copy the DLLs to Windows system directories.

Basic Image Extraction

Access embedded images through the Bitmap and BitmapCount properties:

// Saves every embedded image of 'document.pdf' as a BMP file named
// Page<P>_Image<N>.bmp (relative file name; N is 1-based within the page).
procedure ExtractImagesSimple;
var
  Pdf: TPdf;
  I, J: Integer;
  Bitmap: TBitmap;
begin
  Pdf := TPdf.Create(nil);
  try
    Pdf.FileName := 'document.pdf';
    Pdf.Active := True;

    // Pages are addressed from 1, images on the selected page from 0
    for I := 1 to Pdf.PageCount do
    begin
      Pdf.PageNumber := I;

      for J := 0 to Pdf.BitmapCount - 1 do
      begin
        Bitmap := Pdf.Bitmap[J];
        // Skip entries for which no bitmap object was returned, so that
        // SaveToFile is never called on nil
        if not Assigned(Bitmap) then
          Continue;
        try
          Bitmap.SaveToFile(Format('Page%d_Image%d.bmp', [I, J + 1]));
        finally
          // The returned bitmap is owned by the caller
          Bitmap.Free;
        end;
      end;
    end;

  finally
    Pdf.Active := False;
    Pdf.Free;
  end;
end;

// Saves every embedded image of 'document.pdf' as a BMP file named
// Page<P>_Image<N>.bmp (relative file name; N is 1-based within the page).
procedure ExtractImagesSimple;
var
  Pdf: TPdf;
  I, J: Integer;
  Bitmap: TBitmap;
begin
  Pdf := TPdf.Create(nil);
  try
    Pdf.FileName := 'document.pdf';
    Pdf.Active := True;

    // Pages are addressed from 1, images on the selected page from 0
    for I := 1 to Pdf.PageCount do
    begin
      Pdf.PageNumber := I;

      for J := 0 to Pdf.BitmapCount - 1 do
      begin
        Bitmap := Pdf.Bitmap[J];
        // Skip entries for which no bitmap object was returned, so that
        // SaveToFile is never called on nil
        if not Assigned(Bitmap) then
          Continue;
        try
          Bitmap.SaveToFile(Format('Page%d_Image%d.bmp', [I, J + 1]));
        finally
          // The returned bitmap is owned by the caller
          Bitmap.Free;
        end;
      end;
    end;

  finally
    Pdf.Active := False;
    Pdf.Free;
  end;
end;

Complete Extraction with Format Detection

The demo implements intelligent format selection:

// Extracts the images of the requested page range from the PDF named in
// EditPdfFile, saves each one under FCurrentOutputDir and records a
// TImageInfo entry (with a private bitmap copy) for the preview list.
// Controls are disabled for the duration and re-enabled in all cases.
procedure TFormMain.ButtonExtractClick(Sender: TObject);
var
  I, J, StartPage, EndPage: Integer;
  Bitmap: TBitmap;
  FullFileName, DetectedFormat, ImageExtension: string;
  ImageInfo: TImageInfo;
begin
  FCancelled := False;
  FProcessedImages := 0;
  FTotalImages := 0;

  ClearExtractedImages;
  EnableControls(False);

  try
    Pdf.FileName := EditPdfFile.Text;
    Pdf.PageNumber := 0;
    Pdf.Active := True;

    ParsePageRange(EditPageRange.Text, StartPage, EndPage);
    // Keep the requested range inside the document: -1 means "to the end",
    // and a user-supplied end beyond the last page is cut back to it.
    if StartPage < 1 then
      StartPage := 1;
    if (EndPage = -1) or (EndPage > Pdf.PageCount) then
      EndPage := Pdf.PageCount;

    // First pass: count the images so the progress bar has a maximum
    for I := StartPage to EndPage do
    begin
      Pdf.PageNumber := I;
      FTotalImages := FTotalImages + Pdf.BitmapCount;
    end;

    ProgressBar.Max := FTotalImages;

    // Second pass: extract.
    // NOTE(review): nothing in this routine pumps messages, so FCancelled
    // can only change if one of the called helpers does - confirm.
    for I := StartPage to EndPage do
    begin
      if FCancelled then
        Break;

      Pdf.PageNumber := I;

      for J := 0 to Pdf.BitmapCount - 1 do
      begin
        if FCancelled then
          Break;

        Bitmap := Pdf.Bitmap[J];
        if Assigned(Bitmap) then
        begin
          try
            DetectedFormat := DetectImageFormat(Bitmap);
            ImageExtension := GetExtensionForFormat(DetectedFormat);

            FullFileName := Format('%s\Page%d_Image%d%s',
              [FCurrentOutputDir, I, J + 1, ImageExtension]);

            // NOTE(review): SaveBitmapInOptimalFormat writes a .bmp file
            // when handed a .png name, so the FileName/Format recorded
            // below may not match the file on disk in the PNG case.
            SaveBitmapInOptimalFormat(Bitmap, FullFileName);

            // Describe the image for the preview list
            ImageInfo.FileName := FullFileName;
            ImageInfo.PageNumber := I;
            ImageInfo.ImageIndex := J + 1;
            ImageInfo.Width := Bitmap.Width;
            ImageInfo.Height := Bitmap.Height;
            ImageInfo.Format := DetectedFormat;
            // ImageInfo is a local, so Size would otherwise hold whatever
            // was on the stack; UpdatePreview shows it when it is > 0.
            ImageInfo.Size := 0;
            ImageInfo.Bitmap := TBitmap.Create;
            try
              ImageInfo.Bitmap.Assign(Bitmap);
            except
              // Do not leak the copy when the pixel data cannot be cloned
              ImageInfo.Bitmap.Free;
              raise;
            end;

            AddImageInfo(ImageInfo);

            Inc(FProcessedImages);
            ProgressBar.Position := FProcessedImages;

          finally
            Bitmap.Free;
          end;
        end;
      end;
    end;

    UpdateImageList;

  finally
    Pdf.Active := False;
    EnableControls(True);
  end;
end;

// Extracts the images of the requested page range from the PDF named in
// EditPdfFile, saves each one under FCurrentOutputDir and records a
// TImageInfo entry (with a private bitmap copy) for the preview list.
// Controls are disabled for the duration and re-enabled in all cases.
procedure TFormMain.ButtonExtractClick(Sender: TObject);
var
  I, J, StartPage, EndPage: Integer;
  Bitmap: TBitmap;
  FullFileName, DetectedFormat, ImageExtension: string;
  ImageInfo: TImageInfo;
begin
  FCancelled := False;
  FProcessedImages := 0;
  FTotalImages := 0;

  ClearExtractedImages;
  EnableControls(False);

  try
    Pdf.FileName := EditPdfFile.Text;
    Pdf.PageNumber := 0;
    Pdf.Active := True;

    ParsePageRange(EditPageRange.Text, StartPage, EndPage);
    // Keep the requested range inside the document: -1 means "to the end",
    // and a user-supplied end beyond the last page is cut back to it.
    if StartPage < 1 then
      StartPage := 1;
    if (EndPage = -1) or (EndPage > Pdf.PageCount) then
      EndPage := Pdf.PageCount;

    // First pass: count the images so the progress bar has a maximum
    for I := StartPage to EndPage do
    begin
      Pdf.PageNumber := I;
      FTotalImages := FTotalImages + Pdf.BitmapCount;
    end;

    ProgressBar.Max := FTotalImages;

    // Second pass: extract.
    // NOTE(review): nothing in this routine pumps messages, so FCancelled
    // can only change if one of the called helpers does - confirm.
    for I := StartPage to EndPage do
    begin
      if FCancelled then
        Break;

      Pdf.PageNumber := I;

      for J := 0 to Pdf.BitmapCount - 1 do
      begin
        if FCancelled then
          Break;

        Bitmap := Pdf.Bitmap[J];
        if Assigned(Bitmap) then
        begin
          try
            DetectedFormat := DetectImageFormat(Bitmap);
            ImageExtension := GetExtensionForFormat(DetectedFormat);

            FullFileName := Format('%s\Page%d_Image%d%s',
              [FCurrentOutputDir, I, J + 1, ImageExtension]);

            // NOTE(review): SaveBitmapInOptimalFormat writes a .bmp file
            // when handed a .png name, so the FileName/Format recorded
            // below may not match the file on disk in the PNG case.
            SaveBitmapInOptimalFormat(Bitmap, FullFileName);

            // Describe the image for the preview list
            ImageInfo.FileName := FullFileName;
            ImageInfo.PageNumber := I;
            ImageInfo.ImageIndex := J + 1;
            ImageInfo.Width := Bitmap.Width;
            ImageInfo.Height := Bitmap.Height;
            ImageInfo.Format := DetectedFormat;
            // ImageInfo is a local, so Size would otherwise hold whatever
            // was on the stack; UpdatePreview shows it when it is > 0.
            ImageInfo.Size := 0;
            ImageInfo.Bitmap := TBitmap.Create;
            try
              ImageInfo.Bitmap.Assign(Bitmap);
            except
              // Do not leak the copy when the pixel data cannot be cloned
              ImageInfo.Bitmap.Free;
              raise;
            end;

            AddImageInfo(ImageInfo);

            Inc(FProcessedImages);
            ProgressBar.Position := FProcessedImages;

          finally
            Bitmap.Free;
          end;
        end;
      end;
    end;

    UpdateImageList;

  finally
    Pdf.Active := False;
    EnableControls(True);
  end;
end;

Intelligent Format Detection

Choose the optimal format based on image characteristics:

// Picks an output format name for ABitmap: 'PNG' for 32-bit bitmaps,
// 'JPEG' for 24-bit bitmaps with more than 100000 pixels, 'BMP' otherwise.
function TFormMain.DetectImageFormat(ABitmap: TBitmap): string;
const
  LargeImagePixels = 100000;
begin
  if ABitmap.PixelFormat = pf32bit then
    // 32-bit data can carry an alpha channel; PNG keeps it
    Result := 'PNG'
  else if (ABitmap.PixelFormat = pf24bit) and
          (Int64(ABitmap.Width) * ABitmap.Height > LargeImagePixels) then
    // Large true-colour image - treated as photographic, JPEG is smaller.
    // The pixel count is computed in Int64 so very large images cannot
    // overflow Integer.
    Result := 'JPEG'
  else
    // Small or low-colour image - keep it lossless
    Result := 'BMP';
end;

// Maps a format name to a file extension by its first letter
// (case-insensitive): 'J...' -> '.jpg', 'P...' -> '.png', anything else,
// including an empty string, -> '.bmp'.
function TFormMain.GetExtensionForFormat(const AFormat: string): string;
begin
  Result := '.bmp';
  // Indexing [1] on an empty string is invalid, so bail out first
  if AFormat = '' then
    Exit;

  case UpCase(AFormat[1]) of
    'J': Result := '.jpg';
    'P': Result := '.png';
  end;
end;

// Picks an output format name for ABitmap: 'PNG' for 32-bit bitmaps,
// 'JPEG' for 24-bit bitmaps with more than 100000 pixels, 'BMP' otherwise.
function TFormMain.DetectImageFormat(ABitmap: TBitmap): string;
const
  LargeImagePixels = 100000;
begin
  if ABitmap.PixelFormat = pf32bit then
    // 32-bit data can carry an alpha channel; PNG keeps it
    Result := 'PNG'
  else if (ABitmap.PixelFormat = pf24bit) and
          (Int64(ABitmap.Width) * ABitmap.Height > LargeImagePixels) then
    // Large true-colour image - treated as photographic, JPEG is smaller.
    // The pixel count is computed in Int64 so very large images cannot
    // overflow Integer.
    Result := 'JPEG'
  else
    // Small or low-colour image - keep it lossless
    Result := 'BMP';
end;

// Maps a format name to a file extension by its first letter
// (case-insensitive): 'J...' -> '.jpg', 'P...' -> '.png', anything else,
// including an empty string, -> '.bmp'.
function TFormMain.GetExtensionForFormat(const AFormat: string): string;
begin
  Result := '.bmp';
  // Indexing [1] on an empty string is invalid, so bail out first
  if AFormat = '' then
    Exit;

  case UpCase(AFormat[1]) of
    'J': Result := '.jpg';
    'P': Result := '.png';
  end;
end;

Saving in Optimal Format

// Writes ABitmap to disk in the format implied by AFileName's extension
// (compared case-insensitively):
//   .jpg / .jpeg - JPEG at quality 85
//   .png         - no PNG encoder is used here; the image is written as
//                  BMP under the same name with a .bmp extension
//   anything else - BMP under AFileName as given
procedure TFormMain.SaveBitmapInOptimalFormat(ABitmap: TBitmap;
  const AFileName: string);
var
  JpegImg: TJPEGImage;
  FileExt: string;
begin
  FileExt := ExtractFileExt(AFileName);

  // '.jpeg' is accepted as well, so such a file never receives BMP data
  if SameText(FileExt, '.jpg') or SameText(FileExt, '.jpeg') then
  begin
    JpegImg := TJPEGImage.Create;
    try
      JpegImg.Assign(ABitmap);
      JpegImg.CompressionQuality := 85; // Good quality/size balance
      JpegImg.SaveToFile(AFileName);
    finally
      JpegImg.Free;
    end;
  end
  else if SameText(FileExt, '.png') then
  begin
    // PNG would require additional library
    // Fall back to BMP for compatibility
    ABitmap.SaveToFile(ChangeFileExt(AFileName, '.bmp'));
  end
  else
  begin
    // BMP - lossless quality
    ABitmap.SaveToFile(AFileName);
  end;
end;

// Writes ABitmap to disk in the format implied by AFileName's extension
// (compared case-insensitively):
//   .jpg / .jpeg - JPEG at quality 85
//   .png         - no PNG encoder is used here; the image is written as
//                  BMP under the same name with a .bmp extension
//   anything else - BMP under AFileName as given
procedure TFormMain.SaveBitmapInOptimalFormat(ABitmap: TBitmap;
  const AFileName: string);
var
  JpegImg: TJPEGImage;
  FileExt: string;
begin
  FileExt := ExtractFileExt(AFileName);

  // '.jpeg' is accepted as well, so such a file never receives BMP data
  if SameText(FileExt, '.jpg') or SameText(FileExt, '.jpeg') then
  begin
    JpegImg := TJPEGImage.Create;
    try
      JpegImg.Assign(ABitmap);
      JpegImg.CompressionQuality := 85; // Good quality/size balance
      JpegImg.SaveToFile(AFileName);
    finally
      JpegImg.Free;
    end;
  end
  else if SameText(FileExt, '.png') then
  begin
    // PNG would require additional library
    // Fall back to BMP for compatibility
    ABitmap.SaveToFile(ChangeFileExt(AFileName, '.bmp'));
  end
  else
  begin
    // BMP - lossless quality
    ABitmap.SaveToFile(AFileName);
  end;
end;

Using TPdfImage for Raw Image Data

For advanced use cases, access raw image data:

// Opens 'document.pdf', selects page 1 and shows one message box per
// image on that page with its index, dimensions and data length.
procedure ProcessRawImageData;
var
  Document: TPdf;
  ImageIndex: Integer;
  RawImage: TPdfImage;
  Summary: string;
begin
  Document := TPdf.Create(nil);
  try
    Document.FileName := 'document.pdf';
    Document.Active := True;
    Document.PageNumber := 1;

    for ImageIndex := 0 to Document.ImageCount - 1 do
    begin
      RawImage := Document.Image[ImageIndex];

      // RawImage.Data holds the raw pixel data; only its length is reported
      Summary := Format('Image %d: %d x %d, %d bytes',
        [ImageIndex, RawImage.Width, RawImage.Height, Length(RawImage.Data)]);
      ShowMessage(Summary);
    end;

  finally
    Document.Active := False;
    Document.Free;
  end;
end;

// Opens 'document.pdf', selects page 1 and shows one message box per
// image on that page with its index, dimensions and data length.
procedure ProcessRawImageData;
var
  Pdf: TPdf;
  I: Integer;
  PdfImage: TPdfImage;
begin
  Pdf := TPdf.Create(nil);
  try
    Pdf.FileName := 'document.pdf';
    Pdf.Active := True;
    Pdf.PageNumber := 1;

    for I := 0 to Pdf.ImageCount - 1 do
    begin
      PdfImage := Pdf.Image[I];

      // Access raw image properties
      ShowMessage(Format('Image %d: %d x %d, %d bytes',
        [I, PdfImage.Width, PdfImage.Height, Length(PdfImage.Data)]));

      // PdfImage.Data contains raw pixel data
    end;

  finally
    Pdf.Active := False;
    Pdf.Free;
  end;
end;

Displaying Image Information

// Shows the extracted image at Index in ImagePreview and lists its file
// name, page, dimensions, format and (when known) size in MemoInfo.
// An Index outside FExtractedImages leaves the display untouched.
procedure TFormMain.UpdatePreview(Index: Integer);
var
  Info: TImageInfo;
begin
  if (Index < 0) or (Index >= Length(FExtractedImages)) then
    Exit;

  Info := FExtractedImages[Index];

  // Update preview
  if Assigned(Info.Bitmap) then
    ImagePreview.Picture.Assign(Info.Bitmap);

  // Update info display in one batch so the memo repaints once
  MemoInfo.Lines.BeginUpdate;
  try
    MemoInfo.Lines.Clear;
    MemoInfo.Lines.Add('File: ' + ExtractFileName(Info.FileName));
    MemoInfo.Lines.Add('Page: ' + IntToStr(Info.PageNumber));
    MemoInfo.Lines.Add('Dimensions: ' + IntToStr(Info.Width) +
                       ' x ' + IntToStr(Info.Height));
    MemoInfo.Lines.Add('Format: ' + Info.Format);
    // Size is only shown when a positive value was recorded
    if Info.Size > 0 then
      MemoInfo.Lines.Add('Size: ' + FormatFloat('#,##0', Info.Size) + ' bytes');
  finally
    MemoInfo.Lines.EndUpdate;
  end;
end;

// Shows the extracted image at Index in ImagePreview and lists its file
// name, page, dimensions, format and (when known) size in MemoInfo.
// An Index outside FExtractedImages leaves the display untouched.
procedure TFormMain.UpdatePreview(Index: Integer);
var
  Info: TImageInfo;
begin
  if (Index < 0) or (Index >= Length(FExtractedImages)) then
    Exit;

  Info := FExtractedImages[Index];

  // Update preview
  if Assigned(Info.Bitmap) then
    ImagePreview.Picture.Assign(Info.Bitmap);

  // Update info display in one batch so the memo repaints once
  MemoInfo.Lines.BeginUpdate;
  try
    MemoInfo.Lines.Clear;
    MemoInfo.Lines.Add('File: ' + ExtractFileName(Info.FileName));
    MemoInfo.Lines.Add('Page: ' + IntToStr(Info.PageNumber));
    MemoInfo.Lines.Add('Dimensions: ' + IntToStr(Info.Width) +
                       ' x ' + IntToStr(Info.Height));
    MemoInfo.Lines.Add('Format: ' + Info.Format);
    // Size is only shown when a positive value was recorded
    if Info.Size > 0 then
      MemoInfo.Lines.Add('Size: ' + FormatFloat('#,##0', Info.Size) + ' bytes');
  finally
    MemoInfo.Lines.EndUpdate;
  end;
end;

Page Range Parsing

// Parses a page-range string into AStartPage / AEndPage.
//   ''  or 'all' (any case) -> 1 .. -1   (-1 means "to the last page")
//   'N'                      -> N .. N
//   'A-B'                    -> A .. B; a missing or non-numeric A becomes 1,
//                               a missing or non-numeric B becomes -1
// A start page below 1 is raised to 1, and a reversed range such as '10-5'
// is returned in ascending order.
procedure TFormMain.ParsePageRange(const ARange: string;
  var AStartPage, AEndPage: Integer);
var
  RangeStr: string;
  DashPos, Swap: Integer;
begin
  RangeStr := Trim(ARange);
  AStartPage := 1;
  AEndPage := -1; // -1 means extract to end

  if (RangeStr = '') or (UpperCase(RangeStr) = 'ALL') then
    Exit;

  DashPos := Pos('-', RangeStr);
  if DashPos > 0 then
  begin
    // Range format: start-end
    AStartPage := StrToIntDef(Trim(Copy(RangeStr, 1, DashPos - 1)), 1);
    AEndPage := StrToIntDef(Trim(Copy(RangeStr, DashPos + 1, Length(RangeStr))), -1);
  end
  else
  begin
    // Single page
    AStartPage := StrToIntDef(RangeStr, 1);
    AEndPage := AStartPage;
  end;

  // Pages are numbered from 1
  if AStartPage < 1 then
    AStartPage := 1;

  // Put an explicit, reversed range into ascending order; the -1
  // "to end" marker and other non-positive ends are left as parsed
  if (AEndPage >= 1) and (AEndPage < AStartPage) then
  begin
    Swap := AStartPage;
    AStartPage := AEndPage;
    AEndPage := Swap;
  end;
end;

// Parses a page-range string into AStartPage / AEndPage.
//   ''  or 'all' (any case) -> 1 .. -1   (-1 means "to the last page")
//   'N'                      -> N .. N
//   'A-B'                    -> A .. B; a missing or non-numeric A becomes 1,
//                               a missing or non-numeric B becomes -1
// A start page below 1 is raised to 1, and a reversed range such as '10-5'
// is returned in ascending order.
procedure TFormMain.ParsePageRange(const ARange: string;
  var AStartPage, AEndPage: Integer);
var
  RangeStr: string;
  DashPos, Swap: Integer;
begin
  RangeStr := Trim(ARange);
  AStartPage := 1;
  AEndPage := -1; // -1 means extract to end

  if (RangeStr = '') or (UpperCase(RangeStr) = 'ALL') then
    Exit;

  DashPos := Pos('-', RangeStr);
  if DashPos > 0 then
  begin
    // Range format: start-end
    AStartPage := StrToIntDef(Trim(Copy(RangeStr, 1, DashPos - 1)), 1);
    AEndPage := StrToIntDef(Trim(Copy(RangeStr, DashPos + 1, Length(RangeStr))), -1);
  end
  else
  begin
    // Single page
    AStartPage := StrToIntDef(RangeStr, 1);
    AEndPage := AStartPage;
  end;

  // Pages are numbered from 1
  if AStartPage < 1 then
    AStartPage := 1;

  // Put an explicit, reversed range into ascending order; the -1
  // "to end" marker and other non-positive ends are left as parsed
  if (AEndPage >= 1) and (AEndPage < AStartPage) then
  begin
    Swap := AStartPage;
    AStartPage := AEndPage;
    AEndPage := Swap;
  end;
end;

Open Extracted Images Folder

// Opens FCurrentOutputDir through the shell's 'open' verb, or tells the
// user when that directory does not exist.
procedure TFormMain.ButtonOpenFolderClick(Sender: TObject);
begin
  if not DirectoryExists(FCurrentOutputDir) then
  begin
    ShowMessage('Output directory does not exist.');
    Exit;
  end;

  ShellExecute(Handle, 'open', PChar(FCurrentOutputDir), nil, nil, SW_SHOWNORMAL);
end;

// Opens FCurrentOutputDir through the shell's 'open' verb, or tells the
// user when that directory does not exist.
procedure TFormMain.ButtonOpenFolderClick(Sender: TObject);
begin
  if not DirectoryExists(FCurrentOutputDir) then
  begin
    ShowMessage('Output directory does not exist.');
    Exit;
  end;

  ShellExecute(Handle, 'open', PChar(FCurrentOutputDir), nil, nil, SW_SHOWNORMAL);
end;

Use Cases

Digital Asset Extraction – Extract photos and graphics from marketing materials
Document Conversion – Prepare images for web or other formats
Archive Processing – Extract images from scanned document archives
Content Analysis – Extract images for machine learning or analysis

Conclusion

The Extract Images demo shows how easy it is to extract embedded images from PDF documents with PDFium VCL. The component handles the complex PDF parsing while you focus on how to use the extracted images in your application.

Combined with intelligent format detection, you can build professional image extraction tools that produce optimized output for any use case.

Explore PDFium Component at loslab.com and unlock the content in your PDF documents.