HOME


Mini Shell 1.0
Redirecting to https://devs.lapieza.net/iniciar-sesion.
DIR: /var/www/devs.lapieza.net/vendor/mindee/mindee/src/PDF/
Upload File :
Current File : /var/www/devs.lapieza.net/vendor/mindee/mindee/src/PDF/PDFCompressor.php
<?php

namespace Mindee\PDF;

use Mindee\Error\ErrorCode;
use Mindee\Error\MindeePDFException;
use Mindee\Error\MindeeUnhandledException;
use Mindee\Parsing\DependencyChecker;
use setasign\Fpdi\Fpdi;
use setasign\Fpdi\PdfParser\CrossReference\CrossReferenceException;
use Smalot\PdfParser\Config;
use Smalot\PdfParser\Document;
use Smalot\PdfParser\Page;
use Smalot\PdfParser\Parser;

/**
 * PDF compression class.
 */
class PDFCompressor
{
    /**
     * Compresses a PDF by rasterizing every page to a JPEG and rebuilding the document from those images.
     *
     * If the source PDF contains text and $forceSourceTextCompression is false, nothing is compressed and
     * a copy of the input file is returned. If the rebuilt PDF ends up larger than the input, the rebuilt
     * file is discarded and the original input is returned.
     *
     * @param mixed   $input                      PDF input, resolved to a path by PDFUtils::extractFilePath().
     * @param integer $quality                    Compression quality (70-100 for most JPG images in the test dataset).
     * @param boolean $forceSourceTextCompression If true, compresses the file even when source text is detected.
     * @param boolean $disableSourceText          If true, doesn't re-apply source text to the output PDF.
     * @return \CURLFile Handle on the compressed file, or on the input (or its copy) when compression is skipped.
     * @throws MindeePDFException Throws if the operation fails at any step.
     * //phpcs:disable
     * @throws MindeeUnhandledException Throws if one of the dependencies isn't installed.
     */
    public static function compress(
        $input,
        int $quality = 85,
        bool $forceSourceTextCompression = false,
        bool $disableSourceText = true
    ): \CURLFile {
        //phpcs: enable
        DependencyChecker::isImageMagickAvailable();
        DependencyChecker::isGhostscriptAvailable();
        try {
            $pdfPath = PDFUtils::extractFilePath($input);
            $initialFileSize = filesize($pdfPath);
            $config = new Config();
            $config->setDataTmFontInfoHasToBeIncluded(true);
            $parser = new Parser([], $config);
            $pdf = $parser->parseFile($pdfPath);

            if (strlen($pdf->getText()) > 0) {
                if (!$forceSourceTextCompression) {
                    error_log("[WARNING] Source-text detected in input PDF '$pdfPath'. Aborting operation.");
                    $outputPath = self::createTempPdfPath('compressed_pdf_');
                    if (!copy($pdfPath, $outputPath)) {
                        // Don't hand back a CURLFile pointing at an empty placeholder.
                        if (is_file($outputPath)) {
                            unlink($outputPath);
                        }
                        throw new \RuntimeException("Couldn't copy '$pdfPath' to '$outputPath'.");
                    }
                    return PDFUtils::toCURLFile($outputPath);
                }

                if (!$disableSourceText) {
                    error_log("[WARNING] Re-writing PDF source-text is an EXPERIMENTAL feature.");
                } else {
                    // This branch runs when the flag is TRUE: the text is dropped from the output.
                    error_log("[WARNING] Source file '$pdfPath' contains text, but disable_source_text flag"
                        . " is set to true. Resulting file will not contain any embedded text.");
                }
            }

            try {
                $fpdi = new CustomFPDI();
                $pageCount = $fpdi->setSourceFile($pdfPath);
            } catch (CrossReferenceException $e) {
                error_log("[WARNING] PDF format for '$pdfPath' is not directly supported." .
                    " Output PDF will be rasterized and source text won't be available.");
                // Retry on a downgraded copy; the parsed document must match the file actually rasterized.
                $pdfPath = PDFUtils::downgradePdfVersion($pdfPath);
                $fpdi = new CustomFPDI();
                $pdf = $parser->parseFile($pdfPath);
                $pageCount = $fpdi->setSourceFile($pdfPath);
            }

            // Only needed when text is re-applied; fetched once rather than once per page.
            $sourcePages = $disableSourceText ? [] : $pdf->getPages();

            $outPdf = new CustomFPDI();
            for ($i = 1; $i <= $pageCount; $i++) {
                [$tempJpegFile, $orientation] = self::processPdfPage($pdfPath, $i, $quality);
                try {
                    $imageSize = getimagesize($tempJpegFile);
                    if ($imageSize === false) {
                        throw new \RuntimeException("Couldn't read the dimensions of rasterized page $i.");
                    }
                    [$width, $height] = $imageSize;
                    $outPdf->AddPage($orientation, [$width, $height]);
                    $outPdf->Image($tempJpegFile, 0, 0, $width, $height);
                } finally {
                    // The intermediate JPEG is removed whether or not the page could be added.
                    if (is_file($tempJpegFile)) {
                        unlink($tempJpegFile);
                    }
                }

                if (!$disableSourceText) {
                    if (!isset($sourcePages[$i - 1])) {
                        // Raised as an Exception so it is wrapped below instead of surfacing as a TypeError.
                        throw new \RuntimeException("No parsed page available for page $i.");
                    }
                    self::injectTextForPage($sourcePages[$i - 1], $outPdf);
                }
            }

            $outputPath = self::createTempPdfPath('compressed_pdf_');
            try {
                $outPdf->Output('F', $outputPath);
            } catch (\Exception $e) {
                if (is_file($outputPath)) {
                    unlink($outputPath);
                }
                throw $e;
            }
            // The path existed as an empty placeholder before being written: make sure the size is fresh.
            clearstatcache(true, $outputPath);
            $finalPDFSize = filesize($outputPath);

            if ($initialFileSize < $finalPDFSize) {
                error_log("[WARNING] Compressed PDF for '$pdfPath' would be larger than input." .
                    " Aborting operation.");
                // The rebuilt file is discarded, so don't leave it behind in the temp directory.
                unlink($outputPath);
                return PDFUtils::toCURLFile(PDFUtils::extractFilePath($input));
            }
            return PDFUtils::toCURLFile($outputPath);
        } catch (\Exception $e) {
            throw new MindeePDFException(
                "Couldn't compress PDF.",
                ErrorCode::FILE_OPERATION_ABORTED,
                $e
            );
        }
    }

    /**
     * Reserves a unique temporary path ending in '.pdf'.
     *
     * tempnam() creates an extension-less file on disk; that file is renamed to the '.pdf' path so no
     * orphaned placeholder is left behind.
     *
     * @param string $prefix File name prefix handed to tempnam().
     * @return string Path of the reserved (empty) '.pdf' file.
     * @throws \RuntimeException Throws if the temporary file can't be created or renamed.
     */
    private static function createTempPdfPath(string $prefix): string
    {
        $placeholder = tempnam(sys_get_temp_dir(), $prefix);
        if ($placeholder === false) {
            throw new \RuntimeException("Couldn't create a temporary file.");
        }

        $pdfPath = $placeholder . '.pdf';
        if (!rename($placeholder, $pdfPath)) {
            unlink($placeholder);
            throw new \RuntimeException("Couldn't reserve temporary file '$pdfPath'.");
        }

        return $pdfPath;
    }

    /**
     * Extracts the text elements of a single parsed page and adds each of them to the output PDF.
     *
     * Extraction and insertion are both delegated to PDFUtils; any exception raised along the way is
     * wrapped in a MindeePDFException.
     *
     * @param Page $inputPage Input page.
     * @param CustomFPDI $outputPdf Output PDF handle.
     * @return void
     * @throws MindeePDFException Throws if text can't be inserted into the page.
     */
    private static function injectTextForPage(Page $inputPage, CustomFPDI $outputPdf): void
    {
        try {
            foreach (PDFUtils::extractTextElements($inputPage) as $textElement) {
                PDFUtils::addTextElement($outputPdf, $textElement);
            }
        } catch (\Exception $error) {
            throw new MindeePDFException("Couldn't inject text into the new file.", ErrorCode::PDF_CANT_EDIT, $error);
        }
    }

    /**
     * Writes the processed PDF to a temporary '.pdf' file, optionally injecting the original text first.
     *
     * NOTE(review): no other method visible in this class calls this helper — confirm whether it is still needed.
     *
     * @param CustomFPDI $processedPdf The FPDI object containing the processed pages.
     * @param boolean $disableSourceText Whether to disable source text injection.
     * @param Document $originalPdf The original PDF document (used for text injection).
     * @return string Path to the output PDF file
     * @throws MindeePDFException If there's an error creating the output PDF.
     */
    private static function createOutputPdf(
        CustomFPDI $processedPdf,
        bool       $disableSourceText,
        Document   $originalPdf
    ): string {
        try {
            if (!$disableSourceText) {
                self::injectText($originalPdf, $processedPdf);
            }

            // tempnam() creates an extension-less file on disk; rename it to the '.pdf' path so the
            // placeholder isn't left orphaned in the temp directory.
            $placeholder = tempnam(sys_get_temp_dir(), 'compressed_pdf_');
            if ($placeholder === false) {
                throw new \RuntimeException("Couldn't create a temporary file.");
            }
            $outputPath = $placeholder . '.pdf';
            if (!rename($placeholder, $outputPath)) {
                unlink($placeholder);
                throw new \RuntimeException("Couldn't reserve temporary file '$outputPath'.");
            }

            $processedPdf->Output('F', $outputPath);

            return $outputPath;
        } catch (\Exception $e) {
            throw new MindeePDFException(
                "Couldn't create output PDF.",
                ErrorCode::PDF_CANT_CREATE,
                $e
            );
        }
    }


    /**
     * Walks every page of a parsed source PDF and re-applies its text elements onto the output PDF.
     *
     * For each page that yields at least one text element, the page with the same 1-based number is
     * imported into the output PDF, added as a new page of the same size and orientation, and the text
     * elements are written on top of it. Pages without text elements are skipped.
     *
     * @param Document $inputPdf Input PDF document.
     * @param CustomFPDI $outputPdf The output PDF object.
     * @return void
     * @throws MindeePDFException Throws if the text can't be injected.
     */
    private static function injectText(Document $inputPdf, CustomFPDI $outputPdf): void
    {
        try {
            $sourcePages = $inputPdf->getPages();
            $total = count($sourcePages);

            for ($index = 0; $index < $total; $index++) {
                $elements = PDFUtils::extractTextElements($sourcePages[$index]);
                if (empty($elements)) {
                    continue;
                }

                // importPage() takes 1-based page numbers, the parsed page list is 0-based.
                $templateId = $outputPdf->importPage($index + 1);
                $dimensions = $outputPdf->getTemplateSize($templateId);
                $outputPdf->AddPage(
                    $dimensions['orientation'],
                    [$dimensions['width'], $dimensions['height']]
                );
                $outputPdf->useTemplate($templateId);

                foreach ($elements as $textElement) {
                    PDFUtils::addTextElement($outputPdf, $textElement);
                }
            }
        } catch (\Exception $error) {
            throw new MindeePDFException("Couldn't inject text into the new file.", ErrorCode::PDF_CANT_EDIT, $error);
        }
    }


    /**
     * Processes a single PDF page, rasterizing it to a JPEG image.
     *
     * The page is first exported to a temporary single-page PDF, which Imagick then reads and writes
     * back out as a JPEG at the requested quality. The intermediate PDF is always removed; the JPEG is
     * removed only on failure, since the caller owns it on success.
     *
     * @param string $sourcePdfPath Path to the source PDF file.
     * @param integer $pageIndex Page number to process, as passed to importPage() (compress() passes 1..pageCount).
     * @param integer $imageQuality The quality setting for JPEG compression.
     * @return array Path to the temporary JPEG file and orientation of the page.
     * @throws MindeePDFException If there's an error processing the page.
     */
    private static function processPdfPage(string $sourcePdfPath, int $pageIndex, int $imageQuality): array
    {
        $tempPdfFile = null;
        $tempJpegFile = null;
        $imagick = null;
        try {
            $singlePagePdf = new Fpdi();
            $singlePagePdf->setSourceFile($sourcePdfPath);
            $tplId = $singlePagePdf->importPage($pageIndex);
            $size = $singlePagePdf->getTemplateSize($tplId);

            $singlePagePdf->AddPage($size['orientation'], [$size['width'], $size['height']]);
            $singlePagePdf->useTemplate($tplId);

            $tempPdfFile = self::reserveTempFile('pdf_page_', '.pdf');
            $singlePagePdf->Output('F', $tempPdfFile);

            $imagick = new \Imagick();
            $imagick->readImage($tempPdfFile);
            $imagick->setImageFormat('jpg');
            $imagick->setImageAlphaChannel(\Imagick::ALPHACHANNEL_REMOVE);
            $imagick->setImageCompression(\Imagick::COMPRESSION_JPEG);
            $imagick->setImageCompressionQuality($imageQuality);

            $tempJpegFile = self::reserveTempFile('pdf_page_', '.jpg');
            $imagick->writeImage($tempJpegFile);

            return [$tempJpegFile, $size['orientation']];
        } catch (\Exception $e) {
            // On failure nobody will consume the JPEG, so don't leave it behind.
            if ($tempJpegFile !== null && is_file($tempJpegFile)) {
                unlink($tempJpegFile);
            }
            throw new MindeePDFException(
                "Couldn't process PDF page $pageIndex.",
                ErrorCode::PDF_CANT_PROCESS,
                $e
            );
        } finally {
            // Runs on success and on failure: release the Imagick handle and the intermediate PDF.
            if ($imagick !== null) {
                $imagick->clear();
            }
            if ($tempPdfFile !== null && is_file($tempPdfFile)) {
                unlink($tempPdfFile);
            }
        }
    }

    /**
     * Reserves a unique temporary file carrying the given extension.
     *
     * tempnam() creates an extension-less file on disk; that file is renamed to the final path so no
     * orphaned placeholder is left behind.
     *
     * @param string $prefix File name prefix handed to tempnam().
     * @param string $extension Extension to append, including the leading dot.
     * @return string Path of the reserved (empty) file.
     * @throws \RuntimeException If the temporary file can't be created or renamed.
     */
    private static function reserveTempFile(string $prefix, string $extension): string
    {
        $placeholder = tempnam(sys_get_temp_dir(), $prefix);
        if ($placeholder === false) {
            throw new \RuntimeException("Couldn't create a temporary file.");
        }

        $reservedPath = $placeholder . $extension;
        if (!rename($placeholder, $reservedPath)) {
            unlink($placeholder);
            throw new \RuntimeException("Couldn't reserve temporary file '$reservedPath'.");
        }

        return $reservedPath;
    }
}