HOME


Mini Shell 1.0
Redirecting to https://devs.lapieza.net/iniciar-sesion Redirecting to https://devs.lapieza.net/iniciar-sesion.
DIR: /var/www/devs.lapieza.net/vendor/mindee/mindee/src/Input/
Upload File :
Current File : /var/www/devs.lapieza.net/vendor/mindee/mindee/src/Input/LocalInputSource.php
<?php

/**
 * Local input handling.
 */

namespace Mindee\Input;

use CURLFile;
use Exception;
use Mindee\Error\ErrorCode;
use Mindee\Error\MindeeImageException;
use Mindee\Error\MindeeMimeTypeException;
use Mindee\Error\MindeePDFException;
use Mindee\Error\MindeeSourceException;
use Mindee\Error\MindeeUnhandledException;
use Mindee\Image\ImageCompressor;
use Mindee\Parsing\DependencyChecker;
use Mindee\PDF\PDFCompressor;
use Mindee\PDF\PDFUtils;
use setasign\Fpdi\Fpdi;
use setasign\Fpdi\PdfParser\PdfParserException;
use setasign\Fpdi\PdfReader\PdfReaderException;

/**
 * List of allowed mime types for document parsing.
 *
 * LocalInputSource::checkMimeType() rejects any input whose detected MIME type is not in this list,
 * and joins the entries (in this order) into the resulting error message.
 */
const ALLOWED_MIME_TYPES = [
    'application/pdf',
    'image/heic',
    'image/png',
    'image/jpg',
    'image/jpeg',
    'image/tiff',
    'image/webp',
];

/**
 * Base class for all input sources coming from the local machine.
 *
 * The constructor reads $fileMimetype (and, when a PDF fix is requested, $fileObject and $fileName), so concrete
 * sources must populate those properties before delegating to it.
 */
abstract class LocalInputSource extends InputSource
{
    /**
     * @var CURLFile File object, as a CURLFile for simplicity.
     */
    public CURLFile $fileObject;
    /**
     * @var string Name of the file, mandatory for proper Mime type handling server-side.
     */
    public string $fileName;
    /**
     * @var string File Mime type, as a string.
     */
    public string $fileMimetype;
    /**
     * @var string|null Path of the file for files retrieved from a path.
     */
    public ?string $filePath;

    /**
     * @var boolean Sets the input mode to debug. Only used in unit tests.
     */
    protected bool $throwsOnClose;

    /**
     * Checks the mimetype integrity of a file.
     *
     * @return void
     * @throws MindeeMimeTypeException Throws if the Mime type isn't allowed.
     */
    private function checkMimeType()
    {
        if (in_array($this->fileMimetype, ALLOWED_MIME_TYPES, true)) {
            return;
        }
        $fileTypes = implode(', ', ALLOWED_MIME_TYPES);
        throw new MindeeMimeTypeException(
            "File type " .
            $this->fileMimetype .
            " not allowed, must be one of $fileTypes.",
            ErrorCode::USER_OPERATION_ERROR
        );
    }

    /**
     * Base constructor, mostly used for Mime type checking.
     *
     * The optional PDF fix runs first, because it may replace the detected Mime type that is validated afterwards.
     *
     * @param boolean $fixPDF Whether the PDF should be fixed or not.
     * @throws MindeeMimeTypeException Throws if the Mime type isn't allowed.
     * @throws MindeeSourceException Throws if a fix was requested and the file couldn't be fixed.
     */
    public function __construct(bool $fixPDF = false)
    {
        if ($fixPDF) {
            $this->fixPDF();
        }
        $this->checkMimeType();
        $this->throwsOnClose = false;
    }

    /**
     * Checks whether the file type is a PDF.
     *
     * @return boolean
     */
    public function isPDF(): bool
    {
        return $this->fileMimetype === 'application/pdf';
    }

    /**
     * Counts the amount of pages in a PDF.
     *
     * @return integer
     * @throws MindeePDFException Throws if the source pdf can't be properly processed.
     * @throws MindeeSourceException Throws if the source isn't a pdf.
     */
    public function countDocPages(): int
    {
        if (!$this->isPDF()) {
            throw new MindeeSourceException(
                "File is not a PDF.",
                ErrorCode::USER_OPERATION_ERROR
            );
        }
        $pdf = new FPDI();
        try {
            return $pdf->setSourceFile($this->fileObject->getFilename());
        } catch (PdfParserException | PdfReaderException $e) {
            // Same exception pair as the other PDF readers of this class.
            throw new MindeePDFException(
                "Failed to read PDF file.",
                ErrorCode::PDF_CANT_PROCESS,
                $e
            );
        }
    }

    /**
     * Keeps or removes a selection of pages, then replaces the current file with the resulting PDF.
     *
     * Nothing is done when the document has fewer pages than $onMinPages.
     * Negative indexes count from the end of the document; indexes that don't exist are logged and ignored.
     *
     * @param string  $behavior    Behaviors available: KEEP_ONLY, REMOVE.
     * @param integer $onMinPages  Minimum of pages to apply the operation.
     * @param array   $pageIndexes Indexes of the pages to apply the operation to.
     * @return void
     * @throws MindeePDFException Throws if the operation is unknown, or if the resulting PDF can't be processed.
     * @throws MindeeSourceException Throws if the source isn't a pdf, or if the resulting file can't be written.
     */
    public function processPDF(string $behavior, int $onMinPages, array $pageIndexes)
    {
        if ($this->isPDFEmpty()) {
            throw new MindeePDFException(
                "Pages are empty in PDF file.",
                ErrorCode::USER_INPUT_ERROR
            );
        }
        // Counting pages parses the PDF, so do it once instead of once per requested index.
        $pageCount = $this->countDocPages();
        if ($pageCount < $onMinPages) {
            return;
        }
        $allPages = range(0, $pageCount - 1);
        if ($behavior == KEEP_ONLY) {
            $pagesToKeep = $this->resolvePageIndexes($pageIndexes, $allPages);
        } elseif ($behavior == REMOVE) {
            $pagesToKeep = array_diff($allPages, $this->resolvePageIndexes($pageIndexes, $allPages));
        } else {
            throw new MindeePDFException(
                "Unknown operation '" . $behavior . "'.",
                ErrorCode::USER_OPERATION_ERROR
            );
        }
        if (count($pagesToKeep) < 1) {
            throw new MindeePDFException(
                "Resulting PDF would have no pages left.",
                ErrorCode::USER_OPERATION_ERROR
            );
        }
        $this->mergePDFPages($pagesToKeep);
    }

    /**
     * Converts requested page indexes to absolute ones and drops those missing from the document.
     *
     * @param array $pageIndexes Requested indexes; negative values are relative to the end of the document.
     * @param array $allPages    Every valid zero-based page index of the document.
     * @return array Valid absolute indexes, in the order they were requested.
     */
    private function resolvePageIndexes(array $pageIndexes, array $allPages): array
    {
        $pageCount = count($allPages);
        $resolved = [];
        foreach ($pageIndexes as $pageId) {
            if ($pageId < 0) {
                $pageId = $pageCount + $pageId;
            }
            // Loose comparison on purpose: keeps accepting numeric strings supplied by callers.
            if (!in_array($pageId, $allPages)) {
                error_log("Page index '" . $pageId . "' is not present in source document");
                continue;
            }
            $resolved[] = $pageId;
        }
        return $resolved;
    }

    /**
     * Writes raw bytes to a new temporary file and makes it the current file of this source.
     *
     * @param string $fileBytes Raw data as bytes.
     * @return void
     * @throws MindeeSourceException Throws if the temporary file can't be created or written.
     */
    private function saveBytesAsFile(string $fileBytes)
    {
        $cutPdfTempFile = tempnam(sys_get_temp_dir(), 'mindee_cut_pdf_');
        if ($cutPdfTempFile === false || file_put_contents($cutPdfTempFile, $fileBytes) === false) {
            throw new MindeeSourceException(
                "Could not write the processed file to a temporary location.",
                ErrorCode::FILE_OPERATION_ERROR
            );
        }
        $this->filePath = $cutPdfTempFile;
        $this->fileObject = new CURLFile($cutPdfTempFile, $this->fileMimetype, $this->fileName);
    }

    /**
     * Create a new PDF from pages and set it as the main file object.
     *
     * @param array $pageNumbers Array of zero-based page numbers to add to the newly created PDF.
     * @return void
     * @throws MindeePDFException Throws if the pdf file can't be processed.
     * @throws MindeeSourceException Throws if the resulting file can't be written.
     */
    public function mergePDFPages(array $pageNumbers)
    {
        try {
            $pdf = new FPDI();
            // Read from the current file object: it is the file page counts are computed from, and it stays
            // accurate after fixPDF() or compress() replaced the file, unlike the optional $filePath.
            $pdf->setSourceFile($this->fileObject->getFilename());
            foreach ($pageNumbers as $pageNumber) {
                // NOTE(review): AddPage() is called without a size, so the new page does not take the
                // dimensions of the imported one - confirm this is intended.
                $pdf->AddPage();
                // FPDI page numbers are one-based.
                $pdf->useTemplate($pdf->importPage($pageNumber + 1));
            }
            $this->saveBytesAsFile($pdf->Output($this->fileName, 'S'));
            $pdf->Close();
        } catch (PdfParserException | PdfReaderException $e) {
            throw new MindeePDFException(
                "Failed to read PDF file.",
                ErrorCode::PDF_CANT_PROCESS,
                $e
            );
        }
    }

    /**
     * Checks whether the contents of a PDF are empty.
     *
     * Each page is copied alone into a fresh PDF; the document is considered non-empty as soon as one of those
     * single-page PDFs is larger than the threshold. A document without pages is reported as empty.
     *
     * @param integer $threshold Semi-arbitrary size in bytes a single-page PDF must exceed for its page to be
     *            considered as having content.
     *
     * @return boolean
     * @throws MindeePDFException Throws if the pdf file can't be processed.
     */
    public function isPDFEmpty(int $threshold = 1024): bool
    {
        $sourcePath = $this->fileObject->getFilename();
        try {
            $pdf = new FPDI();
            $pageCount = $pdf->setSourceFile($sourcePath);
            $pdf->Close();
            for ($pageNumber = 0; $pageNumber < $pageCount; $pageNumber++) {
                $pdfPage = new FPDI();
                $pdfPage->setSourceFile($sourcePath);
                $pdfPage->AddPage();
                $pdfPage->useTemplate($pdfPage->importPage($pageNumber + 1));
                $hasContent = strlen($pdfPage->Output('', 'S')) > $threshold;
                $pdfPage->Close();
                if ($hasContent) {
                    return false;
                }
            }
        } catch (PdfParserException | PdfReaderException $e) {
            throw new MindeePDFException(
                "Failed to read PDF file.",
                ErrorCode::PDF_CANT_PROCESS,
                $e
            );
        }
        return true;
    }

    /**
     * Reads the contents of the file.
     *
     * @return array The base name of the file, followed by its raw contents.
     * @throws MindeeSourceException Throws if the file can't be read.
     */
    public function readContents(): array
    {
        $sourcePath = $this->fileObject->getFilename();
        // file_get_contents() copes with empty files and does not rely on the cached result of filesize().
        $strContents = file_get_contents($sourcePath);
        if ($strContents === false) {
            throw new MindeeSourceException(
                "Could not read file '" . $sourcePath . "'.",
                ErrorCode::FILE_OPERATION_ERROR
            );
        }
        return [basename($sourcePath), $strContents];
    }


    /**
     * Attempts to fix a PDF file.
     *
     * Everything located before the first '%PDF' marker is discarded and the remainder is written to a temporary
     * file, which becomes the current file of this source. Images are left untouched.
     *
     * @return void
     * @throws MindeeSourceException Throws if the file couldn't be fixed.
     */
    private function fixPDF(): void
    {
        if (str_starts_with($this->fileMimetype, "image/")) {
            error_log("Input file is an image, skipping PDF fix.");
            return;
        }
        $bytesContent = file_get_contents($this->fileObject->getFilename());

        // First occurrence, case-insensitive: the marker can legitimately appear again further in the file
        // and cutting there would drop the actual beginning of the document.
        $pdfMarkerPosition = $bytesContent === false ? false : stripos($bytesContent, '%PDF');
        if ($pdfMarkerPosition === false) {
            throw new MindeeSourceException(
                "PDF file could not be fixed.",
                ErrorCode::FILE_OPERATION_ERROR
            );
        }

        $tempFile = tempnam(sys_get_temp_dir(), 'pdf_fix_');
        if ($tempFile === false) {
            throw new MindeeSourceException(
                "Could not create a temporary file for the fixed PDF.",
                ErrorCode::FILE_OPERATION_ERROR
            );
        }
        // Give the temporary file the same extension as the original file name.
        $fixedPath = $tempFile . "." . pathinfo($this->fileName, PATHINFO_EXTENSION);
        rename($tempFile, $fixedPath);
        if (file_put_contents($fixedPath, substr($bytesContent, $pdfMarkerPosition)) === false) {
            throw new MindeeSourceException(
                "Could not write the fixed PDF to a temporary file.",
                ErrorCode::FILE_OPERATION_ERROR
            );
        }

        $finfo = finfo_open(FILEINFO_MIME_TYPE);
        $detectedMimeType = $finfo === false ? false : finfo_file($finfo, $fixedPath);
        if ($finfo !== false) {
            finfo_close($finfo);
        }
        if ($detectedMimeType === false) {
            throw new MindeeSourceException(
                "Could not detect the Mime type of the fixed PDF.",
                ErrorCode::FILE_OPERATION_ERROR
            );
        }

        $this->fileMimetype = $detectedMimeType;
        $this->fileObject = new CURLFile($fixedPath, $this->fileMimetype, $this->fileName);
        // Keep the path in sync, otherwise path-based operations would still read the broken original.
        $this->filePath = $fixedPath;
    }

    /**
     * Closes the handle/stream, if the input type supports it.
     *
     * This base implementation has nothing to close: it logs a message, or throws when strict mode is enabled.
     *
     * @return void
     * @throws MindeeSourceException Throws when strict mode is enabled.
     */
    public function close(): void
    {
        if ($this->throwsOnClose) {
            throw new MindeeSourceException(
                "Closing is not implemented on this type of local input source.",
                ErrorCode::USER_OPERATION_ERROR
            );
        }
        error_log("Closing is not implemented on this type of local input source.");
    }

    /**
     * Enables strict mode.
     * Currently only used to throw on misuse of close().
     *
     * @return void
     */
    public function enableStrictMode()
    {
        $this->throwsOnClose = true;
    }

    /**
     * Compresses the current file and replaces the file object with the result.
     *
     * PDFs stay PDFs; any other input is handed to the image compressor and marked as a JPEG afterwards.
     *
     * @param integer      $quality                    Quality of the output file.
     * @param integer|null $maxWidth                   Maximum width (Ignored for PDFs).
     * @param integer|null $maxHeight                  Maximum height (Ignored for PDFs).
     * @param boolean      $forceSourceTextCompression Whether to force the operation on PDFs with source text.
     *            This will attempt to re-render PDF text over the rasterized original.
     *            The script will attempt to re-write text, but might not support all fonts & encoding.
     *            If disabled, ignored the operation.
     *            WARNING: this operation is strongly discouraged.
     * @param boolean      $disableSourceText          If the PDF has source text, whether to re-apply it to the
     *            original or not. Needs force_source_text to work.
     * @return void
     */
    public function compress(
        int $quality = 85,
        ?int $maxWidth = null,
        ?int $maxHeight = null,
        bool $forceSourceTextCompression = false,
        bool $disableSourceText = true
    ): void {
        if ($this->isPDF()) {
            $this->fileObject = PDFCompressor::compress(
                $this->fileObject,
                $quality,
                $forceSourceTextCompression,
                $disableSourceText
            );
            $this->fileMimetype = 'application/pdf';
            $this->replaceFilePathExtension('pdf');
        } else {
            $this->fileObject = ImageCompressor::compress(
                $this->fileObject,
                $quality,
                $maxWidth,
                $maxHeight
            );
            $this->fileMimetype = 'image/jpeg';
            $this->replaceFilePathExtension('jpg');
        }
    }

    /**
     * Swaps the extension of the stored file path, when there is one.
     *
     * @param string $extension New extension, without the leading dot.
     * @return void
     */
    private function replaceFilePathExtension(string $extension): void
    {
        // Sources without a path have nothing to rename; isset() also covers a property that was never set.
        if (!isset($this->filePath)) {
            return;
        }
        $pathInfo = pathinfo($this->filePath);
        $this->filePath = $pathInfo['dirname'] . DIRECTORY_SEPARATOR . $pathInfo['filename'] . '.' . $extension;
    }

    /**
     * Checks the source file for source text.
     *
     * @return boolean Returns false if none is found, or if the file isn't a PDF.
     * @throws Exception Throws if an instance of pdf-parser can't be created.
     */
    public function hasSourceText(): bool
    {
        if (!$this->isPDF()) {
            return false;
        }
        // Fall back on the current file object for sources that don't carry a path.
        return PDFUtils::hasSourceText($this->filePath ?? $this->fileObject->getFilename());
    }
}