<?php

declare(strict_types=1);

namespace Atlas\ExportBundle\Service;

use ZipArchive;

/**
 * Builds a portable SPSS import package (.zip).
 *
 * The zip contains:
 *   - data.tsv      (tab-delimited data, first row = headers)
 *   - import.sps    (SPSS syntax to load data.tsv with labels/types/missing codes)
 */
final readonly class SpssBuilder
{
    /**
     * Matches plain numeric codes such as -9, 8 or 0.00.
     * Anchored with \z (not $) so a value with a trailing newline is not treated as numeric.
     */
    private const NUMERIC_CODE_PATTERN = '/^-?\d+(\.\d+)?\z/';

    /**
     * Build the zip package (data.tsv + import.sps) and describe it for download.
     *
     * If the archive cannot be created, 'binary' is an empty string (no exception is thrown).
     *
     * @param array<int, array<string, scalar|null>> $dataRows
     * @param array<int, array{
     *   name:string,
     *   label?:string,
     *   type:string,
     *   width?:int,
     *   decimals?:int,
     *   missing_values?:array<int,string|int|float>
     * }> $variables
     * @param array<string, array<string,string>> $categoricalOptions variable name => (code => meaning)
     *
     * @return array{filename:string,binary:string,contentType:string}
     */
    public function buildSpssPackage(
        array $dataRows,
        array $variables,
        array $categoricalOptions = [],
    ): array {
        $zipBinary = $this->zipInMemory([
            'data.tsv'   => $this->toTsv($dataRows, $variables),
            'import.sps' => $this->toSpsSyntax($variables, $categoricalOptions),
        ]);

        return [
            'filename'    => 'export_spss.zip',
            'binary'      => $zipBinary,
            'contentType' => 'application/zip',
        ];
    }

    /**
     * Build TSV with consistent column ordering based on $variables list.
     *
     * The first line holds the raw variable names; each data row follows in the same
     * column order. Missing or null cells become empty strings, and tabs / line breaks
     * inside a value are replaced by spaces so one row is always exactly one line.
     *
     * NOTE(review): double quotes inside values are written verbatim although import.sps
     * declares '"' as the qualifier — confirm how SPSS treats embedded qualifiers.
     */
    private function toTsv(array $dataRows, array $variables): string
    {
        $headers = array_map(static fn (array $v): string => (string) $v['name'], $variables);

        $lines = [implode("\t", $headers)];

        foreach ($dataRows as $row) {
            $cells = [];
            foreach ($headers as $colName) {
                // `??` already covers both "key absent" and "value is null".
                $cell = (string) ($row[$colName] ?? '');
                $cells[] = str_replace(["\t", "\r", "\n"], ' ', $cell);
            }
            $lines[] = implode("\t", $cells);
        }

        return implode("\n", $lines) . "\n";
    }

    /**
     * Build SPSS syntax:
     * - GET DATA ... reads data.tsv
     * - /VARIABLES= ... declares formats
     * - VARIABLE LABELS ... assigns descriptive labels
     * - VALUE LABELS ... declares categorical code frames
     * - MISSING VALUES ... maps skip/missing sentinels
     *
     * Codes attached to variables declared with a string (A) format are always quoted;
     * for other variables only non-numeric codes are quoted.
     */
    private function toSpsSyntax(array $variables, array $categoricalOptions): string
    {
        $varDecls = [];
        $labelLines = [];
        $valueLabelBlocks = [];
        $missingValueLines = [];
        // Sanitized variable name => true when the variable is declared with an A (string) format.
        $isStringVar = [];

        // Collect declarations, labels, and missing-code per var
        foreach ($variables as $var) {
            $spssVar = $this->spssVarName((string) $var['name']);

            $format = $this->pickFormat(
                strtoupper((string) $var['type']),
                $var['width'] ?? null,
                $var['decimals'] ?? null,
            );
            $varDecls[] = sprintf('%s %s', $spssVar, $format);

            $stringTyped = str_starts_with($format, 'A');
            $isStringVar[$spssVar] = $stringTyped;

            $labelLines[] = sprintf(
                '  %s %s',
                $spssVar,
                $this->quoteString((string) ($var['label'] ?? $var['name']))
            );

            $codes = $var['missing_values'] ?? null;
            if (\is_array($codes) && $codes !== []) {
                $encodedCodes = [];
                foreach ($codes as $rawCode) {
                    // Cast first: under strict_types an int/float code would otherwise
                    // raise a TypeError inside preg_match().
                    $encodedCodes[] = $this->escapeValueToken((string) $rawCode, $stringTyped);
                }

                $missingValueLines[] = sprintf(
                    '  %s (%s)',
                    $spssVar,
                    implode(', ', $encodedCodes)
                );
            }
        }

        // VALUE LABELS per categorical var
        foreach ($categoricalOptions as $varName => $choices) {
            if (!\is_array($choices) || $choices === []) {
                continue;
            }

            $spssVar = $this->spssVarName((string) $varName);
            // Variables not present in $variables fall back to the numeric/non-numeric heuristic.
            $forceQuote = $isStringVar[$spssVar] ?? false;

            $lines = [sprintf('  %s', $spssVar)];
            foreach ($choices as $value => $meaning) {
                $lines[] = sprintf(
                    '    %s %s',
                    $this->escapeValueToken((string) $value, $forceQuote),
                    $this->quoteString((string) $meaning)
                );
            }

            // No terminator here: the whole VALUE LABELS command gets exactly one,
            // appended by joinValueLabelBlocks().
            $valueLabelBlocks[] = implode("\n", $lines);
        }

        $syntax = [];
        $syntax[] = '* Auto-generated import script.';
        $syntax[] = 'GET DATA';
        $syntax[] = '  /TYPE=TXT';
        $syntax[] = "  /FILE='data.tsv'";
        $syntax[] = '  /DELCASE=LINE';
        $syntax[] = '  /DELIMITERS="\t"';
        $syntax[] = "  /QUALIFIER='\"'";
        $syntax[] = '  /ARRANGEMENT=DELIMITED';
        $syntax[] = '  /FIRSTCASE=2';
        $syntax[] = '  /IMPORTCASE=ALL';
        $syntax[] = '  /VARIABLES=';
        $syntax[] = '    ' . implode("\n    ", $varDecls) . '.';
        $syntax[] = '';

        if ($labelLines !== []) {
            $syntax[] = 'VARIABLE LABELS';
            $syntax[] = implode("\n", $labelLines) . "\n.";
            $syntax[] = '';
        }

        if ($valueLabelBlocks !== []) {
            $syntax[] = 'VALUE LABELS';
            $syntax[] = $this->joinValueLabelBlocks($valueLabelBlocks);
            $syntax[] = '';
        }

        if ($missingValueLines !== []) {
            $syntax[] = 'MISSING VALUES';
            $syntax[] = implode("\n", $missingValueLines) . "\n.";
            $syntax[] = '';
        }

        $syntax[] = 'EXECUTE.';
        $syntax[] = '';

        return implode("\n", $syntax);
    }

    /**
     * Map logical var types to SPSS formats.
     * INTEGER / CATEGORY => F8.0
     * DECIMAL => F8.<decimals> (decimals default to 2, are clamped to 0..16, and the
     *            field is widened beyond 8 when needed so the width stays larger than
     *            the number of decimals)
     * DATE => A11 (we import "DD/MM/YYYY" as string for now)
     * TEXT / anything else => A{width} (255 when no positive width is given, capped at 4000)
     */
    private function pickFormat(string $logicalType, ?int $width, ?int $decimals): string
    {
        $places = max(0, min(16, $decimals ?? 2));
        $textLen = ($width !== null && $width > 0) ? min($width, 4000) : 255;

        return match ($logicalType) {
            'INTEGER', 'CATEGORY' => 'F8.0',
            'DECIMAL'             => sprintf('F%d.%d', max(8, $places + 2), $places),
            'DATE'                => 'A11', // "DD/MM/YYYY" ~10 chars, allow 11
            default               => 'A' . $textLen,
        };
    }

    /**
     * SPSS VALUE LABELS syntax groups multiple variables using "/" between them.
     *
     * Every block after the first gets a "/" in front of its variable line (kept indented
     * like the other continuation lines), and the command receives a single terminating
     * "." on its own line. A terminator already present at the end of a block is dropped
     * so that a "." can never end up in the middle of the command.
     */
    private function joinValueLabelBlocks(array $blocks): string
    {
        $parts = [];
        foreach (array_values($blocks) as $index => $block) {
            $body = (string) $block;
            if (str_ends_with($body, "\n.")) {
                $body = substr($body, 0, -2);
            }

            $lines = explode("\n", $body);
            if ($index > 0) {
                $lines[0] = '  /' . ltrim($lines[0]);
            }
            $parts[] = implode("\n", $lines);
        }

        return implode("\n", $parts) . "\n.";
    }

    /**
     * Sanitize variable names for SPSS:
     * - must start with a letter
     * - only letters, numbers, underscore
     * - max 64 chars
     *
     * Every other byte is replaced by "_"; names that do not start with a letter
     * are prefixed with "V_"; an empty result becomes "VAR".
     */
    private function spssVarName(string $raw): string
    {
        $clean = preg_replace('/[^A-Za-z0-9_]/', '_', $raw);
        if ($clean === null || $clean === '') {
            $clean = 'VAR';
        }
        if (preg_match('/^[A-Za-z]/', $clean) !== 1) {
            $clean = 'V_' . $clean;
        }

        // Only ASCII remains at this point, so a byte-wise cut is safe.
        return substr($clean, 0, 64);
    }

    /**
     * Double every double quote so the text can sit inside a "..." literal.
     */
    private function escapeQuotes(string $s): string
    {
        return str_replace('"', '""', $s);
    }

    /**
     * Turn arbitrary text into a single-line, double-quoted literal.
     *
     * Line breaks are replaced by spaces because the literal is emitted on one
     * syntax line; embedded double quotes are doubled.
     */
    private function quoteString(string $s): string
    {
        $singleLine = str_replace(["\r\n", "\r", "\n"], ' ', $s);

        return '"' . $this->escapeQuotes($singleLine) . '"';
    }

    /**
     * For VALUE LABELS and MISSING VALUES:
     *  - numeric codes unquoted
     *  - string codes quoted
     *
     * @param bool $forceQuote quote even numeric-looking codes (used for variables
     *                         declared with a string format)
     */
    private function escapeValueToken(string $v, bool $forceQuote = false): string
    {
        if (!$forceQuote && preg_match(self::NUMERIC_CODE_PATTERN, $v) === 1) {
            return $v;
        }

        return $this->quoteString($v);
    }

    /**
     * Write the given files into a temporary zip archive and return its bytes.
     *
     * Returns an empty string when the temp file cannot be created, the archive cannot
     * be opened, an entry cannot be added, the archive cannot be finalised, or the
     * result cannot be read back. The temp file is removed on every path.
     *
     * @param array<string,string> $files filename => contents
     * @return string ZIP binary
     */
    private function zipInMemory(array $files): string
    {
        $tmpZipPath = tempnam(sys_get_temp_dir(), 'spsszip_');

        if ($tmpZipPath === false) {
            return '';
        }

        try {
            $zip = new ZipArchive();
            if ($zip->open($tmpZipPath, ZipArchive::OVERWRITE | ZipArchive::CREATE) !== true) {
                return '';
            }

            $allAdded = true;
            foreach ($files as $name => $content) {
                if (!$zip->addFromString((string) $name, (string) $content)) {
                    $allAdded = false;
                    break;
                }
            }

            // Always close to release the handle; close() is also the step that
            // writes the archive to disk, so its result matters.
            $closed = $zip->close();
            if (!$allAdded || !$closed) {
                return '';
            }

            $binary = file_get_contents($tmpZipPath);

            return $binary === false ? '' : $binary;
        } finally {
            // Best-effort cleanup; a failed delete must not mask the result.
            @unlink($tmpZipPath);
        }
    }
}
