% \iffalse meta-comment
%
%% File: tagpdf-tree.dtx
%
% Copyright (C) 2019-2024 Ulrike Fischer
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version.  The latest version
% of this license is in the file
%
%    https://www.latex-project.org/lppl.txt
%
% This file is part of the "tagpdf bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
%    https://github.com/latex3/tagpdf
%
% for those people who are interested.
%
%<*driver>
\DocumentMetadata{}
\documentclass{l3doc} % documentation class used to typeset this file
\usepackage{array,booktabs,caption}
\hypersetup{pdfauthor=Ulrike Fischer,
 pdftitle=tagpdf-tree module (tagpdf)}
\begin{document}
  \DocInput{\jobname.dtx} % input the .dtx file named after the current job
\end{document}
%</driver>
% \fi
% \title{^^A
%   The \pkg{tagpdf-tree} module\\ Commands trees and main dictionaries   ^^A
%   \\ Part of the tagpdf package
% }
%
% \author{^^A
%  Ulrike Fischer\thanks
%    {^^A
%      E-mail:
%        \href{mailto:fischer@troubleshooting-tex.de}
%          {fischer@troubleshooting-tex.de}^^A
%    }^^A
% }
%
% \date{Version 0.99f, released 2024-09-16}
% \maketitle
% \begin{implementation}
%    \begin{macrocode}
%<@@=tag>
%<*header>
\ProvidesExplPackage {tagpdf-tree-code} {2024-09-16} {0.99f}
 {part of tagpdf - code related to writing trees and dictionaries to the pdf}
 % arguments above: package name, date, version, description
%</header>
%    \end{macrocode}
% \section{Trees, pdfmanagement and finalization code}
%
% The code to finish the structure is in a hook.
% This will perhaps at the end be a kernel hook.
% TODO check right place for the code
% The pdfmanagement code is in the kernel hook after
% shipout/lastpage so all code affecting it should be before.
% Objects can be written later, at least in pdf mode.
%    \begin{macrocode}
%<*package>
\hook_gput_code:nnn { begindocument } { tagpdf }
  {
    % The finishing code is only registered if the structure tree is active.
    % Which hook receives it depends on the output mode: enddocument/end in
    % the true branch of \sys_if_output_pdf:TF, shipout/lastpage otherwise.
    \bool_if:NT \g_@@_active_tree_bool
      {
        \sys_if_output_pdf:TF
          { \AddToHook { enddocument/end }  { \@@_finish_structure: } }
          { \AddToHook { shipout/lastpage } { \@@_finish_structure: } }
      }
  }
%    \end{macrocode}
% \subsection{Check structure}
% \begin{macro}{\@@_tree_final_checks:}
%    \begin{macrocode}
\cs_new_protected:Npn \@@_tree_final_checks:
 {
   % Exactly one entry is expected on the structure stack at this point.
   % Otherwise a warning is issued and \tag_struct_end: is called once
   % for every entry beyond the first.
   \int_compare:nF { \seq_count:N \g_@@_struct_stack_seq = 1 }
    {
      \msg_warning:nn { tag } { tree-struct-still-open }
      \int_step_inline:nnn { 2 } { \seq_count:N \g_@@_struct_stack_seq }
       { \tag_struct_end: }
    }
   % the statistic note is always issued
   \msg_note:nn { tag } { tree-statistic }
 }
%    \end{macrocode}
% \end{macro}
% 
% \subsection{Catalog: MarkInfo and StructTreeRoot and OpenAction}
% The StructTreeRoot and the MarkInfo entry must be added to the catalog.
% If there is an OpenAction entry we must update it,
% so that it contains also a structure destination.
% We do it late so that we can win, but before the pdfmanagement hook.
% \begin{macro}{@@/struct/1}
% This is the object for the root object, the StructTreeRoot
%    \begin{macrocode}
\pdf_object_new_indexed:nn { @@/struct }{ 1 } % index 1 of the @@/struct objects: the StructTreeRoot
%    \end{macrocode}
% \end{macro}
% 
% \begin{variable}{\g_@@_tree_openaction_struct_tl}
% We need a variable that indicates which structure is wanted in the OpenAction. By default we use
% 2 (the Document structure).
%    \begin{macrocode}
\tl_new:N   \g_@@_tree_openaction_struct_tl
\tl_gset:Nn \g_@@_tree_openaction_struct_tl { 2 } % default value: structure number 2
%    \end{macrocode}
% \end{variable}
% 
% \begin{macro}{viewer/startstructure (setup-key)}
% We also need an option to setup the start structure. So we setup a key
% which sets the variable to the current structure. This still requires
% hyperref to do most of the job, this should perhaps be changed. 
%    \begin{macrocode}
\keys_define:nn { @@ / setup }
 {
   % With a value, the key stores the expanded value as start structure.
   % Without a value, the current value of the structure counter is stored.
   viewer/startstructure .code:n =
     { \tl_gset:Ne \g_@@_tree_openaction_struct_tl {#1} } ,
   viewer/startstructure .default:n =
     { \int_use:N \c@g_@@_struct_abs_int }
 }
%    \end{macrocode}
% \end{macro}
% The OpenAction should only be updated if it is there. So we inspect the 
% Catalog-prop:
%    \begin{macrocode}
\cs_new_protected:Npn \@@_tree_update_openaction:
  {
    % Look up OpenAction in the Catalog prop and store its value in
    % \l_@@_tmpa_tl; nothing is done if the entry does not exist.
    \prop_get:cnNT 
     { \__kernel_pdfdict_name:n { g__pdf_Core/Catalog } }
     {OpenAction}
     \l_@@_tmpa_tl
     {
%    \end{macrocode}
% we only do something if the OpenAction is an array (as set by hyperref)
% in other cases we hope that the author knows what they did.
%    \begin{macrocode}
       \tl_if_head_eq_charcode:eNT { \tl_trim_spaces:V\l_@@_tmpa_tl } [ %]
         {
           % split the old value at the slashes: item 2 of the seq is
           % the part which follows the first slash
           \seq_set_split:NnV\l_@@_tmpa_seq{/}\l_@@_tmpa_tl
           % set OpenAction to a GoTo action dictionary: /D gets the old
           % value, /SD refers to the structure object whose number is
           % stored in \g_@@_tree_openaction_struct_tl
           \pdfmanagement_add:nne {Catalog} { OpenAction }
             {
               <<
                 /S/GoTo \c_space_tl 
                 /D~\l_@@_tmpa_tl\c_space_tl
                 /SD~[\pdf_object_ref_indexed:nn{@@/struct}{\g_@@_tree_openaction_struct_tl}
%    \end{macrocode}
% there should be always a /Fit etc in the array but better play safe here ...
%    \begin{macrocode}
                      % the true branch adds no closing bracket itself,
                      % presumably item 2 still ends with the bracket of
                      % the old array; the false branch closes the array
                      \int_compare:nNnTF{ \seq_count:N \l_@@_tmpa_seq } > {1}
                       { /\seq_item:Nn\l_@@_tmpa_seq{2} }
                       { ] }
               >>  
             }                      
         }
     }    
  }  
%    \end{macrocode}
%
%    \begin{macrocode}
\hook_gput_code:nnn { shipout/lastpage } { tagpdf }
  {
    % Catalog entries are only added if the structure tree is active.
    \bool_if:NT \g_@@_active_tree_bool
      {
        % /MarkInfo << /Marked true >>
        \pdfmanagement_add:nnn { Catalog / MarkInfo } { Marked } { true }
        % /StructTreeRoot: reference to object 1 of the @@/struct objects
        \pdfmanagement_add:nne { Catalog } { StructTreeRoot }
          { \pdf_object_ref_indexed:nn { @@/struct } { 1 } }
        % extend an existing OpenAction
        \@@_tree_update_openaction:
      }
  }
%    \end{macrocode}
%
% \subsection{Writing the IDtree}
% 
% The IDs are currently quite simple: every structure has an ID built from
% the prefix ID together with the structure number padded with enough zeros so
% that we directly get a lexical order. We ship them out in bundles.
% At first an int to hold the width needed for the padding.
% \begin{variable}{\g_@@_tree_id_pad_int}
%    \begin{macrocode}
\int_new:N\g_@@_tree_id_pad_int % padding width, computed in a begindocument hook
%    \end{macrocode}
% \end{variable}
% Now we get the needed padding
%    \begin{macrocode}
\cs_generate_variant:Nn \tl_count:n {e} 
\hook_gput_code:nnn{begindocument}{tagpdf}
 {
   % pad width = number of tokens (digits) of the expanded tagstruct
   % lookup for the last page, plus one. 1000 is the second argument of
   % the lookup, presumably the fallback if no value is known -- TODO confirm
   \int_gset:Nn\g_@@_tree_id_pad_int 
    {\tl_count:e { \@@_property_ref_lastpage:nn{tagstruct}{1000}}+1}
 }

%    \end{macrocode}
% This is the main code to write the tree. It basically splits the
% existing structure numbers into chunks of length 50.
% TODO consider if 50 is a good length.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_tree_write_idtree:
  {
    % Scratch variables used below:
    % \l_@@_tmpa_tl  collects the "ID objref" pairs of the current chunk
    % \l_@@_tmpb_tl  collects the references to the chunk objects written
    % \l_@@_tmpa_int counts the entries in the current chunk
    \tl_clear:N \l_@@_tmpa_tl
    \tl_clear:N \l_@@_tmpb_tl
    \int_zero:N \l_@@_tmpa_int
    % the loop starts at 2, so structure number 1 gets no entry
    \int_step_inline:nnn {2} {\c@g_@@_struct_abs_int}
      {     
        \int_incr:N\l_@@_tmpa_int
        \tl_put_right:Ne \l_@@_tmpa_tl
          {
            \@@_struct_get_id:n{##1}~\pdf_object_ref_indexed:nn {@@/struct}{##1}~
          }        
        % chunk is full (counter no longer below 50): write it out as
        % a dictionary with /Limits (first and last ID of the chunk) and
        % /Names, remember the object reference, and reset the chunk
        \int_compare:nNnF {\l_@@_tmpa_int}<{50} %
          {
            \pdf_object_unnamed_write:ne {dict}
              { /Limits~[\@@_struct_get_id:n{##1-\l_@@_tmpa_int+1}~\@@_struct_get_id:n{##1}]
                /Names~[\l_@@_tmpa_tl]
              }
            \tl_put_right:Ne\l_@@_tmpb_tl {\pdf_object_ref_last:\c_space_tl}   
            \int_zero:N \l_@@_tmpa_int
            \tl_clear:N \l_@@_tmpa_tl
          }
      }
     % write the remaining, not yet full chunk (if there is one)
     \tl_if_empty:NF \l_@@_tmpa_tl
      {
        \pdf_object_unnamed_write:ne {dict}
          {
           /Limits~
             [\@@_struct_get_id:n{\c@g_@@_struct_abs_int-\l_@@_tmpa_int+1}~
              \@@_struct_get_id:n{\c@g_@@_struct_abs_int}]
           /Names~[\l_@@_tmpa_tl]
          }
        \tl_put_right:Ne\l_@@_tmpb_tl {\pdf_object_ref_last:}     
      }
      % the top node lists all chunk objects as /Kids; its reference is
      % stored under the key IDTree in the prop of structure 1
      \pdf_object_unnamed_write:ne {dict}{/Kids~[\l_@@_tmpb_tl]}
      \@@_prop_gput:cne
          { g_@@_struct_1_prop }
          { IDTree }
          { \pdf_object_ref_last: }
   }
%    \end{macrocode}
% 
% \subsection{Writing structure elements}
% The following commands are needed to write out the structure.
% \begin{macro}{\@@_tree_write_structtreeroot:}
% This writes out the root object.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_tree_write_structtreeroot:
  {
     % store the references to the ParentTree and RoleMap objects
     % in the prop of structure 1
     \@@_prop_gput:cne { g_@@_struct_1_prop } { ParentTree }
       { \pdf_object_ref:n { @@/tree/parenttree } }
     \@@_prop_gput:cne { g_@@_struct_1_prop } { RoleMap }
       { \pdf_object_ref:n { @@/tree/rolemap } }
     \@@_struct_fill_kid_key:n { 1 }
     % the S key is removed before the dictionary content is collected
     \prop_gremove:cn { g_@@_struct_1_prop } { S }
     \@@_struct_get_dict_content:nN { 1 } \l_@@_tmpa_tl
     % write object 1 of the @@/struct objects with the collected content
     \pdf_object_write_indexed:nnne { @@/struct } { 1 } { dict }
       { \l_@@_tmpa_tl }
%    \end{macrocode}
% Better put S back, see https://github.com/latex3/tagging-project/issues/86
%    \begin{macrocode}
     \prop_gput:cnn { g_@@_struct_1_prop } { S } { /StructTreeRoot }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_tree_write_structelements:}
% This writes out the other struct elems, the absolute number is in the counter.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_tree_write_structelements:
  {
    % write the structure objects number 2 up to the current value of
    % the absolute structure counter, in steps of one
    \int_step_inline:nnn { 2 } { \c@g_@@_struct_abs_int }
      { \@@_struct_write_obj:n { ##1 } }
  }
%    \end{macrocode}
% \end{macro}
%
% \subsection{ParentTree}
% \begin{macro}{@@/tree/parenttree}
% The object which will hold the parenttree
%    \begin{macrocode}
\pdf_object_new:n { @@/tree/parenttree } % only reserved here, written by \@@_tree_write_parenttree:
%    \end{macrocode}
% \end{macro}
% The ParentTree maps numbers to objects or (if the number represents a page)
% to arrays of objects. The numbers refer to two distinct types of entries:
% page streams and real objects like annotations.
% The numbers must be distinct and ordered.
% So we rely on abspage for the pages and put the real objects at the end.
% We use a counter to have a chance to get the correct number
% if code is processed twice.
%
% \begin{macro}{\c@g_@@_parenttree_obj_int}
%  This is a counter for the real objects. It starts at the absolute last page
%  value. It relies on l3ref.
%    \begin{macrocode}
\newcounter  { g_@@_parenttree_obj_int }
\hook_gput_code:nnn{begindocument}{tagpdf}
  {
    % initialise the counter with the abspage lookup for the last page;
    % 100 is the second argument of the lookup, presumably the fallback
    % if no value is known -- TODO confirm
    \int_gset:Nn
      \c@g_@@_parenttree_obj_int
      { \@@_property_ref_lastpage:nn{abspage}{100}  }
  }
%    \end{macrocode}
% \end{macro}
% We store the number/object references in a tl-var. If more structure is needed
% one could switch to a seq.
%  \begin{variable}{ \g_@@_parenttree_objr_tl }
%    \begin{macrocode}
\tl_new:N \g_@@_parenttree_objr_tl % filled by \@@_parenttree_add_objr:nn
%    \end{macrocode}
% \end{variable}
%
%  \begin{macro}{ \@@_parenttree_add_objr:nn }
%  This command stores a StructParent number and an objref into the tl var.
%  This is only for objects like annotations, pages are handled elsewhere.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_parenttree_add_objr:nn #1 #2 %#1 StructParent number, #2 objref
  {
    % Appends one entry to the global tl: the expanded first argument,
    % a space, the expanded second argument and a ^^J character.
    \tl_gput_right:Ne \g_@@_parenttree_objr_tl
      {
        #1 \c_space_tl #2 ^^J
      }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{variable}{\l_@@_parenttree_content_tl}
% A tl-var which will get the page related parenttree content.
%    \begin{macrocode}
\tl_new:N \l_@@_parenttree_content_tl % becomes the content of the /Nums array
%    \end{macrocode}
% \end{variable}
%
% \begin{macro}{\@@_tree_fill_parenttree:}
% This is the main command to assemble the page related entries of the parent tree.
% It wanders through the pages and the mcid numbers and collects all mcid of one page.
%    \begin{macrocode}
% The message command does nothing by default. It is redefined below to
% issue a warning if an expected mcid number is not found on a page.
\cs_new_protected:Npn \@@_tree_parenttree_rerun_msg: {}
\cs_new_protected:Npn \@@_tree_fill_parenttree:
  {
    % Outer loop over the pages; the upper bound is the abspage lookup
    % for the last page (-1 is passed as second argument of the lookup).
    \int_step_inline:nnnn{1}{1}{\@@_property_ref_lastpage:nn{abspage}{-1}} %not quite clear if labels are needed. See lua code
      { %page ##1
        % Step 1: collect in \l_@@_tmpa_prop the entries of this page.
        % Every mc number is checked for every page.
        \prop_clear:N \l_@@_tmpa_prop
        \int_step_inline:nnnn{1}{1}{\@@_property_ref_lastpage:nn{tagmcabs}{-1}}
          {
            %mcid####1
            \int_compare:nT
              {\property_ref:enn{mcid-####1}{tagabspage}{-1}=##1} %mcid is on current page
              {% yes
                % key: tagmcid value of the mc,
                % value: entry of the mc in \g_@@_mc_parenttree_prop
                \prop_put:Nee
                  \l_@@_tmpa_prop
                  {\property_ref:enn{mcid-####1}{tagmcid}{-1}}
                  {\prop_item:Nn \g_@@_mc_parenttree_prop {####1}}
              }
          }
        % Step 2: start the entry of the page, the key is the page
        % number minus one, the value is an array.
        \tl_put_right:Ne\l_@@_parenttree_content_tl
          {
            \int_eval:n {##1-1}\c_space_tl
            [\c_space_tl %]
          }
        % Step 3: add the array items; the keys 0 ... (count-1) are
        % looked up in the prop in ascending order.
        \int_step_inline:nnnn %####1
          {0}
          {1}
          { \prop_count:N \l_@@_tmpa_prop -1 }
          {
            \prop_get:NnNTF \l_@@_tmpa_prop {####1} \l_@@_tmpa_tl
              {% page#1:mcid##1:\l_@@_tmpa_tl :content
                % object reference if a prop for this structure number
                % exists, null otherwise
                \tl_put_right:Ne \l_@@_parenttree_content_tl
                  {
                    \prop_if_exist:cTF  { g_@@_struct_ \l_@@_tmpa_tl _prop  }
                      {
                        \pdf_object_ref_indexed:nn { @@/struct }{ \l_@@_tmpa_tl }
                      }
                      {
                        null
                      }
                    \c_space_tl
                  }
              }
              {
                % key missing: activate the warning
                \cs_set_protected:Npn \@@_tree_parenttree_rerun_msg:
                 {
                   \msg_warning:nn { tag } {tree-mcid-index-wrong}
                 }   
              }
          }
        % Step 4: close the array of the page.
        \tl_put_right:Nn
          \l_@@_parenttree_content_tl
          {%[
            ]^^J
          }
      }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_tree_lua_fill_parenttree:}
% This is a special variant for luatex.
% lua mode must/can do it differently.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_tree_lua_fill_parenttree:
  {
    % The Lua call is stored unexpanded in the tl (n-type assignment),
    % so it is not executed by this command itself.
    \tl_set:Nn \l_@@_parenttree_content_tl
      {
        \lua_now:e
          { ltx.@@.func.output_parenttree ( \int_use:N \g_shipout_readonly_int ) }
      }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\@@_tree_write_parenttree:}
% This combines the two parts and writes out the object.
% TODO should the check for lua be moved into the backend code?
%    \begin{macrocode}
\cs_new_protected:Npn \@@_tree_write_parenttree:
  {
    % page related entries: lua mode uses its own command
    \bool_if:NTF \g_@@_mode_lua_bool
      { \@@_tree_lua_fill_parenttree: }
      { \@@_tree_fill_parenttree: }
    % does nothing unless it has been redefined
    \@@_tree_parenttree_rerun_msg:
    % the entries for the objects are appended after the page entries
    \tl_put_right:NV \l_@@_parenttree_content_tl \g_@@_parenttree_objr_tl
    % write the reserved object, the content is expanded here
    \pdf_object_write:nne { @@/tree/parenttree } { dict }
      { /Nums \c_space_tl [ \l_@@_parenttree_content_tl ] }
  }
%    \end{macrocode}
% \end{macro}
%
% \subsection{Rolemap dictionary}
% The Rolemap dictionary describes relations between new tags and standard types.
% The main part here is handled in the role module, here we only define the
% command which writes it to the PDF.
% \begin{variable}{@@/tree/rolemap}
% At first we reserve again an object. 
% Rolemap is also used in PDF 2.0 as a fallback.
%    \begin{macrocode}
\pdf_object_new:n { @@/tree/rolemap } % only reserved here, written by \@@_tree_write_rolemap:
%    \end{macrocode}
% \end{variable}
%
% \begin{macro}{\@@_tree_write_rolemap:}
% This writes out the rolemap, basically it simply pushes out
% the dictionary which has been filled in the role module.
%    \begin{macrocode}
\cs_new_protected:Npn \@@_tree_write_rolemap:
 {
   % if requested, every key of the mathml prop is mapped to Span
   \bool_if:NT \g_@@_role_add_mathml_bool
     {
       \prop_map_inline:Nn \g_@@_role_NS_mathml_prop
        { \prop_gput:Nnn \g_@@_role_rolemap_prop {##1} { Span } }
     }
   % copy every mapping whose key and value differ into the RoleMap
   % dictionary, the value is converted with \pdf_name_from_unicode_e:n
   \prop_map_inline:Nn \g_@@_role_rolemap_prop
     {
       \tl_if_eq:nnF {##1} {##2}
        {
          \pdfdict_gput:nne { g_@@_role/RoleMap_dict } {##1}
           { \pdf_name_from_unicode_e:n {##2} }
        }
     }
   % write the reserved object with the content of the dictionary
   \pdf_object_write:nne { @@/tree/rolemap } { dict }
    { \pdfdict_use:n { g_@@_role/RoleMap_dict } }
 }
%    \end{macrocode}
% \end{macro}
%
% \subsection{Classmap dictionary}
% Classmap and attributes are setup in the struct module, here is only the
% code to write it out. It should only be done if values have been used.
% \begin{macro}{\@@_tree_write_classmap:}
%    \begin{macrocode}
\cs_new_protected:Npn \@@_tree_write_classmap:
  {
    \tl_clear:N \l_@@_tmpa_tl
%    \end{macrocode}
% We process the older seq for compatibility with the table code.
% TODO: check if still needed
%    \begin{macrocode}
    \seq_map_inline:Nn \g_@@_attr_class_used_seq
     { \prop_gput:Nnn \g_@@_attr_class_used_prop {##1} { } }
    % one line per used class: the key, a space, and the entry of
    % the key in \g_@@_attr_entries_prop between << and >>
    \prop_map_inline:Nn \g_@@_attr_class_used_prop
     {
       \tl_put_right:Ne \l_@@_tmpa_tl
        {
          ##1 \c_space_tl
          << \prop_item:Nn \g_@@_attr_entries_prop {##1} >>
          \iow_newline:
        }
     }
    % the object is only created, written and referenced in the prop
    % of structure 1 if some content has been collected
    \tl_if_empty:NF \l_@@_tmpa_tl
      {
        \pdf_object_new:n { @@/tree/classmap }
        \pdf_object_write:nne { @@/tree/classmap } { dict } { \l_@@_tmpa_tl }
        \@@_prop_gput:cne { g_@@_struct_1_prop } { ClassMap }
          { \pdf_object_ref:n { @@/tree/classmap } }
      }
  }
%    \end{macrocode}
% \end{macro}
% \subsection{Namespaces}
% Namespaces are handled in the role module, here is the code to write them out.
% Namespaces are only relevant for pdf2.0.
% \begin{variable}{@@/tree/namespaces}
%    \begin{macrocode}
\pdf_object_new:n { @@/tree/namespaces } % only reserved here, written by \@@_tree_write_namespaces:
%    \end{macrocode}
% \end{variable}
% \begin{macro}{\@@_tree_write_namespaces:}
%    \begin{macrocode}
\cs_new_protected:Npn \@@_tree_write_namespaces:
  {
   % the F branch of the "<" comparison is used: nothing is written
   % if the PDF version is smaller than 2.0
   \pdf_version_compare:NnF < {2.0}
    {      
      % loop over the entries of the namespace prop, ##1 is the key
      \prop_map_inline:Nn \g_@@_role_NS_prop
        {
          % a non-empty RoleMapNS dictionary is written as an object and
          % its reference is added to the dictionary of the namespace
          \pdfdict_if_empty:nF {g_@@_role/RoleMapNS_##1_dict}
            {
              \pdf_object_write:nne {@@/RoleMapNS/##1}{dict}
                {
                  \pdfdict_use:n {g_@@_role/RoleMapNS_##1_dict}
                }
              \pdfdict_gput:nne{g_@@_role/Namespace_##1_dict}
                {RoleMapNS}{\pdf_object_ref:n {@@/RoleMapNS/##1}}
            }
          % the dictionary of the namespace is always written
          \pdf_object_write:nne{tag/NS/##1}{dict}
            {
               \pdfdict_use:n {g_@@_role/Namespace_##1_dict}
            }
        }
      % the array object contains the values of all entries of the
      % namespace prop (\use_ii:nn drops the key and keeps the value)
      \pdf_object_write:nne {@@/tree/namespaces}{array}
        {
          \prop_map_tokens:Nn \g_@@_role_NS_prop{\use_ii:nn}
        }
    }  
  }
%    \end{macrocode}
% \end{macro}
% \subsection{Finishing the structure}
% This assembles the various parts.
% TODO (when tabular are done or if someone requests it): IDTree
%  \begin{macro}{ \@@_finish_structure: }
%    \begin{macrocode}
\hook_new:n { tagpdf/finish/before }
% Helper for one finishing step:
% #1 is the name shown in the terminal message,
% #2 is the command which writes this part.
% The command is surrounded by the benchmark tic/toc calls.
\cs_new_protected:Npn \@@_tree_finish_step:nN #1 #2
  {
    \iow_term:n { Package~tagpdf~Info:~writing~#1 }
    \@@_check_benchmark_tic:
    #2
    \@@_check_benchmark_toc:
  }
\cs_new_protected:Npn \@@_finish_structure:
  {
    % nothing is done if the structure tree is not active
    \bool_if:NT \g_@@_active_tree_bool
      {
        \hook_use:n { tagpdf/finish/before }
        \@@_tree_final_checks:
        % the parts are written in this order, the root comes last
        \@@_tree_finish_step:nN { ParentTree }  \@@_tree_write_parenttree:
        \@@_tree_finish_step:nN { IDTree }      \@@_tree_write_idtree:
        \@@_tree_finish_step:nN { RoleMap }     \@@_tree_write_rolemap:
        \@@_tree_finish_step:nN { ClassMap }    \@@_tree_write_classmap:
        \@@_tree_finish_step:nN { NameSpaces }  \@@_tree_write_namespaces:
        % this step is rather slow!!
        \@@_tree_finish_step:nN { StructElems } \@@_tree_write_structelements:
        \@@_tree_finish_step:nN { Root }        \@@_tree_write_structtreeroot:
      }
  }
%</package>
%    \end{macrocode}
% \end{macro}
%
% \subsection{StructParents entry for Page}
% We need to add the |StructParents| entry to the Page dictionary, this is simply the
% absolute page number.
%    \begin{macrocode}
%<*package>
\hook_gput_code:nnn { begindocument } { tagpdf }
  {
    % the shipout code is only installed if the structure tree is active
    \bool_if:NT \g_@@_active_tree_bool
      {
        % before every shipout: add StructParents to the Page entries,
        % the value is the current value of \g_shipout_readonly_int
        \hook_gput_code:nnn { shipout/before } { tagpdf/structparents }
          {
            \pdfmanagement_add:nne { Page } { StructParents }
              { \int_use:N \g_shipout_readonly_int }
          }
      }
  }
%</package>
%    \end{macrocode}
% \end{implementation}
% \PrintIndex