% \iffalse meta-comment
%
%% File: tagpdf-mc-luacode.dtx
%
% Copyright (C) 2019-2024 Ulrike Fischer
%
% It may be distributed and/or modified under the conditions of the
% LaTeX Project Public License (LPPL), either version 1.3c of this
% license or (at your option) any later version.  The latest version
% of this license is in the file
%
%    https://www.latex-project.org/lppl.txt
%
% This file is part of the "tagpdf bundle" (The Work in LPPL)
% and all files in that bundle must be distributed together.
%
% -----------------------------------------------------------------------
%
% The development version of the bundle can be found at
%
%    https://github.com/latex3/tagpdf
%
% for those people who are interested.
%<*driver>
\DocumentMetadata{}
\documentclass{l3doc}
\usepackage{array,booktabs,caption}
\hypersetup{pdfauthor=Ulrike Fischer,
 pdftitle=tagpdf-mc module (tagpdf)}
\begin{document}
  \DocInput{\jobname.dtx}
\end{document}
%</driver>
% \fi
% \title{^^A
%   The \pkg{tagpdf-mc-luacode} module\\ Code related to Marked Content (mc-chunks), luamode-specific  ^^A
%   \\ Part of the tagpdf package
% }
%
% \author{^^A
%  Ulrike Fischer\thanks
%    {^^A
%      E-mail:
%        \href{mailto:fischer@troubleshooting-tex.de}
%          {fischer@troubleshooting-tex.de}^^A
%    }^^A
% }
%
% \date{Version 0.99f, released 2024-09-16}
% \maketitle
% \begin{implementation}
% The code is split into three parts: code shared by all engines,
% code specific to luamode and code not used by luamode.
%
% \section{Marked content code -- luamode code}
% luamode uses attributes to mark mc-chunks. The two attributes used are
% defined in the backend file. The backend also loads the lua file, as it can contain
% functions needed elsewhere.
% The attributes for mc are global (between 0.6 and 0.81 they were local but this
% was reverted). The attributes are setup only in lua, and one should use
% the lua functions to set and get them.\\
% |g_@@_mc_type_attr|:  the value represents the type\\
% |g_@@_mc_cnt_attr|:   will hold the value of |\c@g_@@_MCID_abs_int|
%
% Handling attributes needs a different system to number the page wise mcid's:
% a |\tagmcbegin ... \tagmcend| pair no longer surrounds exactly one mc chunk:
% it can be split at page breaks.
% We know the included mcid(s) only after the ship out.
% So for the |struct -> mcid| mapping we
% need to record |struct -> mc-cnt| (in |\g_@@_mc_parenttree_prop| and/or a lua table)
% and at shipout |mc-cnt-> {mcid, mcid, ...}|
% and when building the trees connect both.
%
% Key definitions are overwritten for luatex to store that data in lua-tables.
% The data for the mc are in |ltx.@@.mc[absnum]|.
% The fields of the table are:\\
% tag : the type (a string)\\
% raw : more properties (string)\\
% label: a string. \\
% artifact: the presence indicates an artifact, the value (string) is the type.\\
% kids: an array of tables\\
% |{1={kid=num1,page=pagenum1}, 2={kid=num2,page=pagenum2},...}|,\\
% this describes the chunks the mc has been split to by the traversing code\\
% parent: the number of the structure it is in. Needed to build the parent tree.
%
%    \begin{macrocode}
%<@@=tag>
%<*luamode>
\ProvidesExplPackage {tagpdf-mc-code-lua} {2024-09-16} {0.99f}
  {tagpdf - mc code only for the luamode }
%</luamode>
%<*debug>
\ProvidesExplPackage {tagpdf-debug-lua} {2024-09-16} {0.99f}
 {part of tagpdf - debugging code related to marking chunks - lua mode}
%</debug>
%    \end{macrocode}
% The main function which wanders through the shipout box to inject the literals.
% If the new callback is there, it is used.
%    \begin{macrocode}
%<*luamode>
\hook_gput_code:nnn{begindocument}{tagpdf/mc}
  {
    % Part 1: only if \g__tag_active_space_bool is true.
    \bool_if:NT\g__tag_active_space_bool
      {
        % If luatexbase knows the callback type pre_shipout_filter, register
        % a function there which calls space_chars_shipout on the box
        % and returns true. If declare_callback_rule is available,
        % additionally declare an ordering rule between the callback entries
        % "luaotfload.dvi" and "tagpdf".
        \lua_now:e
          {
            if~luatexbase.callbacktypes.pre_shipout_filter~then~
              luatexbase.add_to_callback("pre_shipout_filter", function(TAGBOX)~
              ltx.@@.func.space_chars_shipout(TAGBOX)~return~true~
              end, "tagpdf")~
              if~luatexbase.declare_callback_rule~then~
                luatexbase.declare_callback_rule("pre_shipout_filter", "luaotfload.dvi", "after", "tagpdf")
              end~
            end
          }
       % Fallback selection: if the callback type exists, token.get_next()
       % removes the following \@secondoftwo from the input, so that
       % \@gobble discards the braced fallback code. Otherwise
       % \@secondoftwo drops \@gobble and the fallback code is executed.
       \lua_now:e
         {
           if~luatexbase.callbacktypes.pre_shipout_filter~then~
           token.get_next()~
           end
         }\@secondoftwo\@gobble
           {
             % Fallback: call the lua function from the shipout/before hook
             % on the box ShipoutBox.
             \hook_gput_code:nnn{shipout/before}{tagpdf/lua}
               {
                \lua_now:e
                   { ltx.@@.func.space_chars_shipout (tex.box["ShipoutBox"]) }
               }
           }
      }
    % Part 2: the same scheme for mark_shipout,
    % only if \g__tag_active_mc_bool is true.
    \bool_if:NT\g__tag_active_mc_bool
      {
        \lua_now:e
          {
            if~luatexbase.callbacktypes.pre_shipout_filter~then~
              luatexbase.add_to_callback("pre_shipout_filter", function(TAGBOX)~
              ltx.@@.func.mark_shipout(TAGBOX)~return~true~
              end, "tagpdf")~
            end
          }
       % Same token.get_next() trick as above to skip or run the fallback.
       \lua_now:e
         {
           if~luatexbase.callbacktypes.pre_shipout_filter~then~
           token.get_next()~
           end
         }\@secondoftwo\@gobble
           {
             \hook_gput_code:nnn{shipout/before}{tagpdf/lua}
               {
                 \lua_now:e
                   { ltx.@@.func.mark_shipout (tex.box["ShipoutBox"]) }
               }
           }
      }
  }
%    \end{macrocode}
% \subsection{Commands}
%  \begin{macro}{\@@_add_missing_mcs_to_stream:Nn}
%    This command is used in the output routine by the ptagging code.
%    It should do nothing in luamode.
%    \begin{macrocode}
\cs_new_protected:Nn \@@_add_missing_mcs_to_stream:Nn { } % no-op: both arguments are absorbed and discarded
%    \end{macrocode}
% \end{macro}
% \begin{macro}[pTF]{\@@_mc_if_in:,\tag_mc_if_in:}
% This tests whether we are inside an mc. With attributes
% this means comparing the attribute value against a number.
%    \begin{macrocode}
\prg_new_conditional:Npnn \@@_mc_if_in: { p , T , F , TF }
  {
    % The current value of the mc type attribute is printed by lua into the
    % integer expression. -2147483647 is the value stored by
    % \@@_mc_lua_unset_mc_type_attr:, so equality means "not in an mc".
    \int_compare:nNnTF
      {
        \lua_now:e
          {
            tex.print
              (
                \int_use:N \c_document_cctab,
                tex.getattribute(luatexbase.attributes.g__tag_mc_type_attr)
              )
          }
      }
      =
      { -2147483647 }
      { \prg_return_false: }
      { \prg_return_true:  }
  }

% public name for the same test
\prg_new_eq_conditional:NNn \tag_mc_if_in: \@@_mc_if_in: { p , T , F , TF }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}
%  {
%     \@@_mc_lua_set_mc_type_attr:n
%    ,\@@_mc_lua_set_mc_type_attr:o
%    ,\@@_mc_lua_unset_mc_type_attr:
%  }
% This takes a tag name, and sets the attributes globally to the related number.
%    \begin{macrocode}
% This function performs an assignment (\tl_set:Ne) and is therefore not
% expandable, so it has to be declared as protected.
\cs_new_protected:Nn \@@_mc_lua_set_mc_type_attr:n % #1 is a tag name
  {
    %TODO ltx.@@.func.get_num_from("#1") seems not to return a suitable number??
    % Ask lua for the number belonging to the tag name #1 and store what it
    % outputs in a temporary tl.
    \tl_set:Ne \l_@@_tmpa_tl
      { \lua_now:e { ltx.@@.func.output_num_from ("#1") } }
    % Set the type attribute globally to that number ...
    \lua_now:e
      {
        tex.setattribute
         (
          "global",
          luatexbase.attributes.g_@@_mc_type_attr,
          \l_@@_tmpa_tl
         )
      }
    % ... and the cnt attribute globally to what \@@_get_mc_abs_cnt:
    % expands to (presumably the current absolute mc number).
    \lua_now:e
      {
        tex.setattribute
         (
           "global",
           luatexbase.attributes.g_@@_mc_cnt_attr,
           \@@_get_mc_abs_cnt:
         )
      }
  }

\cs_generate_variant:Nn \@@_mc_lua_set_mc_type_attr:n { o }

\cs_new:Npn \@@_mc_lua_unset_mc_type_attr:
  {
    % Reset both mc attributes globally to -2147483647, the value
    % which \@@_mc_if_in: interprets as "not in an mc".
    \lua_now:e
      {
        local~attr = luatexbase.attributes ;
        tex.setattribute ( "global", attr.g_@@_mc_type_attr, -2147483647 ) ;
        tex.setattribute ( "global", attr.g_@@_mc_cnt_attr, -2147483647 )
      }
  }


%    \end{macrocode}
% \end{macro}
% \begin{macro}
%  {
%    \@@_mc_insert_mcid_kids:n
%   ,\@@_mc_insert_mcid_single_kids:n
%  }
% In the finish code these commands replace the dummy for an mc by the real mcid kids.
% We need a variant for the case that it is the only kid, to get the array right.
%    \begin{macrocode}
% #1 is the absolute mc number. Both commands hand it to the same lua
% function; only the second argument differs (0 here, 1 for the single-kid
% variant).
\cs_new:Npn \@@_mc_insert_mcid_kids:n #1
  { \lua_now:e { ltx.@@.func.mc_insert_kids ( #1 , 0 ) } }

\cs_new:Npn \@@_mc_insert_mcid_single_kids:n #1
  { \lua_now:e { ltx.@@.func.mc_insert_kids ( #1 , 1 ) } }
%    \end{macrocode}
% \end{macro}
%
%\begin{macro}{\@@_mc_handle_stash:n,\@@_mc_handle_stash:e}
% This is the lua variant for the command to
% put an mcid absolute number in the current structure.
%    \begin{macrocode}
%</luamode>
%<*luamode|debug>
%<luamode>\cs_new_protected:Npn \@@_mc_handle_stash:n #1 %1 mcidnum
%<debug>\cs_set_protected:Npn \@@_mc_handle_stash:n #1 %1 mcidnum
  {
    % #1 is the absolute number of the mc. The luamode guard creates the
    % command, the debug guard redefines it with additional bookkeeping.
    % The check command is defined elsewhere
    % (presumably it reports an mc that has already been used).
    \@@_check_mc_used:n { #1 }
    % Append the (unexpanded) kid-insertion command to the kids sequence
    % of the current structure.
    \seq_gput_right:cn % Don't fill a lua table due to the command in the item,
                       % so use the kernel command
      { g_@@_struct_kids_\g_@@_struct_stack_current_tl _seq }
      {
        \@@_mc_insert_mcid_kids:n {#1}%
      }
%<debug>    \seq_gput_right:cn % Don't fill a lua table due to the command in the item,
%<debug>                       % so use the kernel command
%<debug>      { g_@@_struct_debug_kids_\g_@@_struct_stack_current_tl _seq }
%<debug>      {
%<debug>        MC~#1%
%<debug>      }      
    % Pass the pair (current structure number, mc number) to lua.
    \lua_now:e
      {
        ltx.@@.func.store_struct_mcabs
          (
            \g_@@_struct_stack_current_tl,#1
          )
      }
    % Record mc number -> current structure number in the parenttree prop.
    \prop_gput:Nee
      \g_@@_mc_parenttree_prop
      { #1 }
      { \g_@@_struct_stack_current_tl }
  }
%</luamode|debug>
%<*luamode>
\cs_generate_variant:Nn \@@_mc_handle_stash:n { e }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\tag_mc_begin:n}
% This is the lua version of the user command.
% We currently don't check if there is nesting as it doesn't matter so
% much in lua.
%    \begin{macrocode}
\cs_set_protected:Nn \tag_mc_begin:n
  {
    % #1 is a key-val list for the key set "@@ / mc".
    % Nothing is done unless the mc code is active.
    \@@_check_if_active_mc:T
      {
        % Local assignments (key values, temporary tls) are confined to this
        % group; the attributes, the counter and the g-variables are global.
        \group_begin:
        %\@@_check_mc_if_nested:
        \bool_gset_true:N \g_@@_in_mc_bool
        \bool_set_false:N\l_@@_mc_artifact_bool
        \tl_clear:N \l_@@_mc_key_properties_tl
        \int_gincr:N \c@g_@@_MCID_abs_int
%    \end{macrocode}
% set the default tag to the structure:
%    \begin{macrocode}
        \tl_set_eq:NN \l_@@_mc_key_tag_tl \g_@@_struct_tag_tl
        \tl_gset_eq:NN\g_@@_mc_key_tag_tl \g_@@_struct_tag_tl
        \lua_now:e
          {
            ltx.@@.func.store_mc_data(\@@_get_mc_abs_cnt:,"tag","\g_@@_struct_tag_tl")
          }
        % The label is reset to empty before the user keys are processed.
        \keys_set:nn { @@ / mc }{ label={}, #1 }
        %check that a tag or artifact has been used
        \@@_check_mc_tag:N \l_@@_mc_key_tag_tl
        %set the attributes:
        \@@_mc_lua_set_mc_type_attr:o  { \l_@@_mc_key_tag_tl }
        \bool_if:NF \l_@@_mc_artifact_bool
          { % store the absolute num name in a label:
            % (N-type argument: passed without braces)
            \tl_if_empty:NF \l_@@_mc_key_label_tl
              {
                \exp_args:NV
                 \@@_mc_handle_mc_label:e  \l_@@_mc_key_label_tl
              }
           % if not stashed record the absolute number
            \bool_if:NF \l_@@_mc_key_stash_bool
              {
                % Retrieve the parent role of the current structure into the
                % two tmp tls and run the parent-child check for the child
                % MC. The result is stored in \l_@@_parent_child_check_tl.
                \exp_args:NV\@@_struct_get_parentrole:nNN
                  \g_@@_struct_stack_current_tl
                  \l_@@_get_parent_tmpa_tl
                  \l_@@_get_parent_tmpb_tl
                \@@_check_parent_child:VVnnN
                  \l_@@_get_parent_tmpa_tl
                  \l_@@_get_parent_tmpb_tl
                  {MC}{}
                  \l_@@_parent_child_check_tl
                % A negative result triggers the role-parent-child warning;
                % the S entry of the structure is fetched for the message.
                \int_compare:nNnT {\l_@@_parent_child_check_tl}<{0}
                 {
                   \prop_get:cnN
                    { g_@@_struct_ \g_@@_struct_stack_current_tl _prop}
                    {S}
                    \l_@@_tmpa_tl
                   \msg_warning:nneee
                     { tag }
                     {role-parent-child}
                     { \l_@@_get_parent_tmpa_tl/\l_@@_get_parent_tmpb_tl  }
                     { MC~(real content) }
                     {
                       not~allowed~
                       (struct~\g_@@_struct_stack_current_tl,~\l_@@_tmpa_tl)
                     }
                 }
                % Attach the mc to the current structure.
                \@@_mc_handle_stash:e { \@@_get_mc_abs_cnt: }
              }
          }
        \group_end:
     }
  }
%    \end{macrocode}
% \end{macro}
% \begin{macro}{\tag_mc_end:}
%
% TODO: check how the use command must be guarded.
%    \begin{macrocode}
\cs_set_protected:Npn \tag_mc_end:
  {
    \@@_check_if_active_mc:T
      {
        %\@@_check_mc_if_open:
        % Leave the mc: unset both attributes, then reset the state
        % variables (in-mc flag, artifact flag, local and global tag).
        \@@_mc_lua_unset_mc_type_attr:
        \bool_gset_false:N \g_@@_in_mc_bool
        \bool_set_false:N  \l_@@_mc_artifact_bool
        \tl_clear:N  \l_@@_mc_key_tag_tl
        \tl_gclear:N \g_@@_mc_key_tag_tl
      }
  }
%    \end{macrocode}
% \end{macro}
%
% \begin{macro}{\tag_mc_reset_box:N}
% This allows resetting the mc-attributes in a box. In base and generic mode it should do
% nothing.
%    \begin{macrocode}
\cs_set_protected:Npn \tag_mc_reset_box:N #1
  {
    % #1 is a box register. The box is handed to the lua function together
    % with the current values of the mc cnt attribute and the mc type
    % attribute (in this order).
    \lua_now:e
      {
        ltx.__tag.func.update_mc_attributes
          (
            tex.getbox(\int_use:N #1),
            tex.getattribute(luatexbase.attributes.g_@@_mc_cnt_attr),
            tex.getattribute(luatexbase.attributes.g_@@_mc_type_attr)
          )
      }
  }
%    \end{macrocode}
% \end{macro}
% 
% \begin{macro}{ \@@_get_data_mc_tag: }
% The command to retrieve the current mc tag.
% TODO: Perhaps this should use the attribute instead.
%    \begin{macrocode}
\cs_new:Nn \@@_get_data_mc_tag: { \g_@@_mc_key_tag_tl } % expands to the globally stored mc tag
%    \end{macrocode}
% \end{macro}

% \subsection{Key definitions}
% \begin{macro}
%   {
%    tag (mc-key),
%    raw (mc-key),
%    alt (mc-key),
%    actualtext (mc-key),
%    label (mc-key),
%    artifact (mc-key)
%   }
%   TODO: check conversion, check if local/global setting is right.
%    \begin{macrocode}
\keys_define:nn { @@ / mc }
  {
    % Every key stores its value also in the lua table of the current mc
    % (selected by \@@_get_mc_abs_cnt:) through store_mc_data.
    tag .code:n = % the tag, stored locally, globally and in lua
      {
        \tl_set:Ne   \l_@@_mc_key_tag_tl { #1 }
        \tl_gset:Ne  \g_@@_mc_key_tag_tl { #1 }
        \lua_now:e
          {
            ltx.@@.func.store_mc_data(\@@_get_mc_abs_cnt:,"tag","#1")
          }
      },
    raw .code:n = % raw properties, appended after expansion
      {
        \tl_put_right:Ne \l_@@_mc_key_properties_tl { #1 }
        \lua_now:e
          {
            ltx.@@.func.store_mc_data(\@@_get_mc_abs_cnt:,"raw","#1")
          }
      },
    alt .code:n      = % Alt property
      {
        % an empty value is ignored
        \tl_if_empty:oF{#1}
          {
            % convert the value to a utf16 hex string
            \str_set_convert:Noon
              \l_@@_tmpa_str
              { #1 }
              { default }
              { utf16/hex }
            \tl_put_right:Nn \l_@@_mc_key_properties_tl { /Alt~< }
            \tl_put_right:No \l_@@_mc_key_properties_tl { \l_@@_tmpa_str>~ }
            \lua_now:e
              {
                ltx.@@.func.store_mc_data
                  (
                    \@@_get_mc_abs_cnt:,"alt","/Alt~<\str_use:N \l_@@_tmpa_str>"
                  )
              }
           }
      },
    alttext .meta:n = {alt=#1}, % alias for alt
    actualtext .code:n      = % ActualText property
      {
        % an empty value is ignored
        \tl_if_empty:oF{#1}
          {
            \str_set_convert:Noon
              \l_@@_tmpa_str
              { #1 }
              { default }
              { utf16/hex }
            % The property name must be /ActualText (not /Alt), so that the
            % properties tl agrees with the data stored in lua below.
            \tl_put_right:Nn \l_@@_mc_key_properties_tl { /ActualText~< }
            \tl_put_right:No \l_@@_mc_key_properties_tl { \l_@@_tmpa_str>~ }
            \lua_now:e
              {
                ltx.@@.func.store_mc_data
                  (
                    \@@_get_mc_abs_cnt:,
                    "actualtext",
                    "/ActualText~<\str_use:N \l_@@_tmpa_str>"
                  )
              }
          }
      },
    label .code:n = % label, stored unexpanded in the tl
      {
        \tl_set:Nn\l_@@_mc_key_label_tl { #1 }
        \lua_now:e
          {
            ltx.@@.func.store_mc_data
              (
                \@@_get_mc_abs_cnt:,"label","#1"
              )
          }
      },
    __artifact-store .code:n = % internal: store the artifact type in lua
      {
        \lua_now:e
          {
            ltx.@@.func.store_mc_data
              (
                \@@_get_mc_abs_cnt:,"artifact","#1"
              )
          }
      },
    artifact .code:n       =
      {
        % The keys __artifact-bool and __artifact-type are not defined here
        % (presumably in the code shared by all engines). The first call sets
        % them and the tag, the second one stores the resulting type in lua.
        \exp_args:Nne
          \keys_set:nn
            { @@ / mc}
            { __artifact-bool, __artifact-type=#1, tag=Artifact }
        \exp_args:Nne
          \keys_set:nn
            { @@ / mc }
            { __artifact-store=\l_@@_mc_artifact_type_tl }
      },
    artifact .default:n    = { notype }
  }

%</luamode>
%    \end{macrocode}
% \end{macro}
%
% \end{implementation}
% \PrintIndex