const { hypherDehyphenate } = require("./hyphenate.js");

/**
 * Reports whether an inline node is nested inside an element that carries the
 * `line` class.
 *
 * Only nodes whose `data-hederis-type` starts with `hspan`/`hspn`, or whose
 * class contains `textmanipulation`, are examined; anything else yields false.
 * The walk starts at the node's nearest element ancestor and climbs until it
 * meets an ancestor whose `data-hederis-type` contains `hblk`, `hwpr` or
 * `hsec` (that boundary ancestor itself is not inspected), or until it leaves
 * the element tree (e.g. reaches a DocumentFragment or a detached root).
 *
 * @param {Element} node - Element to inspect.
 * @returns {boolean} true when a `line` ancestor is found before a boundary.
 */
function checkLines(node) {
  // Fragments and documents have no getAttribute, so treat them as untyped.
  const typeOf = (el) =>
    el && typeof el.getAttribute === "function"
      ? el.getAttribute("data-hederis-type")
      : null;
  const isBoundary = (type) => Boolean(type) && /hblk|hwpr|hsec/.test(type);

  const ownType = typeOf(node);
  const ownClass = typeof node.className === "string" ? node.className : "";
  const isInline =
    (Boolean(ownType) && /^hspa?n/.test(ownType)) ||
    ownClass.includes("textmanipulation");
  if (!isInline) {
    return false;
  }

  // Skip any non-element parents to find the first element ancestor.
  let ancestor = node.parentNode;
  while (ancestor && ancestor.nodeType !== 1) {
    ancestor = ancestor.parentNode;
  }

  // Stop at the first structural boundary or when the element chain ends.
  while (ancestor && ancestor.nodeType === 1 && !isBoundary(typeOf(ancestor))) {
    // Pad with spaces so "outline" is not mistaken for the "line" class.
    if (` ${ancestor.className} `.includes(" line ")) {
      return true;
    }
    ancestor = ancestor.parentNode;
  }
  return false;
}

/**
 * Collects every text node (nodeType 3) beneath `node`, in document order.
 *
 * Always returns an array, so callers can safely read `.length` even when
 * `node` is missing or the traversal fails part-way (the failure is logged and
 * whatever was collected so far is returned).
 *
 * @param {Node|null|undefined} node - Root whose descendants are searched.
 * @returns {Node[]} Text nodes found under `node`; empty when there are none.
 */
function textNodesUnder(node) {
  const found = [];
  if (!node) {
    return found;
  }
  // Push into one shared array instead of concatenating per level.
  const walk = (parent) => {
    for (let child = parent.firstChild; child; child = child.nextSibling) {
      if (child.nodeType === 3) {
        found.push(child);
      } else {
        walk(child);
      }
    }
  };
  try {
    walk(node);
  } catch (error) {
    console.error(error);
  }
  return found;
}

/**
 * Builds a page-break marker element.
 *
 * The element gets role and class `doc-pagebreak`, id `pg<counter>`, an
 * aria-label of `counter`, and a single text child holding `counter`.
 *
 * @param {string} el - Tag name for the marker element.
 * @param {number|string} counter - Page number used for the id, label and text.
 * @param {Document} doc - Document used to create the nodes.
 * @returns {Element} The newly created, unattached marker.
 */
function createMarker(el, counter, doc) {
  const pageMarker = doc.createElement(el);
  const attributes = [
    ["role", "doc-pagebreak"],
    ["class", "doc-pagebreak"],
    ["id", `pg${counter}`],
    ["aria-label", counter],
  ];
  for (const [name, value] of attributes) {
    pageMarker.setAttribute(name, value);
  }
  const label = doc.createTextNode(counter);
  pageMarker.appendChild(label);
  return pageMarker;
}

/**
 * Removes the `pageBreakAfter` class from each block whose last `span.line`
 * descendant has `pageBreakAfter` in its class string.
 *
 * Blocks without any `span.line` descendants are left untouched.
 *
 * @param {Element[]} blks - Blocks to examine; mutated in place.
 */
function stripPageBreaks(blks) {
  for (const block of blks) {
    const lineSpans = block.querySelectorAll("span.line");
    if (!(lineSpans.length > 0)) {
      continue;
    }
    const finalLine = lineSpans[lineSpans.length - 1];
    if (!finalLine.className.match(/pageBreakAfter/g)) {
      continue;
    }
    block.className = block.className.replace(/\bpageBreakAfter\b/g, "").trim();
  }
}

/**
 * Resets the inline `letterSpacing` style on every given element by
 * assigning `null` to it.
 *
 * @param {Element[]} blks - Elements to reset; mutated in place.
 */
function stripLetterSpacing(blks) {
  for (const block of blks) {
    block.style.letterSpacing = null;
  }
}

/**
 * Drops the sizing attributes (`height`, `width`, `data-height`,
 * `data-width`) from every given element.
 *
 * @param {Element[]} imgs - Elements to clean; mutated in place.
 */
function stripImageSizing(imgs) {
  const sizingAttrs = ["height", "width", "data-height", "data-width"];
  imgs.forEach(img => {
    for (const attrName of sizingAttrs) {
      img.removeAttribute(attrName);
    }
  });
}

/**
 * Removes every element in `blanks` that has no `class` attribute or an
 * empty one; elements with a non-empty class are kept.
 *
 * @param {Element[]} blanks - Candidate elements; matching ones are removed.
 */
function stripBlanksAfterImages(blanks) {
  for (const candidate of blanks) {
    const hasClass = Boolean(candidate.getAttribute("class"));
    if (!hasClass) {
      candidate.remove();
    }
  }
}

/**
 * Unwraps each given element by replacing it with its own child nodes.
 *
 * @param {Element[]} table_bodies - Elements to unwrap (the caller passes
 *   `table tbody` matches).
 */
function stripTBody(table_bodies) {
  for (const body of table_bodies) {
    const contents = body.childNodes;
    body.replaceWith(...contents);
  }
}

/**
 * Puts footnote bodies back where their in-text call markers are.
 *
 * For each marker, the `href` attribute (with its first `#` removed) is used
 * as the id of the footnote to restore; the marker is replaced by the first
 * footnote carrying that id. Markers with no `href`, an empty target, or no
 * matching footnote are left untouched.
 *
 * @param {Element[]} footnote_markers - Call elements carrying an `href`.
 * @param {Element[]} footnotes - Footnote elements, looked up by `id`.
 */
function restoreFootnotes(footnote_markers, footnotes) {
  // Index once; the first footnote with a given id wins, as with a linear scan.
  const footnoteById = new Map();
  for (const footnote of footnotes) {
    if (footnote.id && !footnoteById.has(footnote.id)) {
      footnoteById.set(footnote.id, footnote);
    }
  }

  for (const call of footnote_markers) {
    const href = call.getAttribute("href");
    // A call without an href cannot be resolved; skip it rather than throw.
    if (!href) {
      continue;
    }
    const target = footnoteById.get(href.replace("#", ""));
    if (target) {
      // replaceWith already detaches the call, so no separate remove() is needed.
      call.replaceWith(target);
    }
  }
}

/**
 * Compares one attribute on two elements, ignoring surrounding whitespace.
 *
 * The values match when their trimmed forms are strictly equal, or when both
 * are missing/empty (so an absent attribute equals an empty or blank one).
 *
 * @param {Element} source - First element.
 * @param {Element} comp - Element to compare against.
 * @param {string} attr - Attribute name to read from both.
 * @returns {boolean} true when the attribute values are considered equal.
 */
function checkAttributeMatch(source, comp, attr) {
  const normalise = (value) => (value ? value.trim() : value);
  const left = normalise(source.getAttribute(attr));
  const right = normalise(comp.getAttribute(attr));
  return left === right || (!left && !right);
}

/**
 * Folds each element into its immediately preceding sibling when the two
 * carry the same `style`, `data-id`, `class`, `data-hederis-type` and
 * `data-pi-attrs` values (as judged by checkAttributeMatch).
 *
 * On a merge the element's innerHTML is appended to the sibling's innerHTML
 * and the element is removed. Elements with no previous sibling, or whose
 * previous sibling is not an element node, are skipped. The caller passes the
 * list in reverse document order. Any error is logged and stops processing
 * of the remaining entries.
 *
 * @param {Element[]} spans - Merge candidates.
 */
function mergeSpans(spans) {
  const mustAgree = ["style", "data-id", "class", "data-hederis-type", "data-pi-attrs"];
  try {
    for (const current of spans) {
      const before = current.previousSibling;
      // Nothing to merge into, or the neighbour is not an element.
      if (!before || before.nodeType !== 1) {
        continue;
      }
      const sameAttrs = mustAgree.every(name =>
        checkAttributeMatch(current, before, name)
      );
      if (!sameAttrs) {
        continue;
      }
      before.innerHTML = before.innerHTML + current.innerHTML;
      current.remove();
    }
  } catch (error) {
    console.error(error);
  }
}

/**
 * Ensures every given inline element lives inside a block (`hblk*`) element.
 *
 * If the inline's previous element sibling has a `data-hederis-type`
 * containing `hblk`, the inline is appended to that sibling. Otherwise a new
 * `<p data-hederis-type="hblkp">` is inserted right after the inline and the
 * inline is moved into it. Errors are logged and stop processing of the
 * remaining entries.
 *
 * @param {Element[]} inlines - Inline elements that sit directly under a
 *   section or wrapper.
 */
function nestInlines(inlines) {
  try {
    for (const inline of inlines) {
      const prev = inline.previousElementSibling;
      const prevType = prev ? prev.getAttribute("data-hederis-type") : null;
      if (prevType && prevType.includes("hblk")) {
        prev.appendChild(inline);
        continue;
      }
      // Create the wrapper in the inline's own document so this also works when
      // the caller supplied a non-global document; fall back to the global one.
      const ownerDoc = inline.ownerDocument || document;
      const wrapper = ownerDoc.createElement("p");
      wrapper.setAttribute("data-hederis-type", "hblkp");
      inline.after(wrapper);
      wrapper.appendChild(inline);
    }
  } catch (error) {
    console.error(error);
  }
}

/**
 * Converts paginated HTML back into one flowing HTML string.
 *
 * Parses `input_html` into a detached wrapper, optionally inserts numbered
 * page-break markers (EPUB mode), re-joins content that was split across
 * pages or manually continued, strips pagination-only attributes and classes,
 * and returns the concatenated innerHTML of every
 * `.pagedjs_page_content > div`.
 *
 * @param {string} input_html - Markup assigned to the wrapper's innerHTML.
 * @param {string} css_content - Stylesheet text; ids found in `#id {`
 *   selectors decide which element ids are copied into `data-pi-attrs`.
 * @param {boolean} [for_epub=false] - When true, `doc-pagebreak` markers are
 *   inserted for each retained page.
 * @param {Document} [doc=document] - Document used to create the fragment,
 *   the wrapper and the page markers.
 * @returns {{html_content: string}} The reassembled markup.
 */
function unpage_html(input_html, css_content, for_epub=false, doc=document) {
  console.log("running unpage");
  // Collect the ids the stylesheet targets with an `#id {` selector.
  let regex = new RegExp("#[a-zA-Z0-9_-]+\\s*{", "g");
  let raw_ids = css_content.match(regex);
  let re = new RegExp("\\s*{", "g");
  let id_list = [];
  if (raw_ids) {
    id_list = raw_ids.map(x => x.replace("#", "").replace(re, ""));
  }

  let frag = doc.createDocumentFragment();
  // we can't set innerHTML directly on a document frag, hence the wrap
  let wrap = doc.createElement("div");
  wrap.id = "wrap";
  wrap.innerHTML = input_html;
  frag.appendChild(wrap);

  // document.querySelectorAll("[data-split-to]:not([data-split-from])")
  // which should be the same as "[data-split-original]"

  // EPUB only: drop leading pages and insert a numbered marker per page.
  if (for_epub) {
    let pages = to_arr(frag.querySelectorAll(".pagedjs_page"));
    // Block types that get the marker placed before them instead of inside.
    let skip = ["hblkimg"];

    // for some reason two extra pages are getting added
    // to the beginning of the epub html;
    // this gets rid of them.
    // (Every page before the first `.pagedjs_named_page` is dropped from the
    // list; the page elements themselves stay in the fragment.)
    let remove = 0;
    for (let i = 0; i < pages.length; i++) {
      if (pages[i].classList.contains("pagedjs_named_page")) {
        break;
      } else {
        remove += 1;
      }
    }
    pages.splice(0, remove);

    // start counting pages, for numbering.
    // TKTK eventually replace this with the actual page number.
    let counter = 1;
    let prev_was_eonly = false;
    
    // map() is used purely for its side effects here; the result is discarded.
    pages.map(page => {
      let ebook_only = page.querySelector("*[data-hederis-type^=hsec][data-pi-format=ebook]");
      // skip if the whole page was e-only, 
      // or if its a blank page following an e-only section.
      if (!ebook_only && !(prev_was_eonly && page.classList.contains("pagedjs_blank_page"))) {
        let page_content = page.querySelector(".pagedjs_page_content > div");
        // counter = page.getAttribute("data-page-number");
        if (page_content) {
          let marker = createMarker("span", counter, doc);
          // get the first element on each page. 
          let firstBlockOnPage = page.querySelector("*[data-hederis-type^=hblk]");
          // find the first word inside that element;
          // if it has no words, just add the page marker inside it
          if (firstBlockOnPage) {
            if (
              firstBlockOnPage.getAttribute("data-hederis-type") && 
              skip.includes(firstBlockOnPage.getAttribute("data-hederis-type"))
            ) {
              // Skipped block types get a <div> marker inserted before them.
              marker = createMarker("div", counter, doc);
              firstBlockOnPage.parentNode.insertBefore(marker, firstBlockOnPage);
            } else if (!firstBlockOnPage.textContent || firstBlockOnPage.textContent === "") {
              firstBlockOnPage.appendChild(marker);
            // NOTE(review): `!A || !B` is true unless the block sits inside BOTH
            // a split hsec section and a split hwpr section, so the final else
            // branch below is only reached in that case. Confirm whether `&&`
            // (neither ancestor is a split continuation) was intended.
            } else if (
              !firstBlockOnPage.closest("section[data-hederis-type^=hsec][data-split-from]") || 
              !firstBlockOnPage.closest("section[data-hederis-type^=hwpr][data-split-from]")
            ) { 
              firstBlockOnPage.insertBefore(marker, firstBlockOnPage.firstChild);
            } else {
              // else insert the page marker after the first word of that element (to account for hyphens)
              // get the element children
              let els = firstBlockOnPage.querySelectorAll("*"),
                res = Array.from(els).find(v => v.textContent && v.textContent != "");

              let firstChild = res;

              // NOTE(review): `res` is a single element returned by find() (or
              // undefined), so `res.length` is never > 1 and firstChild always
              // ends up as firstBlockOnPage.firstChild. Confirm the intent.
              if (res && res.length > 1) {
                firstChild = res[0];
              } else {
                firstChild = firstBlockOnPage.firstChild;
              }

              if (firstChild) {
                if (firstChild.nodeType === 3) {
                  // Text node: split at the first space and put the marker
                  // between the first word and the rest.
                  let par = firstChild.parentNode;
                  let insertionPoint = firstChild.textContent.indexOf(" ");
                  if (insertionPoint >= 0) {
                    par.insertBefore(marker, firstChild.splitText(insertionPoint));
                  } else {
                    // No space in the text: place the marker after the parent
                    // if it has a following sibling, otherwise at its end.
                    if (par.nextSibling) {
                      par.parentNode.insertBefore(marker, par.nextSibling);
                    } else {
                      par.appendChild(marker)
                    }
                  }          
                } else {
                  firstChild.appendChild(marker);
                }
              }
            }
          }
        }
        // The counter advances for every retained page, marker or not.
        counter += 1;
        prev_was_eonly = false;
      } else {
        prev_was_eonly = true;
      }
    });
  }

  // hypherDehyphenate is imported from ./hyphenate.js; its return value
  // replaces frag. What it does to the tree is not visible in this file.
  frag = hypherDehyphenate(frag);

  let adjustments = to_arr(frag.querySelectorAll(".textmanipulation"));

  // Empty adjustment spans are removed; the others are typed by the sign of
  // their inline letter-spacing (negative: tighten, leading digit: loosen).
  adjustments.map(span => {
    if (span.childNodes.length <= 0) {
      span.remove();
    } else if (
      span.getAttribute("style") &&
      span.getAttribute("style").match(/letter-spacing:\s*-/g)
    ) {
      span.setAttribute("data-hederis-type", "hspantighten");
    } else if (
      span.getAttribute("style") &&
      span.getAttribute("style").match(/letter-spacing:\s*\d/g)
    ) {
      span.setAttribute("data-hederis-type", "hspanloosen");
    }
  });

  // breaks.map(para => {
  //   if (
  //     para.className.indexOf("continued") > -1 ||
  //     para.className.indexOf("continuation") > -1
  //   ) {
  //     para.className = para.className
  //       .replace("bakedPageBreakAfter", "")
  //       .trim();
  //   }
  // });

  let breaks = to_arr(frag.querySelectorAll(".bakedPageBreakAfter"));

  // Drop bakedPageBreakAfter from elements whose class string contains
  // "continued" or "continuation".
  breaks.map(para => {
    if (
      para.className.indexOf("continued") > -1 ||
      para.className.indexOf("continuation") > -1
    ) {
      para.className = para.className
        .replace(/\bbakedPageBreakAfter\b/g, " ")
        .trim();
    }
  });

  let pagebrs = frag.querySelectorAll("[data-break-type=hspanpagebrafter]:not([data-hederis-type^=hblk])");

  // Add the hspanpagebrafter class to flagged non-block elements, unless the
  // enclosing hblk already contains a span.line with that class.
  Array.from(pagebrs).forEach(el => {
    // see if the parent is a line with the pagebr class
    let mypar = el.parentNode;
    if (mypar) {
      if (mypar.nodeType === 3) {
        mypar = mypar.parentNode;
      }
      if (
        mypar &&
        (!mypar.getAttribute("data-hederis-type") || 
         !mypar.getAttribute("data-hederis-type").match("^hblk"))
      ) {
        mypar = mypar.closest("[data-hederis-type^=hblk]");
      }
      if (mypar && mypar.querySelectorAll("span.line.hspanpagebrafter").length <= 0) {
        if (!el.className.match("hspanpagebrafter")) {
          el.classList.add("hspanpagebrafter");
        }
      }
    }
  });

  let linebrs = frag.querySelectorAll("[data-break-type=hspanbrafter]:not([data-hederis-type^=hblk])");

  // Same idea for line breaks (hspanbrafter).
  // NOTE(review): unlike the block above, this walks parentNode by hand and
  // calls getAttribute on every ancestor. If an element has no hblk ancestor
  // the walk would reach the DocumentFragment, which presumably has no
  // getAttribute - confirm inputs always provide an hblk ancestor.
  Array.from(linebrs).forEach(el => {
    // see if the parent is a line with the pagebr class
    let mypar = el.parentNode;
    while (
      mypar && 
      (!mypar.getAttribute("data-hederis-type") ||
       !mypar.getAttribute("data-hederis-type").match("^hblk"))
    ) {
      mypar = mypar.parentNode;
    }
    if (mypar && mypar.querySelectorAll("span.line.hspanbrafter").length <= 0) {
      if (!el.className.match("hspanbrafter")) {
        el.classList.add("hspanbrafter");
      }
    }
  });

  let hyphenated = to_arr(frag.querySelectorAll(".pagedjs_hyphen"));

  // For each element marked pagedjs_hyphen: strip the trailing hyphen from its
  // last text node (unbaked content only), then remove the class.
  hyphenated.map(orig => {
    // only run on the unbaked html:
    let lines = orig.querySelectorAll("span.line");
    let processed = checkLines(orig);

    if (lines.length <= 0 && !processed) {
      // find the last child within the node that
      // either is a text node or has text inside it;
      // replace the final hyphen.
      let lastChild = "";
      let allText = orig.childNodes;
      if (!allText) {
        lastChild = undefined;
      } else {
        lastChild = allText[allText.length - 1];
      }
      // `counter` is the 1-based offset from the end of allText to try next.
      let counter = 2;
      // if the last child is a pagemarker, move back.
      if (
        lastChild &&
        lastChild.nodeType === 1 &&
        lastChild.className.match(/doc-pagebreak/g)
      ) {
        lastChild = allText[allText.length - counter];
      }
      // Descend: step backwards over empty siblings, then dive into the last
      // non-empty element, until a text node is reached or nothing is left.
      while (lastChild && lastChild.nodeType !== 3) {
        while (
          lastChild &&
          counter <= allText.length &&
          (!lastChild.textContent || lastChild.textContent === "")
        ) {
          lastChild = allText[allText.length - counter];
          counter += 1;
        }
        if (lastChild && lastChild.nodeType !== 3) {
          allText = lastChild.childNodes;
          if (!allText) {
            lastChild = undefined;
          } else {
            lastChild = allText[allText.length - 1];
          }
          counter = 2;
        }
      }

      if (lastChild) {
        // Strips one trailing hyphen character exactly as written in the regex
        // (appears to be U+2011 NON-BREAKING HYPHEN, not ASCII "-" - confirm).
        lastChild.nodeValue = lastChild.nodeValue.replace(/‑$/g, "");
        // Tag the text's parent so the split-merge step below can find it.
        lastChild.parentNode.setAttribute("data-textelement", "true");
      }
    }
    orig.className = orig.className.replace(/\bpagedjs_hyphen\b/g, "");
    orig.className = orig.className.trim();
  });

  let split_originals = to_arr(
    frag.querySelectorAll(
      "[data-hederis-type^=hsec][data-split-original], [data-hederis-type^=hwpr][data-split-original], [data-hederis-type^=hblk][data-split-original]"
    )
  );
  // console.log(split_originals);
  // and then for each of these, go through and get a map of all the froms for that one. and take their inner html and add it to this one.
  split_originals.map(orig => {
    let id = orig.getAttribute("data-split-to");
    // add page-break attrs as needed
    let childspans = orig.querySelectorAll("span.line");
    if (childspans.length > 0) {
      let lastline = childspans[childspans.length - 1];
      if (!lastline.className.match("hspanpagebrafter")) {
        lastline.className = lastline.className + " hspanpagebrafter";
      }
    }
    // these are the splits from this specific original
    let splits = to_arr(frag.querySelectorAll(`[data-split-from="${id}"]`));
    splits.map(s => {
      // check to see if the node or any of its parents are lines:
      // NOTE(review): `node` and `myType` are assigned but never read below.
      let node = s;
      let myType = node.getAttribute("data-hederis-type");
      let isLine = checkLines(s);

      if (!isLine) {
        // let data = s.children.length ?
        // Move the continuation's children into the original, then drop it.
        if (s.childNodes.length) {
          Array.from(s.childNodes).forEach(split_child =>
            orig.append(split_child)
          );
        } else {
          // idk if this will work
          // console.log("JUST TEXT: ", s.innerHTML, id);
          orig.append(s.innerHTML);
        }
        s.remove();
      }
    });
    // NOTE(review): createMarker in this file never sets data-hyphenated, so
    // this attribute is presumably added elsewhere (perhaps by
    // hypherDehyphenate) - confirm.
    let marker = orig.querySelector(
      "span[role=doc-pagebreak][data-hyphenated=true]"
    );
    if (marker) {
      // insert the marker before the next non-word character
      // find the previous text sibling
      let prev;
      if (
        orig.getAttribute("data-textelement") &&
        orig.getAttribute("data-textelement") === "true"
      ) {
        prev = orig;
      } else {
        prev = orig.querySelector("[data-textelement=true]");
      }
      // find the next text sibling
      let next = marker.nextSibling;
      while (next && (!next.textContent || next.textContent === "")) {
        next = next.nextSibling;
      }
      if (next && next.nodeType !== 3) {
        let all = textNodesUnder(next);
        if (all.length > 0) {
          next = all[0];
        } else {
          next = undefined;
        }
      }
      if (next && prev) {
        // Move the leading word characters that follow the marker onto the end
        // of prev's first child, re-joining the hyphen-split word.
        // `lstr` is the array returned by match(); the `+` below coerces it to
        // a string.
        let lstr = next.textContent.match(/^\w*/g);
        prev.firstChild.textContent = prev.firstChild.textContent + lstr;
        next.textContent = next.textContent.replace(/^\w*/, "");
      }
    }
  });
  // combine manually-broken lines and paras
  let contds = to_arr(frag.querySelectorAll(".continued:not(.continuation)"));
  contds.map(orig => {
    let id = orig.getAttribute("id");
    let splits = to_arr(
      frag.querySelectorAll(`.continuation[data-continued-from="${id}"]`)
    );
    splits.map(s => {
      if (s.childNodes.length) {
        Array.from(s.childNodes).forEach(split_child =>
          orig.append(split_child)
        );
      } else {
        orig.append(s.innerHTML);
      }
      s.remove();
    });
    orig.className = orig.className
      .replace(/\bcontinued\b/g, "")
      .replace(/\bpageBreakAfter\b/g, "");
    orig.className = orig.className.trim();
    // Remove any text-align-last declaration from the inline style.
    let regex = new RegExp("text-align-last:\\s*[a-zA-Z]*;", "g");
    let myStyle = orig.getAttribute("style");
    if (myStyle) {
      myStyle = myStyle.replace(regex, "");
      orig.setAttribute("style", myStyle);
    }
  });
  // console.log("after", frag.querySelectorAll(`[data-split-from]`).length);
  // now remove allll the attrs pagedjs adds
  to_arr(frag.querySelectorAll("img")).forEach(el => {
    // Prefer the stored data-img-src value as the image source.
    let mysrc = el.getAttribute("data-img-src");
    if (mysrc && mysrc != "") {
      el.setAttribute("src", mysrc);
    } else {
      // if the src matches a hosted image url, fix it
      // (strips the URL prefix up to and including "images%2F" and the
      // trailing "?alt=..." query)
      let newsrc = el.getAttribute("src");
      if (newsrc && newsrc.match("https://firebase")) {
        newsrc = newsrc.replace(/https:\/\/firebase\S+images%2[fF]/g, "").replace(/\?alt=.*$/g, "");
        el.setAttribute("src", newsrc);
      }
      el.setAttribute("data-img-src", newsrc);
    }
    // Remove `bottom:` and `left:` declarations from the inline style.
    let regexa = new RegExp("bottom:\\s*\\S+;", "g");
    let regexb = new RegExp("left:\\s*\\S+;", "g");
    let myStyle = el.getAttribute("style");
    if (myStyle) {
      myStyle = myStyle.replace(regexa, "").replace(regexb, "");
      el.setAttribute("style", myStyle);
    }
    // check for empty next paras
    // (removes each immediately following <p> that has no data-hederis-type,
    // whether or not it is actually empty)
    let nextSib = el.nextSibling;
    while (
      nextSib &&
      nextSib.nodeType === 1 &&
      nextSib.tagName.toLowerCase() === "p" &&
      !nextSib.getAttribute("data-hederis-type")
    ) {
      nextSib.remove();
      nextSib = el.nextSibling;
    }
  });
  // Remove remaining empty, untyped paragraphs anywhere in the fragment.
  var empties = frag.querySelectorAll("p:empty:not([data-hederis-type])");
  to_arr(empties).forEach(el => {
    if (!el.getAttribute("data-hederis-type")) {
      el.remove();
    }
  });

  let blks = to_arr(
    frag.querySelectorAll("*.pageBreakAfter[data-hederis-type^=hblk]")
  );
  stripPageBreaks(blks);

  // remove any letter-spacing style attr values that were added in 
  // run_paged/fixOverflow()
  let spaced_blks = to_arr(
    frag.querySelectorAll("*[data-hederis-type^=hblk][style*=letter-spacing]")
  );
  stripLetterSpacing(spaced_blks);

  let imgs = to_arr(
    frag.querySelectorAll("*[data-hederis-type=hblkimg]")
  );
  stripImageSizing(imgs);

  let blanks = to_arr(
    frag.querySelectorAll("p[data-hederis-type=hblkp]:not([id]):empty")
  );
  stripBlanksAfterImages(blanks);

  let table_bodies = to_arr(
    frag.querySelectorAll("table tbody")
  );
  stripTBody(table_bodies);

  let footnote_markers = to_arr(
    frag.querySelectorAll("a[data-footnote-call]")
  );
  let footnotes = to_arr(
    frag.querySelectorAll("*[data-footnote-marker]")
  );
  restoreFootnotes(footnote_markers, footnotes);
  // Inline (hsp*) elements sitting directly under a section or wrapper.
  let inlines = to_arr(
    frag.querySelectorAll("*[data-hederis-type^=hsec] > *[data-hederis-type^=hsp], *[data-hederis-type^=hwpr] > *[data-hederis-type^=hsp]")
  );
  nestInlines(inlines);

  // Final sweep over every element: resolve UI-moved/added elements, then
  // strip pagination bookkeeping attributes.
  to_arr(frag.querySelectorAll("*")).forEach(el => {
    // console.log(el);
    if (
      el.getAttribute("data-ui-moved") &&
      el.getAttribute("data-ui-moved") === "true"
    ) {
      // Copy content back from the element's data-ui-addition twin (same id).
      let myid = el.id;
      if (myid) {
        // NOTE(review): the id is concatenated into the selector unescaped; an
        // id that is not a valid CSS identifier (e.g. one starting with a
        // digit) would presumably make querySelector throw - confirm the ids
        // in use.
        let floatedClone = frag.querySelector(
          "#" + myid + "[data-ui-addition='true']"
        );
        if (floatedClone) {
          el.innerHTML = floatedClone.innerHTML;
        }
      }
      // Un-hide the element; drop the style attribute if nothing else is left.
      let regex = new RegExp("display:\\s*none;", "g");
      let myStyle = el.getAttribute("style");
      if (myStyle) {
        myStyle = myStyle.replace(regex, "");
        if (myStyle.match(/\S/)) {
          el.setAttribute("style", myStyle);
        } else {
          el.removeAttribute("style")
        }
      }
      //el.removeAttribute("data-ui-moved")
    }
    if (
      el.getAttribute("data-ui-addition") &&
      el.getAttribute("data-ui-addition") === "true"
    ) {
      // The element is detached here, but the rest of this callback still
      // runs on the detached node.
      el.remove();
    }
    if (
      el.getAttribute("data-split-original") &&
      el.getAttribute("data-split-original") === "true"
    ) {
      let regex = new RegExp("text-align-last:\\s*[a-zA-Z]*;", "g");
      let myStyle = el.getAttribute("style");
      if (myStyle) {
        myStyle = myStyle.replace(regex, "");
        if (myStyle.match(/\S/)) {
          el.setAttribute("style", myStyle);
        } else {
          el.removeAttribute("style")
        }
      }
    }
    el.removeAttribute("height");
    el.removeAttribute("data-ref");
    // el.removeAttribute("data-id");
    el.removeAttribute("data-page");
    el.removeAttribute("data-break-before");
    el.removeAttribute("data-next-break-before");
    el.removeAttribute("data-break-after");
    el.removeAttribute("data-split-to");
    el.removeAttribute("data-split-from");
    el.removeAttribute("data-split-original");
    el.removeAttribute("data-after-page");
    el.removeAttribute("data-previous-break-after");
    el.removeAttribute("data-counter-null-reset");
    el.removeAttribute("data-counter-none-reset");
    el.removeAttribute("data-counter-page-reset");
    el.removeAttribute("data-counter-footnote-reset"); 
    el.removeAttribute("data-counter-reset");
    el.removeAttribute("data-counter-footnote-marker-reset");
    el.removeAttribute("data-nth-of-type");
    el.removeAttribute("data-following");
    el.removeAttribute("data-last-split-element");
    el.removeAttribute("data-align-last-split-element");
    el.removeAttribute("data-note-policy");
    el.removeAttribute("data-note-display");
    el.removeAttribute("data-note");
    el.removeAttribute("data-has-notes");
    el.removeAttribute("data-undisplayed");
    el.removeAttribute("data-footnote-marker");
    el.removeAttribute("data-textelement");
    el.removeAttribute("data-overflow");

    // NOTE(review): this strips "pagedjs_hypen", while the class handled
    // earlier in this function is spelled "pagedjs_hyphen" - likely a typo,
    // confirm.
    el.className = el.className.replace(/\bpagedjs_hypen\b/g, "");

    // preserve ids for elements with subselectors
    // (ids referenced by the stylesheet are recorded as `id: <id>;` inside
    // data-pi-attrs unless already present there)
    let my_id = el.getAttribute("id");
    if (id_list.includes(my_id)) {
      let my_pis = el.getAttribute("data-pi-attrs");
      // The id is interpolated into the pattern unescaped; ids collected above
      // only contain [a-zA-Z0-9_-], so no regex metacharacters can occur.
      let str = `id:\\s*${my_id};?`;
      let id_regex = new RegExp(str, "g");
      if (!my_pis) {
        my_pis = `id: ${my_id};`;
      } else if (!my_pis.match(id_regex)) {
        my_pis = `id: ${my_id}; ${my_pis}`;
      }
      el.setAttribute("data-pi-attrs", my_pis);
    }
  });

  // Merge adjacent same-tag inline siblings; mergeSpans expects the list in
  // reverse order.
  let spans = to_arr(frag.querySelectorAll("span + span, em + em, strong + strong, u + u, sup + sup, sub + sub"));
  mergeSpans(spans.reverse());

  // See if the content was locked previously;
  // We need this for legacy handling.
  // let locked = undefined;
  // to_arr(frag.querySelectorAll("[data-hederis-type^=hsec]")).forEach(sect => {
  //   if (sect.getAttribute("data-locked") !== undefined) {
  //     locked = sect.getAttribute("data-locked");
  //     // moving handling for this to the database
  //     sect.removeAttribute("data-locked");
  //   }
  // });

  // console.log(locked);

  // now to get the actual content
  // (the inner HTML of each page's content div, joined with newlines)
  let html_content = to_arr(
    frag.querySelectorAll(".pagedjs_page_content > div")
  )
    .map(el => el.innerHTML)
    .join("\n");

  return { html_content };
  // return {html_content, locked};
}

/**
 * Copies an array-like value (anything with a `length` and indexed entries,
 * such as a NodeList) into a real Array.
 *
 * @param {ArrayLike<*>} arr_like - Value to copy.
 * @returns {Array<*>} A new array holding the same entries in order.
 */
function to_arr(arr_like) {
  const { slice } = Array.prototype;
  return slice.apply(arr_like, []);
}

// Public API: only unpage_html is exported; the other functions in this file
// are not reachable from outside the module.
module.exports = {
  unpage_html,
};
