import { DEFAULT_BATCH_SIZE, SPARQL_ENDPOINT_URL, WIKI_ENDPOINT_URL } from "./constants";
import { CATEGORY, WIKI_DATA } from "./wikidata";


/**
 * Thin client for a SPARQL HTTP endpoint.
 *
 * Queries are sent as a GET request with the query text in the `query`
 * URL parameter, asking for `application/sparql-results+json`.
 */
class SPARQLQueryDispatcher {
  /**
   * @param {string} endpoint - Base URL of the SPARQL endpoint.
   */
  constructor(endpoint) {
    this.endpoint = endpoint;
  }

  /**
   * Run a SPARQL query against the configured endpoint.
   *
   * @param {string} sparqlQuery - Raw (unencoded) SPARQL query text.
   * @returns {Promise<*>} The response body parsed as JSON.
   */
  query(sparqlQuery) {
    const encodedQuery = encodeURIComponent(sparqlQuery);
    const requestUrl = `${this.endpoint}?query=${encodedQuery}`;
    const requestOptions = {
      headers: { Accept: 'application/sparql-results+json' },
    };

    return fetch(requestUrl, requestOptions).then((response) => response.json());
  }
}



// Module-level store of entities keyed by entity id. safeFetchEntity writes
// the compressed form of each fetched entity here, and
// getEntityLabelFromCache reads labels back out of it.
const _cache = {};

// Shared dispatcher bound to SPARQL_ENDPOINT_URL; used by safeFetchLinkData.
const SPARQLqueryDispatcher = new SPARQLQueryDispatcher(SPARQL_ENDPOINT_URL);

/**
 * Read the label text of an entity for one language.
 *
 * @param {object} entity - Entity with a `labels` object keyed by language code.
 * @param {string} [language="en"] - Language code to look up.
 * @returns {*} The `value` of the label entry.
 * @throws {TypeError} When the entity has no label entry for `language`.
 */
const getEntityLabel = (entity, language = "en") => {
  const { labels } = entity;
  return labels[language].value;
};

/**
 * Look up the label of a previously cached entity.
 *
 * @param {string} id - Entity id used as the cache key.
 * @param {string} [language="en"] - Language code of the wanted label.
 * @returns {*} The label value, or `null` when `id` is not in the cache.
 */
const getEntityLabelFromCache = (id, language = "en") => {
  if (!(id in _cache)) return null;
  return getEntityLabel(_cache[id], language);
};

/**
 * Build the Wikidata `Special:EntityData` JSON URL for an entity id.
 *
 * @param {string} id - Entity id, inserted into the URL as-is.
 * @returns {string} URL of the entity's JSON document.
 */
const getUrlJsonEntity = (id) => {
  const entityDataRoot = "https://www.wikidata.org/wiki/Special:EntityData";
  return `${entityDataRoot}/${id}.json`;
};

/**
 * Build the wiki API URL that lists the links of the page titled `name`.
 *
 * @param {string} name - Page title; URL-encoded before insertion.
 * @returns {string} Query URL requesting `prop=links` with `pllimit=500`.
 */
const getUrlJsonLinks = (name) => {
  const prefix = `${WIKI_ENDPOINT_URL}&action=query&pllimit=500&titles=`;
  return `${prefix}${encodeURIComponent(name)}&prop=links`;
};

/**
 * Build the wiki API URL that returns the wikitext of a page, following
 * redirects.
 *
 * NOTE(review): `stub` is inserted without URL-encoding, unlike the sibling
 * URL builders — presumably callers pass an already URL-safe page stub; confirm.
 *
 * @param {string} stub - Page identifier placed in the `page` parameter as-is.
 * @returns {string} Parse URL requesting `prop=wikitext`.
 */
const getUrlJsonWikitext = (stub) => {
  const prefix = `${WIKI_ENDPOINT_URL}&action=parse&page=`;
  return `${prefix}${stub}&redirects=1&prop=wikitext`;
};

/**
 * Build the wiki API URL that resolves page titles to their
 * `wikibase_item` page property, following redirects.
 *
 * @param {string[]} links - Page titles; each is URL-encoded and the list is
 *   joined with `|`.
 * @returns {string} Query URL requesting `prop=pageprops`.
 */
const getUrlJsonLinkIds = (links) => {
  const titles = links.map((link) => encodeURIComponent(link)).join("|");
  return `${WIKI_ENDPOINT_URL}&action=query&prop=pageprops&ppprop=wikibase_item&redirects=1&titles=${titles}`;
};

/**
 * Compose the SPARQL query that selects, among the candidate `linkIds`, the
 * items belonging to `domain`, together with their label, description,
 * English Wikipedia article and the property linking them to `rootId`.
 *
 * @param {string} rootId - Entity id of the root item (without `wd:` prefix).
 * @param {string[]} linkIds - Candidate entity ids (without `wd:` prefix).
 * @param {object} options - Required options object.
 * @param {object} [options.domain=CATEGORY.HUMAN] - Category descriptor; the
 *   fields read here are `id`, `label`, `subclasses`, `params` and `filters`.
 * @param {?string} [options.filter=null] - Key into `domain.filters`.
 * @param {boolean} [options.onlyWithProps=false] - When true the link from
 *   `rootId` to the item is mandatory instead of OPTIONAL.
 * @param {string} [options.language="en"] - Label-service fallback language.
 * @returns {string} The SPARQL query text.
 */
const getSparqlQuery = (rootId, linkIds, { domain = CATEGORY.HUMAN, filter = null, onlyWithProps = false, language = "en" }) => {
  const activeFilter = filter ? domain.filters[filter] : {};
  const domainParams = domain.params || [];

  const selectedParamLabels = domainParams.map((param) => `${param.field}Label`).join(" ");
  const optionalParamClauses = domainParams
    .map((param) => `OPTIONAL { ?item wdt:${param.property} ${param.field} . }`)
    .join("\n        ");
  const candidateItems = linkIds.map((linkId) => `wd:${linkId}`).join(" ");
  const membershipPath = domain.subclasses ? "(wdt:P31|wdt:P279)/wdt:P279*" : "wdt:P31";
  const rootLinkClause = `wd:${rootId} ?prop ?item .`;
  const propLabelClause = "?pprop wikibase:directClaim ?prop .";

  let filterClause = "";
  if (filter) {
    const negation = activeFilter.negation ? "NOT" : "";
    filterClause = `FILTER ( ${negation} EXISTS { ?item wdt:${activeFilter.property} ${activeFilter.type} } )`;
  }

  // With onlyWithProps the root link is a hard requirement and the property
  // label triple is emitted at the end; otherwise both sit in one OPTIONAL.
  const rootLinkBlock = onlyWithProps
    ? rootLinkClause
    : `OPTIONAL { ${rootLinkClause}${propLabelClause} }`;
  const trailingPropLabel = onlyWithProps ? propLabelClause : "";

  return `
    SELECT DISTINCT ?item ?itemLabel ?type ?itemDescription ?ppropLabel ?article ${selectedParamLabels}
    WHERE
    {
        VALUES ?item { ${candidateItems} }
        ?item ${membershipPath} wd:${domain.id}.
        ${optionalParamClauses}
        ${rootLinkBlock}
        ${filterClause}
        OPTIONAL {
          ?article schema:about ?item .
          ?article schema:inLanguage "en" .
          FILTER (SUBSTR(str(?article), 1, 25) = "https://en.wikipedia.org/")
        }
        BIND("${domain.label}" AS ?type)
        SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],${language}". }
        ${trailingPropLabel}
    }`;
};

/**
 * Reduce a fragment of wikitext to plain readable text.
 *
 * @param {string} text - Wikitext fragment.
 * @returns {string} The cleaned, trimmed text.
 */
const cleanWikiText = (text) => {
  // Drop the '' italic/bold quote markup.
  const withoutQuoteMarkup = text.replaceAll("''", "");
  // Collapse whitespace and stray markup characters into single spaces.
  const collapsed = withoutQuoteMarkup.replace(/[\s\*\/\\\^_{\}~]+/g, " ");
  // Replace [[target|caption]] / [[target]] links with the target text.
  const linksAsText = collapsed.replace(/\[\[([^\[\]\|]+)[^\[\]]*\]\]/g, "$1");
  // Remove the space left in front of a comma or apostrophe.
  const tightPunctuation = linksAsText.replace(/([a-z]) ([,'])/g, "$1$2");
  // Remove dots sandwiched between word characters.
  const withoutInnerDots = tightPunctuation.replace(/(\w|\d)\.(\w|\d)/g, "$1$2");
  return withoutInnerDots.replaceAll("&amp;", "&").trim();
};

/**
 * Extract the wiki links of an article body, paired with the cleaned
 * sentence (or table row) they appear in.
 *
 * `<ref>` elements are removed first, and everything from the first
 * "See also", "References" or "External links" heading onwards is ignored.
 * The heading match tolerates spaces or tabs inside the `==` markers
 * (`==References==` and `== References ==` are both recognised).
 *
 * @param {?string} text - Article wikitext; may be null or empty.
 * @returns {Map<string, string>} Link target -> cleaned surrounding text.
 *   Always a Map (empty when there is no text) so callers can rely on
 *   `.size` and `.get`. When a target occurs in several sentences the
 *   shortest one is kept: entries are sorted longest-first and later Map
 *   entries overwrite earlier ones.
 */
const extractLinksFromWikiText = (text) => {
  if (!text) return new Map();

  const linkPattern = /\[\[([^\[\]\|]+)[^\[\]]*\]\]/g;
  const trailingSectionPattern = /==[ \t]*(?:See also|References|External links)[ \t]*==/;

  const withoutRefs = text
    .replace(/<ref[^>]*>[^<>]*<\/ref>/g, "")
    .replace(/<ref[^>]*\/>/g, "");
  const cutoff = withoutRefs.search(trailingSectionPattern);
  const clippedText = cutoff === -1 ? withoutRefs : withoutRefs.slice(0, cutoff);

  const processedMatches = clippedText
    // Keep common abbreviations from being treated as sentence ends.
    .replace(/ (vs|eg|ie|dr|mr|mrs|ms|rep|sen)\. /gi, " $1 ")
    // Split on sentence ends and table-row separators. The capture group
    // also yields the separators themselves; they contain no links and
    // therefore contribute nothing below.
    .split(/(\.|\|-)\s+/g)
    .flatMap((line) => {
      const targets = Array.from(line.matchAll(linkPattern), (match) => match[1]);
      if (targets.length === 0) return [];
      const description = cleanWikiText(line);
      return targets.map((link) => [link, description]);
    })
    .sort((a, b) => a[0] === b[0]
      ? b[1].length - a[1].length
      : a[0].localeCompare(b[0]));

  return new Map(processedMatches);
};

/**
 * Build a slimmed-down copy of an entity that keeps only what this module
 * reads back later: the id, the description and label for one language, and
 * for every claim its property plus the id of its main value.
 *
 * Claims whose main snak cannot be read (for instance because it has no
 * `datavalue`) are logged to the console and left out.
 *
 * @param {object} entity - Full entity with `id`, `claims`, `descriptions`
 *   and `labels`.
 * @param {string} [language="en"] - Language whose description/label to keep.
 * @returns {object} The compressed entity.
 */
const compressEntity = (entity, language = "en") => {
  const compressed = { id: entity.id, claims: {}, descriptions: {}, labels: {} };
  compressed.descriptions[language] = entity.descriptions[language];
  compressed.labels[language] = entity.labels[language];

  for (const propertyId of Object.keys(entity.claims)) {
    for (const claim of entity.claims[propertyId]) {
      try {
        const { mainsnak } = claim;
        const slimClaim = {
          mainsnak: {
            property: mainsnak.property,
            datavalue: { value: { id: mainsnak.datavalue.value.id } },
          },
        };
        if (!(propertyId in compressed.claims)) {
          compressed.claims[propertyId] = [];
        }
        compressed.claims[propertyId].push(slimClaim);
      } catch (error) {
        console.log("Could not compress claim");
        console.log(claim);
      }
    }
  }

  return compressed;
};

/**
 * Fetch a URL, parse the response body as JSON and hand it to `callback`.
 *
 * @param {string} url - URL to request.
 * @param {?function(*): *} [callback=null] - Receives the parsed JSON; its
 *   return value (or resolved value) becomes the result. Defaults to the
 *   identity function.
 * @returns {Promise<*>} Whatever `callback` returns.
 * @throws {Error} When the request or JSON parsing fails. The message is the
 *   stringified original error, and the original error object is kept on
 *   `cause` so its type and stack stay available to callers. Errors thrown by
 *   `callback` itself propagate unchanged.
 */
export const safeFetch = async (url, callback = null) => {
  const handleResult = callback || ((v) => v);

  let result;
  try {
    const response = await fetch(url);
    result = await response.json();
  } catch (error) {
    console.log("Error!");
    throw new Error(error, { cause: error });
  }

  return handleResult(result);
};

/**
 * Process `items` in consecutive batches, one batch at a time, and
 * concatenate the per-batch results.
 *
 * `batchLog` is called once per batch that is actually processed, right
 * before that batch starts. It is never called for empty input or after the
 * last batch.
 *
 * @param {Array} items - Items to process.
 * @param {function(Array): (Array|Promise<Array>)} callback - Handles one batch.
 * @param {?function(number): void} [batchLog=null] - Receives the 1-based
 *   batch number (offset by `depth - 1`).
 * @param {number} [batchSize=DEFAULT_BATCH_SIZE] - Maximum items per batch.
 * @param {number} [depth=1] - Number reported for the first batch.
 * @returns {Promise<Array>} Concatenation of all batch results, in order.
 * @throws {RangeError} When there are items to process but `batchSize` is
 *   smaller than 1 (which could never make progress).
 */
const safeFetchBatch = async (items, callback, batchLog = null, batchSize = DEFAULT_BATCH_SIZE, depth = 1) => {
  if (!items.length) return [];
  if (!(batchSize >= 1)) {
    throw new RangeError(`batchSize must be at least 1, got ${batchSize}`);
  }

  let results = [];
  let batchNumber = depth;
  for (let start = 0; start < items.length; start += batchSize) {
    if (batchLog) batchLog(batchNumber);
    const batchResults = await callback(items.slice(start, start + batchSize));
    results = results.concat(batchResults);
    batchNumber += 1;
  }
  return results;
};

/**
 * Hand the entity with the given id to `callback`, using the module cache
 * when possible and fetching it otherwise.
 *
 * On a cache miss the fetched entity is stored in the cache in compressed
 * form (see compressEntity) before `callback` runs.
 *
 * NOTE(review): a cache hit passes the compressed entity to `callback`, while
 * a miss passes the full fetched entity — confirm callers are fine with both.
 *
 * @param {string} id - Entity id.
 * @param {function(object): *} callback - Receives the entity.
 * @param {string} [language="en"] - Language kept when compressing for the cache.
 * @returns {Promise<*>} Whatever `callback` returns.
 */
const safeFetchEntity = async (id, callback, language = "en") => {
  const isCached = id in _cache;
  if (isCached) {
    console.log("Hit the cache!");
    return callback(_cache[id]);
  }

  console.log("API fetch..");
  const onFetched = (result) => {
    const entity = result.entities[id];
    _cache[id] = compressEntity(entity, language);
    return callback(entity);
  };
  return safeFetch(getUrlJsonEntity(id), onFetched);
};

/**
 * Collect the titles of all links on an entity's wiki page, following the
 * API's `plcontinue` continuation until the list is complete.
 *
 * @param {object} entity - Entity whose label is used as the page title when
 *   `baseUrl` is not given.
 * @param {?string} [baseUrl=null] - Links query URL; derived from the entity
 *   label when omitted.
 * @param {?string} [plcontinue=null] - Raw continuation token to resume from;
 *   it is URL-encoded here.
 * @param {number} [depth=1] - Unused; kept so existing call sites stay valid.
 * @returns {Promise<string[]>} Link titles from every result page, in order.
 */
const safeFetchLinks = async (entity, baseUrl = null, plcontinue = null, depth = 1) => {
  const queryUrl = baseUrl || getUrlJsonLinks(getEntityLabel(entity));
  const links = [];

  let token = plcontinue;
  do {
    const url = token === null
      ? queryUrl
      : `${queryUrl}&plcontinue=${encodeURIComponent(token)}`;
    const result = await safeFetch(url);

    for (const page of Object.values(result.query?.pages ?? {})) {
      // Missing pages and pages without links carry no `links` array.
      for (const link of page.links ?? []) {
        links.push(link.title);
      }
    }

    token = result.continue?.plcontinue ?? null;
  } while (token !== null);

  return links;
};

/**
 * Download the wikitext of a wiki page.
 *
 * @param {string} stub - Page identifier used to build the URL when
 *   `baseUrl` is not given.
 * @param {?string} [baseUrl=null] - Parse URL to use instead of the one
 *   derived from `stub`.
 * @param {?string} [plcontinue=null] - When not null, appended to the URL as
 *   a `plcontinue` parameter.
 * @param {number} [depth=1] - Unused.
 * @returns {Promise<?string>} The page wikitext, or `null` when the response
 *   has no `parse` member.
 */
const safeFetchWikiTextLinks = async (stub, baseUrl = null, plcontinue = null, depth = 1) => {
  const pageUrl = baseUrl || getUrlJsonWikitext(stub);

  let requestUrl = pageUrl;
  if (plcontinue !== null) {
    requestUrl = `${pageUrl}&plcontinue=${plcontinue}`;
  }

  const pickWikitext = (result) => (result.parse ? result.parse.wikitext["*"] : null);
  return safeFetch(requestUrl, pickWikitext);
};

/**
 * Resolve a batch of page links to their Wikidata item ids.
 *
 * The request follows redirects, and the API may also normalise titles, so
 * the title of a returned page can differ from the link text that was asked
 * for. The `normalized` and `redirects` arrays of the response are therefore
 * used to trace a returned title back to the requested one when looking up
 * its description.
 *
 * @param {Map<string, string>} descriptions - Link text -> description.
 * @param {Array<Array>} links - Batch of `[linkText, ...]` entries; only the
 *   first element of each entry is used.
 * @returns {Promise<Array<{name: string, id: string, description: (string|undefined)}>>}
 *   One record per returned page that has a `wikibase_item`; empty when the
 *   response contains no query result.
 */
const safeFetchLinkIds = async (descriptions, links) => {
  const titles = links.map((item) => item[0]);

  return safeFetch(getUrlJsonLinkIds(titles), (result) => {
    const query = (result && result.query) || {};

    // Resolved title -> titles it was derived from (normalisation, redirect).
    const sourcesByTitle = new Map();
    const rewrites = [...(query.normalized || []), ...(query.redirects || [])];
    for (const { from, to } of rewrites) {
      if (!sourcesByTitle.has(to)) sourcesByTitle.set(to, []);
      sourcesByTitle.get(to).push(from);
    }

    // Walk back from the returned title until a requested link text matches.
    const findDescription = (title) => {
      const pending = [title];
      const visited = new Set();
      while (pending.length > 0) {
        const current = pending.shift();
        if (visited.has(current)) continue;
        visited.add(current);
        const description = descriptions.get(current);
        if (description !== undefined) return description;
        pending.push(...(sourcesByTitle.get(current) || []));
      }
      return undefined;
    };

    return Object.values(query.pages || {})
      .filter((entry) => entry.pageprops && entry.pageprops.wikibase_item)
      .map((entry) => ({
        name: entry.title,
        id: entry.pageprops.wikibase_item,
        description: findDescription(entry.title),
      }));
  });
};

/**
 * Run the link-data SPARQL query for a batch of link records and tag every
 * result row with its bare entity id.
 *
 * @param {string} rootId - Entity id of the root item.
 * @param {Array<{id: string}>} linkIds - Link records; only `id` is used.
 * @param {object} [options={}] - Passed through to getSparqlQuery.
 * @returns {Promise<object[]>} The result bindings, each extended with an
 *   `id` taken from the item URI after its first 31 characters (the length
 *   of the `http://www.wikidata.org/entity/` prefix).
 */
const safeFetchLinkData = async (rootId, linkIds, options = {}) => {
  const ids = linkIds.map((entry) => entry.id);
  const sparql = getSparqlQuery(rootId, ids, options);
  const response = await SPARQLqueryDispatcher.query(sparql);

  return response.results.bindings.map((binding) => {
    const id = binding.item.value.slice(31);
    return { ...binding, id };
  });
};

/**
 * Fetch the graph-node data of a single entity.
 *
 * @param {string} id - Entity id; used both as the root and as the only candidate.
 * @param {object} [domain=CATEGORY.REAL_HUMAN] - Category the entity must belong to.
 * @param {boolean} [onlyWithProps=false] - Passed through to the query options.
 * @param {string} [language="en"] - Passed through to the query options.
 * @returns {Promise<(object|undefined)>} The first result row with its `type`
 *   set to `{ value: domain.label }`, or `undefined` when there is no row.
 */
export const generateNode = async (id, domain = CATEGORY.REAL_HUMAN, onlyWithProps = false, language = "en") => {
  const [node] = await safeFetchLinkData(id, [{ id }], { domain, onlyWithProps, language });
  if (!node) return node;

  node.type = { value: domain.label };
  return node;
};



/**
 * Build the list of linked entities for a wiki page: download the page text,
 * extract its links, resolve them to Wikidata ids, then fetch the extended
 * data of those that belong to `linkCategory`.
 *
 * Progress is reported through `progressUpdater(message, from, to)`; some
 * stage announcements pass only the message.
 *
 * @param {string} id - Entity id of the root item.
 * @param {string} stub - Page identifier of the root item's wiki page.
 * @param {function(string, number=, number=): *} progressUpdater - Progress sink.
 * @param {object} [linkCategory=CATEGORY.REAL_HUMAN] - Category linked items must belong to.
 * @param {boolean} [relativesOnly=false] - Passed as `onlyWithProps` to the data query.
 * @param {string} [language="en"] - Passed through to the data query.
 * @returns {Promise<object[]>} Result rows, each extended with `linkDescription`.
 */
export const generateLinks = async (id, stub, progressUpdater, linkCategory = CATEGORY.REAL_HUMAN, relativesOnly = false, language = "en") => {
  // Creates a per-batch logger that spreads the progress range [from, to]
  // evenly over the expected number of batches.
  const batchLogger = (from, to, numElements, logText, batchSize = DEFAULT_BATCH_SIZE) => {
    const numBatches = Math.ceil(numElements / batchSize);
    const interval = (to - from) / numBatches;
    return (batchNumber) => {
      const message = typeof logText === 'function' ? logText(batchNumber, numBatches) : logText;
      return progressUpdater(
        message,
        from + (interval * (batchNumber - 1)),
        from + (interval * batchNumber),
      );
    };
  };
  const describeBatch = (i, total) => `- Fetching batch ${i}/${total}`;

  progressUpdater(`Downloading entity wikipedia page text`, 0, 5);
  const text = await safeFetchWikiTextLinks(stub);

  progressUpdater(`Extracting links from entity page`, 30, 30);
  const links = extractLinksFromWikiText(text);  // no request

  progressUpdater(`Accessing core data for ${links.size} linked wiki entities`);
  const linkIds = await safeFetchBatch(
    Array.from(links),
    (batch) => safeFetchLinkIds(links, batch),
    batchLogger(30, 60, links.size, describeBatch),
  );

  // Several links can resolve to the same item; keep the last record per id.
  const linkIdsDistinct = new Map();
  for (const record of linkIds) {
    linkIdsDistinct.set(record["id"], record);
  }
  const linkIdRecords = Array.from(linkIdsDistinct.values());

  progressUpdater("Requesting extended data for linked pages");
  const options = { domain: linkCategory, onlyWithProps: relativesOnly, language };
  const linkDataBatchSize = 100;
  const data = await safeFetchBatch(
    linkIdRecords,
    (batch) => safeFetchLinkData(id, batch, options),
    batchLogger(60, 90, linkIdRecords.length, describeBatch, linkDataBatchSize),
    linkDataBatchSize,
  );

  const annotated = data.map((item) => {
    const linkDescription = linkIdsDistinct.has(item.id)
      ? linkIdsDistinct.get(item.id).description
      : links.get(item.itemLabel.value);
    return { ...item, linkDescription };
  });

  progressUpdater("Constructing and rendering updated graph", 90, 95);
  return annotated;
};

/**
 * Check whether a property entity has any claim whose value id is one of
 * the given type ids.
 *
 * @param {string} propertyId - Id of the property entity to load.
 * @param {string[]} propertyTypeId - Type ids to look for; passed as
 *   `typeIds` to hasPropertyOfType.
 * @returns {Promise<boolean>} Result of hasPropertyOfType for that entity.
 */
const isPropertyOfType = async (propertyId, propertyTypeId) => {
  const identity = (fetched) => fetched;
  const propertyEntity = await safeFetchEntity(propertyId, identity);
  return hasPropertyOfType(propertyEntity, propertyTypeId);
};

/**
 * `Array.prototype.filter` for predicates that may return promises.
 *
 * All predicates are started up front and awaited together.
 *
 * @param {Array} arr - Items to filter.
 * @param {function(*, number, Array): (boolean|Promise<boolean>)} predicate
 * @returns {Promise<Array>} Items whose predicate resolved to a truthy value.
 */
const asyncFilter = async (arr, predicate) => {
  const verdicts = await Promise.all(arr.map(predicate));
  return arr.filter((_value, index) => verdicts[index]);
};

/**
 * Collect the claim values of an entity, restricted to those properties
 * for which isPropertyOfType resolves to true.
 *
 * @param {object} entity - Entity with a `claims` object keyed by property id.
 * @param {string[]} propertyTypeId - Type ids handed to isPropertyOfType.
 * @returns {Promise<object[]>} Field values of the matching properties.
 */
const getPropertyValuesOfType = async (entity, propertyTypeId) => {
  const candidateIds = Object.keys(entity.claims);
  const matchingIds = await asyncFilter(
    candidateIds,
    async (id) => isPropertyOfType(id, propertyTypeId),
  );
  return getPropertyFieldValues(entity, matchingIds);
};

/**
 * List the entity-valued claims of an entity as flat records.
 *
 * Claims that cannot be read (for example because the main snak has no
 * `datavalue`) and claims whose value has no `id` are skipped.
 *
 * @param {object} entity - Entity with a `claims` object keyed by property id.
 * @param {?string[]} [propertyIds=null] - Properties to include; `null`
 *   means every property of the entity. Unknown ids contribute nothing.
 * @returns {Array<{property: string, label: *, id: string}>} One record per
 *   usable claim; `label` comes from the entity cache and may be `null`.
 */
const getPropertyFieldValues = (entity, propertyIds = null) => {
  const selectedIds = propertyIds === null ? Object.keys(entity.claims) : propertyIds;
  const fields = selectedIds.flatMap((propertyId) => entity.claims[propertyId] || []);

  const values = [];
  for (const field of fields) {
    let item;
    try {
      item = {
        property: field.mainsnak.property,
        label: getEntityLabelFromCache(field.mainsnak.property),
        id: field.mainsnak.datavalue.value.id,
      };
    } catch (error) {
      continue;
    }
    if (item.id !== undefined) values.push(item);
  }
  return values;
};

/**
 * Tell whether any claim value of an entity is one of the given type ids.
 *
 * @param {object} entity - Entity to inspect.
 * @param {string[]} typeIds - Ids to look for; tested with `includes`.
 * @param {?string} [propertyId=null] - Restrict the check to this property;
 *   `null` checks every property.
 * @returns {boolean} True when at least one claim value id is in `typeIds`.
 */
const hasPropertyOfType = (entity, typeIds, propertyId = null) => {
  const scope = propertyId === null ? null : [propertyId];
  for (const value of getPropertyFieldValues(entity, scope)) {
    if (typeIds.includes(value.id)) return true;
  }
  return false;
};

/**
 * Tell whether an entity, or any entity reachable through its claim values
 * within `depth` hops, has a claim value whose id is one of `typeIds`.
 *
 * Linked entities are loaded one at a time and the search stops at the
 * first match, so no more entities are fetched than needed.
 *
 * @param {object} entity - Entity to start from.
 * @param {string[]} typeIds - Ids to look for; tested with `includes`.
 * @param {?string[]} [propertyIds=null] - Properties to follow at every
 *   level; `null` follows all of them.
 * @param {number} [depth=5] - How many hops away from `entity` to search.
 * @returns {Promise<boolean>} True when a match is found.
 */
const hasPropertiesOfTypeRecursive = async (entity, typeIds, propertyIds = null, depth = 5) => {
  const values = getPropertyFieldValues(entity, propertyIds);
  if (values.some((item) => typeIds.includes(item.id))) return true;
  if (depth <= 0) return false;

  for (const { id } of values) {
    const subentity = await safeFetchEntity(id, (e) => e);
    if (await hasPropertiesOfTypeRecursive(subentity, typeIds, propertyIds, depth - 1)) {
      return true;
    }
  }
  return false;
};

/**
 * Summarise a human entity: its id, name and description in one language,
 * plus its relationship and information claim values.
 *
 * @param {object} entity - Entity with `id`, `labels`, `descriptions` and `claims`.
 * @param {string} [language="en"] - Language of the name and description.
 * @returns {Promise<{id: string, name: string, description: string, links: object[], attributes: object[]}>}
 * @throws {TypeError} When the entity has no label or description for `language`.
 */
const parseHuman = async (entity, language = "en") => {
  const id = entity.id;
  const name = entity.labels[language].value;
  const description = entity.descriptions[language].value;

  const links = await getPropertyValuesOfType(entity, WIKI_DATA.property_types.HUMAN_RELATIONSHIPS);
  const attributes = await getPropertyValuesOfType(entity, WIKI_DATA.property_types.HUMAN_INFORMATION);

  return { id, name, description, links, attributes };
};
