
"""
    Note: there was an issue with loading the stanza types on startup.
    Therefore, I switched to checking the types via their module names,
    which means I don't have to load any stanza functions.
"""


def class_name(x):
    """Return the class name of `x`, qualified with its module.

    Args:
        x: Any object.

    Returns:
        str: `"<module>.<name>"` of the object's class, or just `"<name>"`
        when the class has no module or lives in the builtins module.
    """
    cls = x.__class__
    module, name = cls.__module__, cls.__name__
    if module is None:
        return name
    # `str.__class__` is `type`, so this is the name of the builtins module.
    builtins_module = str.__class__.__module__
    if module == builtins_module:
        return name
    return ".".join((module, name))


def has_type(x, what):
    """Check whether the qualified class name of `x` equals `what`.

    Args:
        x: Any object.
        what (str): Expected class name in the format produced by
            `class_name`, e.g. `"package.module.ClassName"`.

    Returns:
        bool: True if the names match exactly, False otherwise.
    """
    assert isinstance(what, str)
    actual = class_name(x)
    return actual == what


def available_download_methods(x):
    """Map download method names to their values.

    Args:
        x: An object exposing `pipeline.core.DownloadMethod`
            (presumably the imported stanza module; confirm with caller).

    Returns:
        dict: One entry per item obtained by iterating `DownloadMethod`,
        keyed by the item's `name` and holding the item's `value`.
    """
    download_method = x.pipeline.core.DownloadMethod
    methods = {}
    for member in download_method:
        methods[member.name] = member.value
    return methods


def is_document(x):
    """Return True if the class of `x` is named `stanza.models.common.doc.Document`.

    The check compares class names only, so stanza itself is not imported.
    """
    expected = "stanza.models.common.doc.Document"
    return has_type(x, expected)


def is_sentence(x):
    """Return True if the class of `x` is named `stanza.models.common.doc.Sentence`."""
    expected = "stanza.models.common.doc.Sentence"
    return has_type(x, expected)


def is_word(x):
    """Return True if the class of `x` is named `stanza.models.common.doc.Word`."""
    expected = "stanza.models.common.doc.Word"
    return has_type(x, expected)


def is_token(x):
    """Return True if the class of `x` is named `stanza.models.common.doc.Token`."""
    expected = "stanza.models.common.doc.Token"
    return has_type(x, expected)


def words(x):
    """Collect the text of every word in a document or sentence.

    Args:
        x: A stanza document (read via `iter_words()`) or sentence
            (read via its `words` attribute).

    Returns:
        list: The `text` attribute of each word, in iteration order.

    Raises:
        ValueError: If `x` is neither a document nor a sentence.
    """
    if is_document(x):
        source = x.iter_words()
    elif is_sentence(x):
        source = x.words
    else:
        raise ValueError("type '%s' is not supported by function words" % type(x))
    return [item.text for item in source]


def tokens(x):
    """Collect the text of every token in a document or sentence.

    Args:
        x: A stanza document (read via `iter_tokens()`) or sentence
            (read via its `tokens` attribute).

    Returns:
        list: The `text` attribute of each token, in iteration order.

    Raises:
        ValueError: If `x` is neither a document nor a sentence.
    """
    if is_document(x):
        source = x.iter_tokens()
    elif is_sentence(x):
        source = x.tokens
    else:
        raise ValueError("type '%s' is not supported by function tokens" % type(x))
    return [item.text for item in source]


def entities(x):
    """Convert the entities of `x` into a list of dictionaries.

    The dictionaries are returned as-is; combining them into a table is
    left to the R side, where it is easier thanks to `rbind`.

    Args:
        x: An object with an iterable `entities` attribute whose items
            provide a `to_dict()` method.

    Returns:
        list: The result of `to_dict()` for each entity, in order.
    """
    records = []
    for span in x.entities:
        records.append(span.to_dict())
    return records


def dependencies(x):
    """Convert the dependencies of a sentence into a list of dictionaries.

    Args:
        x: A stanza sentence with an iterable `dependencies` attribute.

    Returns:
        list: The result of `to_dict()` for each dependency item, in order.

    Raises:
        ValueError: If `x` is not a sentence.
    """
    if not is_sentence(x):
        raise ValueError("type '%s' is not supported by function dependencies" % type(x))
    # NOTE(review): stanza appears to store each dependency as a
    # (head, deprel, word) tuple, which would not offer `to_dict()` --
    # confirm against the stanza version in use.
    converted = []
    for dep in x.dependencies:
        converted.append(dep.to_dict())
    return converted
    

def sents(x, type="word"):
    """Return the words or tokens of a document, grouped by sentence.

    Args:
        x: A stanza document.
        type (str): `"word"` to collect word texts; any other value
            (documented choice: `"token"`) collects token texts.

    Returns:
        list: One inner list of texts per sentence in `x.sentences`.

    Raises:
        ValueError: If `x` is not a document.
    """
    if not is_document(x):
        # The parameter `type` shadows the builtin of the same name, so
        # calling `type(x)` here would try to call a string and raise a
        # TypeError instead of the intended ValueError. Use `__class__`.
        raise ValueError("type '%s' is not supported by function sents" % x.__class__)
    fun = words if type == "word" else tokens
    return [fun(sent) for sent in x.sentences]


def multi_word_token(x):
    """Build a column-wise mapping between tokens and their words.

    Note that Multi-Word Tokens are not Multi-Word Expressions, see
    https://universaldependencies.org/u/overview/tokenization.html

    Args:
        x: A stanza document.

    Returns:
        dict: Four parallel lists with one entry per word:
        `"tid"` (running token counter over the whole document, starting
        at 1), `"wid"` (the word's `id`), `"token"` (text of the enclosing
        token) and `"word"` (text of the word).

    Raises:
        ValueError: If `x` is not a document.
    """
    # FIXME (marker kept from the original; the pending fix is not described)
    if not is_document(x):
        raise ValueError("type '%s' is not supported by function multi_word_token" % type(x))
    tids, wids, token_texts, word_texts = [], [], [], []
    running_tid = 0
    for sent in x.sentences:
        for token in sent.tokens:
            # Tokens are numbered across sentences, not per sentence.
            running_tid += 1
            for word in token.words:
                tids.append(running_tid)
                wids.append(word.id)
                token_texts.append(token.text)
                word_texts.append(word.text)
    return {"tid": tids, "wid": wids, "token": token_texts, "word": word_texts}


def features(x, atype="word"):
    """Extract per-word, per-token or per-sentence records from a document.

    Args:
        x (Document): A stanza `Document`.
        atype (str): The annotation level, one of
            `["word", "token", "sentence"]`.

    Returns:
        A list of dictionaries, one per word, token or sentence. Every
        record carries `"sid"`, the 1-based sentence index. Word records
        also carry `"tid"`, a running token counter over the whole
        document (starting at 1), followed by the fields of
        `word_to_dict`. Token records carry the fields of `token_to_dict`
        and sentence records the fields of `sentence_to_dict`.

    IGNORE:
        Members of the stanza object "Document".
            text
            sentences
            num_tokens
            num_words
            ents
            entities
    IGNORE
    """
    assert is_document(x)
    assert atype in ("word", "token", "sentence")
    rows = []
    if atype == "sentence":
        for sid, sent in enumerate(x.sentences, start=1):
            rows.append({"sid": sid, **sentence_to_dict(sent)})
    elif atype == "token":
        for sid, sent in enumerate(x.sentences, start=1):
            for token in sent.tokens:
                # The counter is never advanced on this path, so "tid"
                # starts as 0 and is then replaced by the "tid" entry
                # that token_to_dict supplies (the token's own id).
                rows.append({"sid": sid, "tid": 0, **token_to_dict(token)})
    elif atype == "word":
        token_counter = 0
        for sid, sent in enumerate(x.sentences, start=1):
            for token in sent.tokens:
                token_counter += 1
                for word in token.words:
                    rows.append({"sid": sid, "tid": token_counter, **word_to_dict(word)})
    return rows


def document_to_dict(x):
    """Placeholder for a document conversion; not implemented.

    Args:
        x: Ignored.

    Returns:
        None: Always.
    """
    return None


def sentence_to_dict(x, type="word"):
    """Convert a sentence into a dictionary of its text and sentiment.

    Names listed by the original author (presumably the members of the
    stanza `Sentence` object; only `text` and `sentiment` are read here):
        doc
        text
        dependencies
        tokens
        words
        ents
        entities (alias for ents)
        sentiment
        dependencies_string

    Args:
        x: An object with `text` and `sentiment` attributes.
        type (str): Currently unused.

    Returns:
        dict: `{"text": ..., "sentiment": ...}`.
    """
    return dict(text=x.text, sentiment=x.sentiment)


def token_to_dict(x):
    """Convert a token into a flat dictionary.

    Names listed by the original author (presumably the members of the
    stanza `Token` object; `words` and `sent` are not exported):
        id
        text
        misc
        words
        start_char
        end_char
        ner
        sent

    Args:
        x: An object with `id`, `text`, `misc`, `start_char`, `end_char`
            and `ner` attributes.

    Returns:
        dict: The token's `id` under the key `"tid"`, followed by the
        remaining attributes under their own names.
    """
    record = {"tid": x.id}
    for field in ("text", "misc", "start_char", "end_char", "ner"):
        record[field] = getattr(x, field)
    return record


def word_to_dict(x):
    """Convert a word into a flat dictionary.

    Names listed by the original author (presumably the members of the
    stanza `Word` object; `parent` and `sent` are not exported):
        id
        text
        lemma
        upos
        xpos
        feats
        head
        deprel
        deps
        misc
        parent
        pos
        sent

    Args:
        x: A stanza `Word`.

    Returns:
        dict: The word's `id` under the key `"wid"`, followed by the
        remaining exported attributes under their own names.
    """
    assert is_word(x)
    # The original left `"parent": x.parent.id` commented out, so the
    # parent is deliberately not part of the record.
    record = {"wid": x.id}
    for field in ("text", "lemma", "upos", "xpos", "feats", "head",
                  "deprel", "deps", "misc", "pos"):
        record[field] = getattr(x, field)
    return record


def span_to_dict(x):
    """Placeholder for a span conversion; not implemented.

    Names listed by the original author (presumably the members of the
    stanza `Span` object):
        doc
        text
        tokens
        words
        type
        start_char
        end_char
        sent

    Args:
        x: Ignored.

    Returns:
        None: Always.
    """
    return None
