/**
 * Copyright (C) 2019 MongoDB, Inc.  All Rights Reserved.
 */

#pragma once

#include <functional>
#include <pcrecpp.h>
#include <type_traits>

#include "mongo/base/clonable_ptr.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/db/field_ref.h"
#include "mongo/db/matcher/schema/encrypt_schema_gen.h"
#include "mongo/db/pipeline/expression.h"
#include "mongo/util/str.h"
#include "resolved_encryption_info.h"

namespace mongo {
/**
 * 'kRemote' represents the validation schema from mongod and 'kLocal' represents the schema
 * generated using the drivers.
 */
enum class EncryptionSchemaType {
    // The validation schema obtained from mongod.
    kRemote,
    // The schema generated using the drivers.
    kLocal,
};

class EncryptionSchemaTreeNode;

/**
 * Explicitly declare a type for cloning an EncryptionSchemaTreeNode, for compatibility with
 * clonable_ptr and to avoid relying on the implicit clone factory which requires a fully defined
 * type. We need this since an EncryptionSchemaTreeNode holds a
 * clonable_ptr<EncryptionSchemaTreeNode> as a member of the class.
 */
template <>
struct clonable_traits<EncryptionSchemaTreeNode> {
    // Factory functor that produces a copy of a node. It is only declared in this header, so the
    // node type does not have to be complete at this point.
    struct clone_factory_type {
        // Returns a newly allocated copy of 'node'.
        std::unique_ptr<EncryptionSchemaTreeNode> operator()(
            const EncryptionSchemaTreeNode& node) const;
    };
};

/**
 * A class that represents a node in an encryption schema tree.
 *
 * The children of this node are accessed via a string map, where the key used in the map represents
 * the next path component.
 *
 * Example schema with encrypted fields "user.ssn" and "account":
 *
 * {$jsonSchema: {
 *      type: "object",
 *      properties: {
 *          user: {
 *              type: "object",
 *              properties: {
 *                  ssn: {encrypt:{}},
 *                  address: {type: "string"}
 *              }
 *          },
 *          account: {encrypt: {}},
 *      }
 * }}
 *
 * Results in the following encryption schema tree:
 *
 *                   NotEncryptedNode
 *                       /    \
 *                 user /      \ account
 *                     /        \
 *        NotEncryptedNode    EncryptedNode
 *               /     \
 *          ssn /       \ address
 *             /         \
 *     EncryptedNode  NotEncryptedNode
 */
class EncryptionSchemaTreeNode {
public:
    /**
     * A child subschema registered via the 'patternProperties' keyword. Pairs the compiled regular
     * expression with the subtree that applies to field names matching it.
     */
    struct PatternPropertiesChild {
        /**
         * Compiles 'regexStringData' as a PCRE pattern and takes ownership of 'child'. Throws
         * (code 51141) if PCRE reports an error for the pattern.
         */
        PatternPropertiesChild(StringData regexStringData,
                               std::unique_ptr<EncryptionSchemaTreeNode> child)
            : regex(regexStringData.toString()), child(std::move(child)) {
            const auto& errorStr = regex.error();
            uassert(51141,
                    str::stream() << "Invalid regular expression in 'patternProperties': "
                                  << regexStringData << " PCRE error string: " << errorStr,
                    errorStr.empty());
        }

        pcrecpp::RE regex;
        clonable_ptr<EncryptionSchemaTreeNode> child;

        // Equal when the pattern text is identical and the subtrees compare equal.
        bool operator==(const PatternPropertiesChild& other) const {
            return regex.pattern() == other.regex.pattern() && *child == *other.child;
        }

        bool operator!=(const PatternPropertiesChild& other) const {
            return !(*this == other);
        }

        // Orders by pattern text only. This is the ordering used by the std::set which stores
        // these children, so the pattern text acts as the set's key.
        bool operator<(const PatternPropertiesChild& other) const {
            return (regex.pattern() < other.regex.pattern());
        }
    };

    /**
     * Converts a JSON schema, represented as BSON, into an encryption schema tree. Returns a
     * pointer to the root of the tree or throws an exception if either the schema is invalid or is
     * valid but illegal from an encryption analysis perspective.
     *
     * If 'schemaType' is kRemote, allows schema validation keywords which have no implication on
     * encryption since they are used for schema enforcement on mongod.
     */
    static std::unique_ptr<EncryptionSchemaTreeNode> parse(BSONObj schema,
                                                           EncryptionSchemaType schemaType);

    virtual ~EncryptionSchemaTreeNode() = default;

    /**
     * Override this method to return a newly allocated copy of the node.
     */
    virtual std::unique_ptr<EncryptionSchemaTreeNode> clone() const = 0;

    /**
     * Override this method to return the node's EncryptionMetadata, or boost::none if it holds
     * none.
     */
    virtual boost::optional<ResolvedEncryptionInfo> getEncryptionMetadata() const = 0;

    /**
     * Returns true if this tree contains at least one EncryptionSchemaEncryptedNode or
     * EncryptionSchemaStateMixedNode.
     */
    virtual bool mayContainEncryptedNode() const;

    /**
     * Returns true if this tree contains at least one EncryptionSchemaEncryptedNode with the
     * random algorithm or at least one EncryptionSchemaStateMixedNode.
     */
    virtual bool mayContainRandomlyEncryptedNode() const;

    /**
     * Certain EncryptionSchemaTreeNode derived classes may contain literals, stored in-place to
     * mark for encryption. This function returns a vector for holding them or boost::none if the
     * derived class type does not support attached literals.
     *
     * This method does not return literals from any of this node's descendants in the tree.
     */
    virtual boost::optional<std::vector<std::reference_wrapper<ExpressionConstant>>&> literals() {
        return boost::none;
    }

    /**
     * Adds 'node' at 'path' under this node. Adds unencrypted nodes as necessary to
     * reach the final component of 'path'. Returns a pointer to a node that was overwritten or
     * nullptr if there did not already exist a node with the given path. It is invalid to call
     * this function with an empty FieldRef.
     */
    clonable_ptr<EncryptionSchemaTreeNode> addChild(FieldRef path,
                                                    std::unique_ptr<EncryptionSchemaTreeNode> node);

    /**
     * Adds 'node' as a special "wildcard" child which is used for all field names that don't have
     * explicit child nodes. For instance, consider the schema
     *
     * {
     *   type: "object",
     *   properties: {a: {type: "number"}, b: {type: "string"}},
     *   required: ["a", "b"],
     *   additionalProperties: {encrypt: {}}
     * }
     *
     * This schema matches objects where "a" is number, "b" is a string, and all other properties
     * are encrypted. This requires a special child in the encryption tree which has no particular
     * field name associated with it:
     *
     *                   NotEncryptedNode
     *                  /    |           \
     *               a /     | b          \ *
     *                /      |             \
     *  NotEncryptedNode  NotEncryptedNode  EncryptedNode
     *
     * The "*" in the diagram above indicates wildcard behavior: this child applies for all field
     * names other than "a" and "b".
     */
    void addAdditionalPropertiesChild(std::unique_ptr<EncryptionSchemaTreeNode> node) {
        _additionalPropertiesChild = std::move(node);
    }

    /**
     * Adds 'node' as a special child associated with a regular expression rather than a fixed field
     * name. For instance, consider the schema
     *
     * {
     *   type: "object",
     *   properties: {a: {type: "number"}, b: {type: "string"}},
     *   patternProperties: {"^c": {encrypt: {}}}
     * }
     *
     * This schema matches objects where "a" is a number (if it exists), "b" is a string (if it
     * exists), and any property names which begin with "c" are encrypted. The 'patternProperties'
     * keyword results in a node in the encryption tree which is associated with the regex /^c/. The
     * encryption schema tree would look like this:
     *
     *                   NotEncryptedNode
     *                  /    |           \
     *               a /     | b          \ /^c/
     *                /      |             \
     *  NotEncryptedNode  NotEncryptedNode  EncryptedNode
     *
     * Children are keyed by the text of their pattern. If a child with an identical pattern has
     * already been added, the existing child is kept and 'node' is discarded. Throws if 'regex' is
     * not a valid regular expression.
     */
    void addPatternPropertiesChild(StringData regex,
                                   std::unique_ptr<EncryptionSchemaTreeNode> node) {
        // Construct the child directly inside the set rather than building a temporary and then
        // copying its compiled regex into the container.
        _patternPropertiesChildren.emplace(regex, std::move(node));
    }

    /**
     * If the given path maps to an encryption node in the tree then returns the associated
     * EncryptionMetadata, otherwise returns boost::none. Any numerical path components will
     * *always* be treated as field names, not array indexes.
     */
    boost::optional<ResolvedEncryptionInfo> getEncryptionMetadataForPath(
        const FieldRef& path) const {
        auto node = getNode(path);
        return getEncryptionMetadataForNode(node);
    }

    /**
     * Returns true if the prefix passed in is the prefix of an encrypted path. Returns false if
     * the prefix does not exist. Should not be called if any part of the prefix is encrypted.
     */
    bool mayContainEncryptedNodeBelowPrefix(const FieldRef& prefix) const {
        return _mayContainEncryptedNodeBelowPrefix(prefix, 0);
    }

    /**
     * Returns the node at a given path if it exists. Returns nullptr if no such path exists in the
     * tree. Respects additional and pattern properties. Throws an exception if there are multiple
     * matching nodes with conflicting metadata.
     */
    const EncryptionSchemaTreeNode* getNode(FieldRef path) const {
        return _getNode(path, 0);
    }
    EncryptionSchemaTreeNode* getNode(FieldRef path) {
        // Share the const lookup and strip constness from the result. The const qualifier has to
        // be applied to the pointee, not to the 'this' pointer itself, hence the explicit
        // pointer-to-const type.
        const EncryptionSchemaTreeNode* constThis = this;
        return const_cast<EncryptionSchemaTreeNode*>(constThis->_getNode(path, 0));
    }

    /**
     * Remove the specified node from the schema. Does nothing if path does not exist. Returns true
     * if a node was removed. Ignores additional and pattern properties.
     */
    bool removeNode(FieldRef path);

    // Note that comparing EncryptionSchemaStateMixedNodes for equality will fail, since their
    // encryption metadata isn't known until a query is run.
    bool operator==(const EncryptionSchemaTreeNode& other) const;

    bool operator!=(const EncryptionSchemaTreeNode& other) const {
        return !(*this == other);
    }

private:
    /**
     * Returns the encryption metadata of 'node', or boost::none if 'node' is null.
     */
    static boost::optional<ResolvedEncryptionInfo> getEncryptionMetadataForNode(
        const EncryptionSchemaTreeNode* node) {
        if (node) {
            return node->getEncryptionMetadata();
        }
        return boost::none;
    }

    /**
     * Returns a const pointer to the child if it exists. Ignores additionalProperties and
     * patternProperties children.
     */
    const EncryptionSchemaTreeNode* getNamedChild(const StringData& name) const {
        auto childrenIt = _propertiesChildren.find(name);
        if (childrenIt != _propertiesChildren.end()) {
            return childrenIt->second.get();
        }
        return nullptr;
    }

    /**
     * Non-const counterpart of the overload above. Delegates to it so that the lookup logic exists
     * in a single place.
     */
    EncryptionSchemaTreeNode* getNamedChild(const StringData& name) {
        const EncryptionSchemaTreeNode* constThis = this;
        return const_cast<EncryptionSchemaTreeNode*>(constThis->getNamedChild(name));
    }

    /**
     * Given the property name 'name', returns a list of child nodes for the subschemas that are
     * relevant. This follows the rules associated with the JSON Schema 'properties',
     * 'patternProperties', and 'additionalProperties' keywords. If there is a child added to the
     * tree via addChild() with the edge name exactly matching 'name', then that child will be
     * included in the output list. In addition, children added via addPatternPropertiesChild()
     * whose regex matches 'name' will be included in the output list.
     *
     * If no regular addChild() nodes or 'patternProperties' child nodes are found, but a node has
     * been added via addAdditionalPropertiesChild(), then returns this 'additionalProperties'
     * child.
     *
     * If no child with 'name' exists, no 'patternProperties' child whose regex matches 'name'
     * exists, and there is no 'additionalProperties' child, then returns an empty vector.
     */
    std::vector<EncryptionSchemaTreeNode*> getChildrenForPathComponent(StringData name) const;

    /**
     * This method is responsible for recursively descending the encryption tree until the end of
     * the path is reached or there's no edge to take. The 'index' parameter is used to indicate
     * which part of 'path' we're currently at, and is expected to increment as we descend the tree.
     *
     * Throws an AssertionException if 'path' contains a prefix to an encrypted field.
     *
     * Throws if multiple relevant subschemas return conflicting encryption metadata. This can
     * happen for 'patternProperties', since we may need to descend the subtrees for multiple
     * matching patterns.
     */
    const EncryptionSchemaTreeNode* _getNode(const FieldRef& path, size_t index = 0) const;

    // Recursive helper behind mayContainEncryptedNodeBelowPrefix(), which starts it at level 0.
    bool _mayContainEncryptedNodeBelowPrefix(const FieldRef& prefix, size_t level) const;

    // Children reachable through a fixed field name, keyed by that name.
    StringMap<clonable_ptr<EncryptionSchemaTreeNode>> _propertiesChildren;

    // Holds any children which are associated with a regex rather than a specific field name.
    std::set<PatternPropertiesChild> _patternPropertiesChildren;

    // If non-null, this special child is used when no applicable child is found by name in
    // '_propertiesChildren' or by regex in '_patternPropertiesChildren'. Used to implement
    // encryption analysis for the 'additionalProperties' keyword.
    clonable_ptr<EncryptionSchemaTreeNode> _additionalPropertiesChild;
};

/**
 * Represents a path that is not encrypted. May be either an internal node or a leaf node.
 */
class EncryptionSchemaNotEncryptedNode final : public EncryptionSchemaTreeNode {
public:
    // Copies this node via the copy constructor and hands ownership of the copy to the caller.
    std::unique_ptr<EncryptionSchemaTreeNode> clone() const final {
        return std::make_unique<EncryptionSchemaNotEncryptedNode>(*this);
    }

    // This node type never holds encryption metadata.
    boost::optional<ResolvedEncryptionInfo> getEncryptionMetadata() const final {
        return boost::none;
    }
};

/**
 * Node which represents an encrypted field per the corresponding JSON Schema. A path is considered
 * encrypted only if its final component lands on this node.
 */
class EncryptionSchemaEncryptedNode final : public EncryptionSchemaTreeNode {
public:
    // Stores 'metadata' as the encryption info reported by this node.
    EncryptionSchemaEncryptedNode(ResolvedEncryptionInfo metadata)
        : _metadata(std::move(metadata)) {}

    // Copies this node via the copy constructor and hands ownership of the copy to the caller.
    std::unique_ptr<EncryptionSchemaTreeNode> clone() const final {
        return std::make_unique<EncryptionSchemaEncryptedNode>(*this);
    }

    // Always returns the metadata supplied at construction.
    boost::optional<ResolvedEncryptionInfo> getEncryptionMetadata() const final {
        return _metadata;
    }

    // Gives access to the literals attached directly to this node.
    boost::optional<std::vector<std::reference_wrapper<ExpressionConstant>>&> literals() final {
        return _literals;
    }

    // This node is itself encrypted, so the answer is unconditionally true.
    bool mayContainEncryptedNode() const final {
        return true;
    }

    // True exactly when this node's metadata names the random algorithm.
    bool mayContainRandomlyEncryptedNode() const final {
        const bool usesRandomAlgorithm = (_metadata.algorithm == FleAlgorithmEnum::kRandom);
        return usesRandomAlgorithm;
    }

private:
    // Encryption info for this node; fixed for the lifetime of the node.
    const ResolvedEncryptionInfo _metadata;

    // Literals attached to this node, exposed through literals().
    std::vector<std::reference_wrapper<ExpressionConstant>> _literals;
};

/**
 * Node which represents a field which may or may not be encrypted. Since the actual state of the
 * node can't be known before the query is actually executed, attempting to get the encryption
 * metadata of this node will throw an exception.
 */
class EncryptionSchemaStateMixedNode final : public EncryptionSchemaTreeNode {
public:
    // Copies this node via the copy constructor and hands ownership of the copy to the caller.
    std::unique_ptr<EncryptionSchemaTreeNode> clone() const final {
        return std::make_unique<EncryptionSchemaStateMixedNode>(*this);
    }

    // Never returns a value: always raises error 31133 because the metadata is not known until
    // runtime.
    boost::optional<ResolvedEncryptionInfo> getEncryptionMetadata() const final {
        uasserted(31133,
                  "Cannot get metadata for path whose encryption properties are not known until "
                  "runtime.");
    }

    // Conservatively reports true, since the field may turn out to be encrypted at runtime.
    bool mayContainEncryptedNode() const final {
        return true;
    }

    // Reports true unconditionally, like mayContainEncryptedNode() on this node.
    bool mayContainRandomlyEncryptedNode() const final {
        return true;
    }
};

/**
 * Node which represents a field for which we can choose an encryption status and type at a later
 * time. When this node exists in a schema tree, it indicates a referenceable path which does not
 * yet have a ResolvedEncryptionInfo assigned but could support one.
 *
 * This node has two possible futures. Either an encryption info will be chosen for it when it is
 * compared to an encrypted node of that type and it will be converted into an
 * EncryptionSchemaEncryptedNode. At that time, the attached literals would be marked for
 * encryption.
 *
 * Alternatively, it may be compared to something unencrypted or be staged for evaluation by the
 * server. If this is the case, it must become an EncryptionSchemaNotEncryptedNode and any attached
 * literals should be left alone and forgotten. If a schema tree reaches its final state and one of
 * these nodes still exists, the effect is the same as manually converting it to an
 * EncryptionSchemaNotEncryptedNode.
 */
class EncryptionSchemaUnknownNode final : public EncryptionSchemaTreeNode {
public:
    // Copies this node via the copy constructor and hands ownership of the copy to the caller.
    std::unique_ptr<EncryptionSchemaTreeNode> clone() const final {
        return std::make_unique<EncryptionSchemaUnknownNode>(*this);
    }

    // This node type holds no encryption metadata, so this always returns boost::none.
    boost::optional<ResolvedEncryptionInfo> getEncryptionMetadata() const final {
        return boost::none;
    }

    // Gives access to the literals attached directly to this node.
    boost::optional<std::vector<std::reference_wrapper<ExpressionConstant>>&> literals() final {
        return _literals;
    }

private:
    // Literals attached to this node, exposed through literals().
    std::vector<std::reference_wrapper<ExpressionConstant>> _literals;
};

}  // namespace mongo
