﻿// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

#nullable disable

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Reflection;
using System.Runtime.CompilerServices;
using System.Text.RegularExpressions;
using System.Threading;
using Microsoft.CodeAnalysis.EmbeddedLanguages.LanguageServices;
using Microsoft.CodeAnalysis.LanguageServices;
using Microsoft.CodeAnalysis.Shared.Extensions;
using Roslyn.Utilities;

namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions.LanguageServices
{
    /// <summary>
    /// Helper class to detect regex pattern tokens in a document efficiently.
    /// </summary>
    internal sealed class RegexPatternDetector
    {
        // Name of the parameter that identifies a string argument as a regex pattern.  Used both when collecting
        // candidate Regex methods and when checking which parameter a given argument binds to.
        private const string _patternName = "pattern";

        /// <summary>
        /// Cache, keyed by <see cref="Compilation"/>, so that we can reuse the same
        /// <see cref="RegexPatternDetector"/> while analyzing a particular compilation.  This saves having to
        /// recreate the detector for every string literal that features examine within that compilation.  The
        /// cached value is <see langword="null"/> when no detector could be created for the compilation.
        /// </summary>
        private static readonly ConditionalWeakTable<Compilation, RegexPatternDetector> _modelToDetector = new();

        // Language services (syntax facts, semantic facts, virtual-char conversion) for the language being analyzed.
        private readonly EmbeddedLanguageInfo _info;
        // The symbol the compilation resolved for System.Text.RegularExpressions.Regex.
        private readonly INamedTypeSymbol _regexType;
        // Names of the public static Regex methods that have a parameter named 'pattern'.
        private readonly HashSet<string> _methodNamesOfInterest;

        /// <summary>
        /// Helps match comments of the form: language=regex,option1,option2,option3
        ///
        /// The comment must start with one of the markers: //  '  /*
        /// All matching is case insensitive, with spaces allowed between the punctuation.
        /// 'lang' or 'language', and 'regex' or 'regexp', are all allowed.  Option values will be
        /// or'ed together to produce the final options value.  If an unknown option name is
        /// encountered, the comment as a whole is treated as not matching, so that a misspelled
        /// option is noticed rather than silently ignored.
        ///
        /// Option names are the values from the <see cref="RegexOptions"/> enum.
        /// </summary>
        private static readonly Regex s_languageCommentDetector =
            new(@"^((//)|(')|(/\*))\s*lang(uage)?\s*=\s*regex(p)?\b((\s*,\s*)(?<option>[a-zA-Z]+))*",
                RegexOptions.ExplicitCapture | RegexOptions.IgnoreCase | RegexOptions.Compiled);

        // Maps each RegexOptions member name (compared case-insensitively) to its enum value.  Built by reflecting
        // over the enum's declared fields whose field type is RegexOptions.
        private static readonly Dictionary<string, RegexOptions> s_nameToOption =
            typeof(RegexOptions).GetTypeInfo().DeclaredFields
                .Where(f => f.FieldType == typeof(RegexOptions))
                .ToDictionary(f => f.Name, f => (RegexOptions)f.GetValue(null), StringComparer.OrdinalIgnoreCase);

        /// <summary>
        /// Creates a detector from the language services in <paramref name="info"/>, the compilation's
        /// <see cref="Regex"/> type symbol, and the set of method names that warrant semantic analysis.
        /// </summary>
        public RegexPatternDetector(
            EmbeddedLanguageInfo info,
            INamedTypeSymbol regexType,
            HashSet<string> methodNamesOfInterest)
            => (_info, _regexType, _methodNamesOfInterest) = (info, regexType, methodNamesOfInterest);

        /// <summary>
        /// Returns the detector cached for <paramref name="compilation"/>, creating and caching it on first use.
        /// </summary>
        /// <param name="compilation">The compilation the detector is associated with (the cache key).</param>
        /// <param name="info">Language services handed to a newly created detector.</param>
        /// <returns>
        /// The detector, or <see langword="null"/> when one could not be created for this compilation.
        /// </returns>
        public static RegexPatternDetector TryGetOrCreate(
            Compilation compilation, EmbeddedLanguageInfo info)
        {
            // Do a quick non-allocating check first.  The creating path deliberately lives in a separate method:
            // its lambda captures 'info', and the compiler allocates the closure object on entry to whichever
            // method declares the captured parameter.  Keeping the lambda in this method would therefore make
            // even the cache-hit path allocate on every call.
            if (_modelToDetector.TryGetValue(compilation, out var detector))
            {
                return detector;
            }

            return GetOrCreateSlow(compilation, info);
        }

        /// <summary>
        /// Cache-miss path of <see cref="TryGetOrCreate"/>: creates the detector through the cache so that
        /// subsequent lookups for the same compilation reuse it.
        /// </summary>
        private static RegexPatternDetector GetOrCreateSlow(
            Compilation compilation, EmbeddedLanguageInfo info)
            => _modelToDetector.GetValue(compilation, c => TryCreate(c, info));

        /// <summary>
        /// Builds a detector for <paramref name="compilation"/>.  Returns <see langword="null"/> when the
        /// compilation cannot resolve <see cref="Regex"/> by its metadata name.
        /// </summary>
        private static RegexPatternDetector TryCreate(
            Compilation compilation, EmbeddedLanguageInfo info)
        {
            var regexMetadataName = typeof(Regex).FullName;
            var regexSymbol = compilation.GetTypeByMetadataName(regexMetadataName);

            // Without a Regex type there is nothing to detect semantically.
            return regexSymbol == null
                ? null
                : new RegexPatternDetector(
                    info, regexSymbol, GetMethodNamesOfInterest(regexSymbol, info.SyntaxFacts));
        }

        /// <summary>
        /// Syntax-only pre-filter: <see langword="true"/> when <paramref name="token"/> is a string literal that
        /// is either passed directly as an argument or annotated with a regex language comment.
        /// </summary>
        public static bool IsPossiblyPatternToken(SyntaxToken token, ISyntaxFacts syntaxFacts)
        {
            if (syntaxFacts.IsStringLiteral(token))
            {
                // Try the argument check first; the comment scan only runs when that fails.
                if (IsMethodOrConstructorArgument(token, syntaxFacts))
                {
                    return true;
                }

                return HasRegexLanguageComment(token, syntaxFacts, out _);
            }

            return false;
        }

        /// <summary>
        /// Looks for a regex language comment that applies to <paramref name="token"/>: first in the trailing
        /// trivia of the preceding token, then in the leading trivia of each ancestor node, up to and including
        /// the first enclosing statement.
        /// </summary>
        /// <param name="options">The options named in the comment when one is found; otherwise the default value.</param>
        private static bool HasRegexLanguageComment(
            SyntaxToken token, ISyntaxFacts syntaxFacts, out RegexOptions options)
        {
            var precedingTrivia = token.GetPreviousToken().TrailingTrivia;
            if (HasRegexLanguageComment(precedingTrivia, syntaxFacts, out options))
            {
                return true;
            }

            var current = token.Parent;
            while (current != null)
            {
                if (HasRegexLanguageComment(current.GetLeadingTrivia(), syntaxFacts, out options))
                {
                    return true;
                }

                // A statement is the outermost node allowed to annotate this token.  Comments on anything
                // higher up the parent chain must not affect it.
                if (syntaxFacts.IsStatement(current))
                {
                    break;
                }

                current = current.Parent;
            }

            options = default;
            return false;
        }

        /// <summary>
        /// Returns <see langword="true"/> for the first trivia in <paramref name="list"/> that is a regex language
        /// comment, reporting that comment's options.
        /// </summary>
        private static bool HasRegexLanguageComment(
            SyntaxTriviaList list, ISyntaxFacts syntaxFacts, out RegexOptions options)
        {
            // Covers the empty-list case; each failed per-trivia check below also leaves 'options' at default.
            options = default;

            foreach (var item in list)
            {
                if (HasRegexLanguageComment(item, syntaxFacts, out options))
                    return true;
            }

            return false;
        }

        /// <summary>
        /// Returns <see langword="true"/> when <paramref name="trivia"/> is a regular comment whose text is a
        /// well-formed regex language comment, reporting the options it names.
        /// </summary>
        private static bool HasRegexLanguageComment(
            SyntaxTrivia trivia, ISyntaxFacts syntaxFacts, out RegexOptions options)
        {
            options = default;

            if (!syntaxFacts.IsRegularComment(trivia))
            {
                return false;
            }

            // Note: ToString on SyntaxTrivia is non-allocating.  It will just return the
            // underlying text that the trivia is already pointing to.
            var (isMatch, parsedOptions) = TryMatch(trivia.ToString());
            if (!isMatch)
            {
                return false;
            }

            options = parsedOptions;
            return true;
        }

        /// <summary>
        /// Matches <paramref name="text"/> against the language-comment pattern and combines the named options.
        /// </summary>
        /// <returns>
        /// <c>(true, options)</c> when the text matches and every listed option name is a known
        /// <see cref="RegexOptions"/> member; <c>(false, RegexOptions.None)</c> otherwise.
        /// </returns>
        private static (bool success, RegexOptions options) TryMatch(string text)
        {
            var match = s_languageCommentDetector.Match(text);
            if (match.Success)
            {
                var accumulated = RegexOptions.None;
                var captures = match.Groups["option"].Captures;
                for (var i = 0; i < captures.Count; i++)
                {
                    if (!s_nameToOption.TryGetValue(captures[i].Value, out var namedOption))
                    {
                        // hit something we don't understand.  bail out.  that will help ensure
                        // users don't have weird behavior just because they misspelled something.
                        // instead, they will know they need to fix it up.
                        return (false, RegexOptions.None);
                    }

                    accumulated |= namedOption;
                }

                return (true, accumulated);
            }

            return (false, RegexOptions.None);
        }

        /// <summary>
        /// Returns <see langword="true"/> when <paramref name="token"/>'s parent is a literal expression whose
        /// own parent is an argument.
        /// </summary>
        private static bool IsMethodOrConstructorArgument(SyntaxToken token, ISyntaxFacts syntaxFacts)
        {
            var literal = token.Parent;
            if (!syntaxFacts.IsLiteralExpression(literal))
            {
                return false;
            }

            return syntaxFacts.IsArgument(literal.Parent);
        }

        /// <summary>
        /// Collects the names of the public, static methods of <paramref name="regexType"/> that have a
        /// parameter called 'pattern'.  These are helpers (like
        /// <see cref="Regex.Replace(string, string, string)"/>) where at least one (but not necessarily more)
        /// of the parameters should be treated as a pattern.
        /// </summary>
        /// <returns>
        /// A set of method names that compares case-insensitively when the language is not case sensitive.
        /// </returns>
        private static HashSet<string> GetMethodNamesOfInterest(INamedTypeSymbol regexType, ISyntaxFacts syntaxFacts)
        {
            HashSet<string> names;
            if (syntaxFacts.IsCaseSensitive)
            {
                names = new HashSet<string>();
            }
            else
            {
                names = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
            }

            foreach (var member in regexType.GetMembers())
            {
                if (member is IMethodSymbol method &&
                    method.DeclaredAccessibility == Accessibility.Public &&
                    method.IsStatic &&
                    method.Parameters.Any(p => p.Name == _patternName))
                {
                    names.Add(method.Name);
                }
            }

            return names;
        }

        /// <summary>
        /// Determines whether <paramref name="token"/> is a string literal that holds a regex pattern.
        /// </summary>
        /// <remarks>
        /// A string literal qualifies when it is annotated with a regex language comment, or when it is the
        /// argument bound to the 'pattern' parameter of a public static <see cref="Regex"/> method or of a
        /// <see cref="Regex"/> constructor (explicit or implicit object creation).
        /// </remarks>
        /// <param name="token">The token to examine.</param>
        /// <param name="semanticModel">Semantic model used to bind the enclosing invocation or creation.</param>
        /// <param name="cancellationToken">Token used to cancel the semantic queries.</param>
        /// <param name="options">
        /// On success, the options named in the comment, or the constant value of a sibling
        /// <see cref="RegexOptions"/> argument if one is found.  The default value otherwise.
        /// </param>
        /// <returns><see langword="true"/> if the token should be treated as a regex pattern.</returns>
        public bool IsRegexPattern(SyntaxToken token, SemanticModel semanticModel, CancellationToken cancellationToken, out RegexOptions options)
        {
            var syntaxFacts = _info.SyntaxFacts;
            if (!syntaxFacts.IsStringLiteral(token))
            {
                options = default;
                return false;
            }

            // An explicit language comment is sufficient on its own.  It is checked exactly once here, rather
            // than once as part of a pre-filter and then a second time to retrieve the options.
            if (HasRegexLanguageComment(token, syntaxFacts, out options))
            {
                return true;
            }

            // No comment: from here on only a literal passed directly as an argument can qualify.  'options' was
            // reset to its default value by the failed comment check above.
            if (!IsMethodOrConstructorArgument(token, syntaxFacts))
            {
                return false;
            }

            var argumentNode = token.Parent.Parent;
            Debug.Assert(syntaxFacts.IsArgument(argumentNode));

            var argumentList = argumentNode.Parent;
            var invocationOrCreation = argumentList.Parent;

            if (syntaxFacts.IsInvocationExpression(invocationOrCreation))
            {
                var invokedExpression = syntaxFacts.GetExpressionOfInvocationExpression(invocationOrCreation);
                var name = GetNameOfInvokedExpression(invokedExpression);
                if (!_methodNamesOfInterest.Contains(name))
                {
                    return false;
                }

                // Is a string argument to a method that looks like it could be a Regex method.
                // Need to do deeper analysis.

                // Note we do not use GetAllSymbols here because we don't want to incur the
                // allocation.
                var symbolInfo = semanticModel.GetSymbolInfo(invocationOrCreation, cancellationToken);
                if (TryAnalyzeInvocation(argumentNode, semanticModel, symbolInfo.Symbol, cancellationToken, out options))
                {
                    return true;
                }

                // Binding may not have produced a single symbol; consider the candidates as well.
                foreach (var candidate in symbolInfo.CandidateSymbols)
                {
                    if (TryAnalyzeInvocation(argumentNode, semanticModel, candidate, cancellationToken, out options))
                    {
                        return true;
                    }
                }

                return false;
            }

            if (syntaxFacts.IsObjectCreationExpression(invocationOrCreation))
            {
                // Syntactic name check first, so that we only bind creations whose type is spelled 'Regex'.
                var typeNode = syntaxFacts.GetObjectCreationType(invocationOrCreation);
                var typeName = GetNameOfType(typeNode, syntaxFacts);
                if (typeName == null || syntaxFacts.StringComparer.Compare(nameof(Regex), typeName) != 0)
                {
                    return false;
                }

                return IsRegexConstructorArgument(
                    invocationOrCreation, argumentNode, semanticModel, cancellationToken, out options);
            }

            if (syntaxFacts.IsImplicitObjectCreationExpression(invocationOrCreation))
            {
                // There is no type name to check syntactically, so go straight to binding.
                return IsRegexConstructorArgument(
                    invocationOrCreation, argumentNode, semanticModel, cancellationToken, out options);
            }

            return false;
        }

        /// <summary>
        /// Binds <paramref name="creation"/> and, when the bound constructor belongs to the <see cref="Regex"/>
        /// type, analyzes <paramref name="argumentNode"/> as a potential pattern argument.
        /// </summary>
        private bool IsRegexConstructorArgument(
            SyntaxNode creation, SyntaxNode argumentNode, SemanticModel semanticModel,
            CancellationToken cancellationToken, out RegexOptions options)
        {
            var constructor = semanticModel.GetSymbolInfo(creation, cancellationToken).GetAnySymbol();
            if (_regexType.Equals(constructor?.ContainingType))
            {
                // Argument to "new Regex".  Need to do deeper analysis
                return AnalyzeStringLiteral(
                    argumentNode, semanticModel, cancellationToken, out options);
            }

            options = default;
            return false;
        }

        /// <summary>
        /// Analyzes <paramref name="argumentNode"/> as a pattern argument, but only when
        /// <paramref name="method"/> is a public static member of the <see cref="Regex"/> type.
        /// </summary>
        /// <param name="options">The options determined by the analysis; the default value on failure.</param>
        private bool TryAnalyzeInvocation(
            SyntaxNode argumentNode, SemanticModel semanticModel, ISymbol method,
            CancellationToken cancellationToken, out RegexOptions options)
        {
            options = default;

            var isPublicStaticRegexMethod =
                method != null &&
                method.DeclaredAccessibility == Accessibility.Public &&
                method.IsStatic &&
                _regexType.Equals(method.ContainingType);

            return isPublicStaticRegexMethod &&
                   AnalyzeStringLiteral(argumentNode, semanticModel, cancellationToken, out options);
        }

        /// <summary>
        /// Parses <paramref name="token"/> as a regex when <see cref="IsRegexPattern"/> accepts it, using the
        /// options that check produced.  Returns <see langword="null"/> when the token is not a regex pattern.
        /// </summary>
        public RegexTree TryParseRegexPattern(SyntaxToken token, SemanticModel semanticModel, CancellationToken cancellationToken)
        {
            if (this.IsRegexPattern(token, semanticModel, cancellationToken, out var options))
            {
                var virtualChars = _info.VirtualCharService.TryConvertToVirtualChars(token);
                return RegexParser.TryParse(virtualChars, options);
            }

            return null;
        }

        /// <summary>
        /// Returns <see langword="true"/> when <paramref name="argumentNode"/> binds to a parameter named
        /// 'pattern', and in that case reports the regex options found among its sibling arguments.
        /// </summary>
        /// <param name="options">The sibling-derived options on success; the default value otherwise.</param>
        private bool AnalyzeStringLiteral(
            SyntaxNode argumentNode,
            SemanticModel semanticModel,
            CancellationToken cancellationToken,
            out RegexOptions options)
        {
            var parameter = _info.SemanticFacts.FindParameterForArgument(semanticModel, argumentNode, cancellationToken);
            var isPatternParameter = parameter?.Name == _patternName;

            // Sibling arguments are only inspected once we know this argument really is the pattern.
            options = isPatternParameter
                ? GetRegexOptions(argumentNode, semanticModel, cancellationToken)
                : default;

            return isPatternParameter;
        }

        /// <summary>
        /// Scans the arguments that sit alongside <paramref name="argumentNode"/> for one whose type is named
        /// <see cref="RegexOptions"/> and whose value is a compile-time constant, and returns that value.
        /// </summary>
        /// <param name="argumentNode">The pattern argument; it is skipped during the scan.</param>
        /// <param name="semanticModel">Semantic model used to get each sibling's type and constant value.</param>
        /// <param name="cancellationToken">Token used to cancel the semantic queries.</param>
        /// <returns>
        /// The value of the first qualifying sibling argument, or <see cref="RegexOptions.None"/> when there is
        /// none.
        /// </returns>
        private RegexOptions GetRegexOptions(SyntaxNode argumentNode, SemanticModel semanticModel, CancellationToken cancellationToken)
        {
            var syntaxFacts = _info.SyntaxFacts;
            var argumentList = argumentNode.Parent;

            foreach (var siblingArg in syntaxFacts.GetArgumentsOfArgumentList(argumentList))
            {
                if (siblingArg == argumentNode)
                {
                    continue;
                }

                var expr = syntaxFacts.GetExpressionOfArgument(siblingArg);
                if (expr == null)
                {
                    continue;
                }

                var exprType = semanticModel.GetTypeInfo(expr, cancellationToken);
                if (exprType.Type?.Name != nameof(RegexOptions))
                {
                    continue;
                }

                // The type is matched by name only, so it is not guaranteed to be the real int-backed
                // System.Text.RegularExpressions.RegexOptions.  Only accept a boxed int: unboxing any other
                // constant (or a null one) with a direct cast would throw instead of just yielding no options.
                var constVal = semanticModel.GetConstantValue(expr, cancellationToken);
                if (constVal.HasValue && constVal.Value is int constantOptions)
                {
                    return (RegexOptions)constantOptions;
                }
            }

            return RegexOptions.None;
        }

        /// <summary>
        /// Returns the identifier text of a type name, looking at the right-hand side of qualified names.
        /// Returns <see langword="null"/> when the (right-most) node is not an identifier name.
        /// </summary>
        private string GetNameOfType(SyntaxNode typeNode, ISyntaxFacts syntaxFacts)
        {
            // Step through qualification ("A.B.C" -> "C") until something that is not a qualified name remains.
            var current = typeNode;
            while (syntaxFacts.IsQualifiedName(current))
            {
                current = syntaxFacts.GetRightSideOfDot(current);
            }

            return syntaxFacts.IsIdentifierName(current)
                ? syntaxFacts.GetIdentifierOfSimpleName(current).ValueText
                : null;
        }

        /// <summary>
        /// Returns the identifier text of the method being invoked: the member name of a simple member access,
        /// or the identifier itself for a plain identifier name.  Returns <see langword="null"/> for any other
        /// kind of expression.
        /// </summary>
        private string GetNameOfInvokedExpression(SyntaxNode invokedExpression)
        {
            var syntaxFacts = _info.SyntaxFacts;

            SyntaxNode nameNode;
            if (syntaxFacts.IsSimpleMemberAccessExpression(invokedExpression))
            {
                nameNode = syntaxFacts.GetNameOfMemberAccessExpression(invokedExpression);
            }
            else if (syntaxFacts.IsIdentifierName(invokedExpression))
            {
                nameNode = invokedExpression;
            }
            else
            {
                return null;
            }

            return syntaxFacts.GetIdentifierOfSimpleName(nameNode).ValueText;
        }

        /// <summary>
        /// Exposes otherwise-private members of <see cref="RegexPatternDetector"/>.
        /// </summary>
        internal static class TestAccessor
        {
            /// <summary>
            /// Forwards to the private <c>TryMatch</c> of <see cref="RegexPatternDetector"/>.
            /// </summary>
            public static (bool success, RegexOptions options) TryMatch(string text)
            {
                return RegexPatternDetector.TryMatch(text);
            }
        }
    }
}
