//  Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package regexp

import (
	"regexp/syntax"
	"unicode"

	unicode_utf8 "unicode/utf8"

	"github.com/couchbase/vellum/utf8"
)

type compiler struct {
	sizeLimit uint
	insts     prog
	instsPool []inst

	sequences  utf8.Sequences
	rangeStack utf8.RangeStack
	startBytes []byte
	endBytes   []byte
}

func newCompiler(sizeLimit uint) *compiler {
	return &compiler{
		sizeLimit:  sizeLimit,
		startBytes: make([]byte, unicode_utf8.UTFMax),
		endBytes:   make([]byte, unicode_utf8.UTFMax),
	}
}

func (c *compiler) compile(ast *syntax.Regexp) (prog, error) {
	err := c.c(ast)
	if err != nil {
		return nil, err
	}
	inst := c.allocInst()
	inst.op = OpMatch
	c.insts = append(c.insts, inst)
	return c.insts, nil
}

func (c *compiler) c(ast *syntax.Regexp) (err error) {
	if ast.Flags&syntax.NonGreedy > 1 {
		return ErrNoLazy
	}

	switch ast.Op {
	case syntax.OpEndLine, syntax.OpBeginLine,
		syntax.OpBeginText, syntax.OpEndText:
		return ErrNoEmpty
	case syntax.OpWordBoundary, syntax.OpNoWordBoundary:
		return ErrNoWordBoundary
	case syntax.OpEmptyMatch:
		return nil
	case syntax.OpLiteral:
		for _, r := range ast.Rune {
			if ast.Flags&syntax.FoldCase > 0 {
				next := syntax.Regexp{
					Op:    syntax.OpCharClass,
					Flags: ast.Flags & syntax.FoldCase,
					Rune0: [2]rune{r, r},
				}
				next.Rune = next.Rune0[0:2]
				return c.c(&next)
			}
			c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
				r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
			if err != nil {
				return err
			}
			for _, seq := range c.sequences {
				c.compileUtf8Ranges(seq)
			}
		}
	case syntax.OpAnyChar:
		next := syntax.Regexp{
			Op:    syntax.OpCharClass,
			Flags: ast.Flags & syntax.FoldCase,
			Rune0: [2]rune{0, unicode.MaxRune},
		}
		next.Rune = next.Rune0[:2]
		return c.c(&next)
	case syntax.OpAnyCharNotNL:
		next := syntax.Regexp{
			Op:    syntax.OpCharClass,
			Flags: ast.Flags & syntax.FoldCase,
			Rune:  []rune{0, 0x09, 0x0B, unicode.MaxRune},
		}
		return c.c(&next)
	case syntax.OpCharClass:
		return c.compileClass(ast)
	case syntax.OpCapture:
		return c.c(ast.Sub[0])
	case syntax.OpConcat:
		for _, sub := range ast.Sub {
			err := c.c(sub)
			if err != nil {
				return err
			}
		}
		return nil
	case syntax.OpAlternate:
		if len(ast.Sub) == 0 {
			return nil
		}
		jmpsToEnd := make([]uint, 0, len(ast.Sub)-1)
		// does not handle last entry
		for i := 0; i < len(ast.Sub)-1; i++ {
			sub := ast.Sub[i]
			split := c.emptySplit()
			j1 := c.top()
			err := c.c(sub)
			if err != nil {
				return err
			}
			jmpsToEnd = append(jmpsToEnd, c.emptyJump())
			j2 := c.top()
			c.setSplit(split, j1, j2)
		}
		// handle last entry
		err := c.c(ast.Sub[len(ast.Sub)-1])
		if err != nil {
			return err
		}
		end := uint(len(c.insts))
		for _, jmpToEnd := range jmpsToEnd {
			c.setJump(jmpToEnd, end)
		}
	case syntax.OpQuest:
		split := c.emptySplit()
		j1 := c.top()
		err := c.c(ast.Sub[0])
		if err != nil {
			return err
		}
		j2 := c.top()
		c.setSplit(split, j1, j2)

	case syntax.OpStar:
		j1 := c.top()
		split := c.emptySplit()
		j2 := c.top()
		err := c.c(ast.Sub[0])
		if err != nil {
			return err
		}
		jmp := c.emptyJump()
		j3 := uint(len(c.insts))

		c.setJump(jmp, j1)
		c.setSplit(split, j2, j3)

	case syntax.OpPlus:
		j1 := c.top()
		err := c.c(ast.Sub[0])
		if err != nil {
			return err
		}
		split := c.emptySplit()
		j2 := c.top()
		c.setSplit(split, j1, j2)

	case syntax.OpRepeat:
		if ast.Max == -1 {
			for i := 0; i < ast.Min; i++ {
				err := c.c(ast.Sub[0])
				if err != nil {
					return err
				}
			}
			next := syntax.Regexp{
				Op:    syntax.OpStar,
				Flags: ast.Flags,
				Sub:   ast.Sub,
				Sub0:  ast.Sub0,
				Rune:  ast.Rune,
				Rune0: ast.Rune0,
			}
			return c.c(&next)
		}
		for i := 0; i < ast.Min; i++ {
			err := c.c(ast.Sub[0])
			if err != nil {
				return err
			}
		}
		splits := make([]uint, 0, ast.Max-ast.Min)
		starts := make([]uint, 0, ast.Max-ast.Min)
		for i := ast.Min; i < ast.Max; i++ {
			splits = append(splits, c.emptySplit())
			starts = append(starts, uint(len(c.insts)))
			err := c.c(ast.Sub[0])
			if err != nil {
				return err
			}
		}
		end := uint(len(c.insts))
		for i := 0; i < len(splits); i++ {
			c.setSplit(splits[i], starts[i], end)
		}

	}

	return c.checkSize()
}

func (c *compiler) checkSize() error {
	if uint(len(c.insts)*instSize) > c.sizeLimit {
		return ErrCompiledTooBig
	}
	return nil
}

func (c *compiler) compileClass(ast *syntax.Regexp) error {
	if len(ast.Rune) == 0 {
		return nil
	}
	jmps := make([]uint, 0, len(ast.Rune)-2)
	// does not do last pair
	for i := 0; i < len(ast.Rune)-2; i += 2 {
		rstart := ast.Rune[i]
		rend := ast.Rune[i+1]

		split := c.emptySplit()
		j1 := c.top()
		err := c.compileClassRange(rstart, rend)
		if err != nil {
			return err
		}
		jmps = append(jmps, c.emptyJump())
		j2 := c.top()
		c.setSplit(split, j1, j2)
	}
	// handle last pair
	rstart := ast.Rune[len(ast.Rune)-2]
	rend := ast.Rune[len(ast.Rune)-1]
	err := c.compileClassRange(rstart, rend)
	if err != nil {
		return err
	}
	end := c.top()
	for _, jmp := range jmps {
		c.setJump(jmp, end)
	}
	return nil
}

func (c *compiler) compileClassRange(startR, endR rune) (err error) {
	c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
		startR, endR, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
	if err != nil {
		return err
	}
	jmps := make([]uint, 0, len(c.sequences)-1)
	// does not do last entry
	for i := 0; i < len(c.sequences)-1; i++ {
		seq := c.sequences[i]
		split := c.emptySplit()
		j1 := c.top()
		c.compileUtf8Ranges(seq)
		jmps = append(jmps, c.emptyJump())
		j2 := c.top()
		c.setSplit(split, j1, j2)
	}
	// handle last entry
	c.compileUtf8Ranges(c.sequences[len(c.sequences)-1])
	end := c.top()
	for _, jmp := range jmps {
		c.setJump(jmp, end)
	}

	return nil
}

func (c *compiler) compileUtf8Ranges(seq utf8.Sequence) {
	for _, r := range seq {
		inst := c.allocInst()
		inst.op = OpRange
		inst.rangeStart = r.Start
		inst.rangeEnd = r.End
		c.insts = append(c.insts, inst)
	}
}

func (c *compiler) emptySplit() uint {
	inst := c.allocInst()
	inst.op = OpSplit
	c.insts = append(c.insts, inst)
	return c.top() - 1
}

func (c *compiler) emptyJump() uint {
	inst := c.allocInst()
	inst.op = OpJmp
	c.insts = append(c.insts, inst)
	return c.top() - 1
}

func (c *compiler) setSplit(i, pc1, pc2 uint) {
	split := c.insts[i]
	split.splitA = pc1
	split.splitB = pc2
}

func (c *compiler) setJump(i, pc uint) {
	jmp := c.insts[i]
	jmp.to = pc
}

func (c *compiler) top() uint {
	return uint(len(c.insts))
}

func (c *compiler) allocInst() *inst {
	if len(c.instsPool) <= 0 {
		c.instsPool = make([]inst, 16)
	}
	inst := &c.instsPool[0]
	c.instsPool = c.instsPool[1:]
	return inst
}