// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package typesniffer

import (
	"encoding/base64"
	"encoding/hex"
	"net/http"
	"strings"
	"testing"

	"github.com/stretchr/testify/assert"
)

// TestDetectContentTypeLongerThanSniffLen checks that an <svg> element is no
// longer reported as SVG once more than SniffContentSize bytes precede it.
func TestDetectContentTypeLongerThanSniffLen(t *testing.T) {
	const svgMime = "image/svg+xml"

	// Baseline: with only a short comment in front, the document is detected as SVG.
	shortDoc := []byte(`<!-- Comment --><svg></svg>`)
	assert.Equal(t, svgMime, DetectContentType(shortDoc).contentType)

	// With a comment body of SniffContentSize bytes in front, the result must be something else.
	longDoc := []byte(`<!-- ` + strings.Repeat("x", SniffContentSize) + ` --><svg></svg>`)
	assert.NotEqual(t, svgMime, DetectContentType(longDoc).contentType)
}

func TestIsTextFile(t *testing.T) {
	assert.True(t, DetectContentType([]byte{}).IsText())
	assert.True(t, DetectContentType([]byte("lorem ipsum")).IsText())
}

func TestIsSvgImage(t *testing.T) {
	assert.True(t, DetectContentType([]byte("<svg></svg>")).IsSvgImage())
	assert.True(t, DetectContentType([]byte("    <svg></svg>")).IsSvgImage())
	assert.True(t, DetectContentType([]byte(`<svg width="100"></svg>`)).IsSvgImage())
	assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?><svg></svg>`)).IsSvgImage())
	assert.True(t, DetectContentType([]byte(`<!-- Comment -->
	<svg></svg>`)).IsSvgImage())
	assert.True(t, DetectContentType([]byte(`<!-- Multiple -->
	<!-- Comments -->
	<svg></svg>`)).IsSvgImage())
	assert.True(t, DetectContentType([]byte(`<!-- Multiline
	Comment -->
	<svg></svg>`)).IsSvgImage())
	assert.True(t, DetectContentType([]byte(`<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1 Basic//EN"
	"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd">
	<svg></svg>`)).IsSvgImage())
	assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
	<!-- Comment -->
	<svg></svg>`)).IsSvgImage())
	assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
	<!-- Multiple -->
	<!-- Comments -->
	<svg></svg>`)).IsSvgImage())
	assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
	<!-- Multiline
	Comment -->
	<svg></svg>`)).IsSvgImage())
	assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
	<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
	<!-- Multiline
	Comment -->
	<svg></svg>`)).IsSvgImage())

	// the DetectContentType should work for incomplete data, because only beginning bytes are used for detection
	assert.True(t, DetectContentType([]byte(`<svg>....`)).IsSvgImage())

	assert.False(t, DetectContentType([]byte{}).IsSvgImage())
	assert.False(t, DetectContentType([]byte("svg")).IsSvgImage())
	assert.False(t, DetectContentType([]byte("<svgfoo></svgfoo>")).IsSvgImage())
	assert.False(t, DetectContentType([]byte("text<svg></svg>")).IsSvgImage())
	assert.False(t, DetectContentType([]byte("<html><body><svg></svg></body></html>")).IsSvgImage())
	assert.False(t, DetectContentType([]byte(`<script>"<svg></svg>"</script>`)).IsSvgImage())
	assert.False(t, DetectContentType([]byte(`<!-- <svg></svg> inside comment -->
	<foo></foo>`)).IsSvgImage())
	assert.False(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
	<!-- <svg></svg> inside comment -->
	<foo></foo>`)).IsSvgImage())

	assert.False(t, DetectContentType([]byte(`
<!-- comment1 -->
<div>
	<!-- comment2 -->
	<svg></svg>
</div>
`)).IsSvgImage())

	assert.False(t, DetectContentType([]byte(`
<!-- comment1
-->
<div>
	<!-- comment2
-->
	<svg></svg>
</div>
`)).IsSvgImage())
	assert.False(t, DetectContentType([]byte(`<html><body><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg></svg></body></html>`)).IsSvgImage())
	assert.False(t, DetectContentType([]byte(`<html><body><?xml version="1.0" encoding="UTF-8"?><svg></svg></body></html>`)).IsSvgImage())
}

// TestIsPDF checks that content beginning with a PDF header is classified as
// PDF and that plain text is not.
func TestIsPDF(t *testing.T) {
	// Base64 of the first bytes of a PDF document (decodes to "%PDF-1.6...").
	pdf, err := base64.StdEncoding.DecodeString("JVBERi0xLjYKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURlY29kZT4+CnN0cmVhbQp4nF3NPwsCMQwF8D2f4s2CNYk1baF0EHRwOwg4iJt/NsFb/PpevUE4Mjwe")
	if err != nil {
		// A corrupted fixture would otherwise be sniffed as partial data and hide the real cause.
		t.Fatalf("decoding PDF fixture: %v", err)
	}
	assert.True(t, DetectContentType(pdf).IsPDF())
	assert.False(t, DetectContentType([]byte("plain text")).IsPDF())
}

// TestIsVideo checks that the leading bytes of an MP4 file are classified as
// video and that plain text is not.
func TestIsVideo(t *testing.T) {
	// Base64 of the first bytes of an MP4 file (starts with an "ftypmp42" box).
	mp4, err := base64.StdEncoding.DecodeString("AAAAGGZ0eXBtcDQyAAAAAGlzb21tcDQyAAEI721vb3YAAABsbXZoZAAAAADaBlwX2gZcFwAAA+gA")
	if err != nil {
		// A corrupted fixture would otherwise be sniffed as partial data and hide the real cause.
		t.Fatalf("decoding MP4 fixture: %v", err)
	}
	assert.True(t, DetectContentType(mp4).IsVideo())
	assert.False(t, DetectContentType([]byte("plain text")).IsVideo())
}

// TestIsAudio checks that ID3-tagged binary data is classified as audio,
// while plain text, including text that merely starts with "ID3", is not.
func TestIsAudio(t *testing.T) {
	// Base64 of the first bytes of an MP3 file (starts with an "ID3" tag).
	mp3, err := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
	if err != nil {
		// A corrupted fixture would otherwise be sniffed as partial data and hide the real cause.
		t.Fatalf("decoding MP3 fixture: %v", err)
	}
	assert.True(t, DetectContentType(mp3).IsAudio())
	assert.False(t, DetectContentType([]byte("plain text")).IsAudio())

	// "ID3" followed by a NUL byte is audio; "ID3" followed only by text must stay text.
	assert.True(t, DetectContentType([]byte("ID3Toy\000")).IsAudio())
	assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ...")).IsText())          // test ID3 tag for plain text
	assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ..."+"🌛"[0:2])).IsText()) // test ID3 tag with incomplete UTF8 char
}

// TestDetectContentTypeOgg checks that Ogg containers are told apart by their
// first stream: a "vorbis" header is audio, a "theora" header is video.
func TestDetectContentTypeOgg(t *testing.T) {
	// Hex dump of an Ogg page whose payload starts with "\x01vorbis".
	oggAudio, err := hex.DecodeString("4f67675300020000000000000000352f0000000000007dc39163011e01766f72626973000000000244ac0000000000000071020000000000b8014f6767530000")
	if err != nil {
		// A corrupted fixture would otherwise be sniffed as partial data and hide the real cause.
		t.Fatalf("decoding Ogg audio fixture: %v", err)
	}
	st := DetectContentType(oggAudio)
	assert.True(t, st.IsAudio())

	// Hex dump of an Ogg page whose payload starts with "\x80theora".
	oggVideo, err := hex.DecodeString("4f676753000200000000000000007d9747ef000000009b59daf3012a807468656f7261030201001e00110001e000010e00020000001e00000001000001000001")
	if err != nil {
		t.Fatalf("decoding Ogg video fixture: %v", err)
	}
	st = DetectContentType(oggVideo)
	assert.True(t, st.IsVideo())
}

// TestDetectFileTypeBox feeds detectFileTypeBox a series of "ftyp" headers
// whose first four bytes vary against the amount of data supplied, and checks
// whether a box is found and which brands are returned.
func TestDetectFileTypeBox(t *testing.T) {
	cases := []struct {
		input      string
		wantFound  bool
		wantBrands []string // only compared when wantFound is true
	}{
		// Leading bytes 0x0000ffff with only 16 bytes supplied: no box.
		{input: "\x00\x00\xff\xffftypAAAA....", wantFound: false},
		// 0x0c (12) bytes: header plus the first brand only.
		{input: "\x00\x00\x00\x0cftypAAAA", wantFound: true, wantBrands: []string{"AAAA"}},
		// 0x10 (16) bytes: the four bytes after the first brand are not reported as a brand.
		{input: "\x00\x00\x00\x10ftypAAAA....BBBB", wantFound: true, wantBrands: []string{"AAAA"}},
		// 0x14 (20) bytes: the trailing brand is reported as well.
		{input: "\x00\x00\x00\x14ftypAAAA....BBBB", wantFound: true, wantBrands: []string{"AAAA", "BBBB"}},
		// 0x14 (20) declared but only 19 bytes supplied: no box.
		{input: "\x00\x00\x00\x14ftypAAAA....BBB", wantFound: false},
		// 0x13 (19) bytes: the incomplete trailing brand is dropped.
		{input: "\x00\x00\x00\x13ftypAAAA....BBB", wantFound: true, wantBrands: []string{"AAAA"}},
	}

	for _, tc := range cases {
		brands, found := detectFileTypeBox([]byte(tc.input))
		assert.Equal(t, tc.wantFound, found, "input: %q", tc.input)
		if tc.wantFound {
			assert.Equal(t, tc.wantBrands, brands, "input: %q", tc.input)
		}
	}
}

// TestDetectContentTypeAvif checks that an "ftyp" header carrying the "avif"
// brand is reported as MimeTypeImageAvif.
func TestDetectContentTypeAvif(t *testing.T) {
	detected := DetectContentType([]byte("\x00\x00\x00\x20ftypavif......................."))
	assert.Equal(t, MimeTypeImageAvif, detected.contentType)
}

// TestDetectContentTypeIncorrectFont guards against a false positive of the
// standard library sniffer: plain text that has "LP" at byte offset 34 is
// reported by net/http as an Embedded OpenType font. Our detector must report
// such text as plain text while still recognising genuine binary font data.
func TestDetectContentTypeIncorrectFont(t *testing.T) {
	const fontMime = "application/vnd.ms-fontobject"

	// The sentence is worded so that "LP" lands exactly at byte offset 34.
	text := []byte("Stupid Golang keep detecting 34th LP as font")

	// The standard library mis-detects it; upstream declined to change this,
	// see https://github.com/golang/go/issues/77172
	assert.Equal(t, fontMime, http.DetectContentType(text))

	// Our own detection therefore has to correct the result.
	assert.Equal(t, "text/plain; charset=utf-8", DetectContentType(text).contentType)

	// Genuine binary font data (leading bytes of "opensans-regular.eot") must
	// still be detected as a font by both the standard library and our detector.
	eotHeader := []byte{
		0x3d, 0x30, 0x00, 0x00, 0x6b, 0x2f, 0x00, 0x00, 0x02, 0x00, 0x02, 0x00, 0x04, 0x00, 0x00, 0x00,
		0x02, 0x0b, 0x06, 0x06, 0x03, 0x05, 0x04, 0x02, 0x02, 0x04, 0x01, 0x00, 0x90, 0x01, 0x00, 0x00,
		0x04, 0x00, 0x4c, 0x50, 0xef, 0x02, 0x00, 0xe0, 0x5b, 0x20, 0x00, 0x40, 0x28, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x00, 0x00, 0x9f, 0x01, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x63, 0xf4, 0x17, 0x14,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
		0x00, 0x00, 0x12, 0x00, 0x4f, 0x00, 0x70, 0x00, 0x65, 0x00, 0x6e, 0x00, 0x20, 0x00, 0x53, 0x00,
	}
	assert.Equal(t, fontMime, http.DetectContentType(eotHeader))
	assert.Equal(t, fontMime, DetectContentType(eotHeader).contentType)
}
