//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2026 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//

package db

import (
	"context"
	"fmt"
	"strings"
	"testing"
	"time"

	"github.com/go-openapi/strfmt"
	"github.com/sirupsen/logrus/hooks/test"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/mock"
	"github.com/stretchr/testify/require"

	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
	"github.com/weaviate/weaviate/adapters/repos/db/inverted"
	"github.com/weaviate/weaviate/adapters/repos/db/queue"
	"github.com/weaviate/weaviate/adapters/repos/db/roaringset"
	resolver "github.com/weaviate/weaviate/adapters/repos/db/sharding"
	"github.com/weaviate/weaviate/cluster/router/types"
	"github.com/weaviate/weaviate/entities/loadlimiter"
	"github.com/weaviate/weaviate/entities/models"
	"github.com/weaviate/weaviate/entities/replication"
	"github.com/weaviate/weaviate/entities/schema"
	"github.com/weaviate/weaviate/entities/storobj"
	enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
	"github.com/weaviate/weaviate/usecases/monitoring"
	schemaUC "github.com/weaviate/weaviate/usecases/schema"
	"github.com/weaviate/weaviate/usecases/sharding"
)

// TestIndex_ObjectStorageSize_Comprehensive checks the object count and the
// object storage size reported by a shard. Every table case builds its own
// index with a single local shard, optionally writes objectCount objects whose
// "description" property is sized from objectSize, loads the lazy shard and
// then asserts that:
//   - the object count equals expectedObjectCount, and
//   - the storage size lies within [expectedStorageSizeMin,
//     expectedStorageSizeMax] for populated shards, or equals
//     expectedStorageSizeMin (zero) for the empty shard.
func TestIndex_ObjectStorageSize_Comprehensive(t *testing.T) {
	ctx := context.Background()
	dirName := t.TempDir()
	logger, _ := test.NewNullLogger()

	tests := []struct {
		name                   string
		className              string
		shardName              string
		objectCount            int
		objectSize             int // approximate size in bytes per object
		expectedObjectCount    int
		expectedStorageSizeMin int64 // minimum expected storage size
		expectedStorageSizeMax int64 // maximum expected storage size (allowing for overhead)
		setupData              bool
		description            string
	}{
		{
			name:        "empty shard",
			className:   "TestClass",
			shardName:   "test-shard-empty",
			setupData:   false,
			description: "Empty shard should have zero storage size",
		},
		{
			name:                   "shard with small objects",
			className:              "TestClass",
			shardName:              "test-shard-small",
			objectCount:            10,
			objectSize:             100, // ~100 bytes per object
			expectedObjectCount:    10,
			expectedStorageSizeMin: int64(10 * 100),     // minimum: just the data
			expectedStorageSizeMax: int64(10 * 100 * 5), // maximum: data + overhead (increased to 5x)
			setupData:              true,
			description:            "Shard with small objects should have proportional storage size",
		},
		{
			name:                   "shard with medium objects",
			className:              "TestClass",
			shardName:              "test-shard-medium",
			objectCount:            50,
			objectSize:             500, // ~500 bytes per object
			expectedObjectCount:    50,
			expectedStorageSizeMin: int64(50 * 500),     // minimum: just the data
			expectedStorageSizeMax: int64(50 * 500 * 3), // maximum: data + overhead
			setupData:              true,
			description:            "Shard with medium objects should have proportional storage size",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Record what this case is meant to demonstrate; shown on failure
			// or with -v.
			t.Log(tt.description)

			// Sharding state with exactly one shard owned by the local node.
			shardState := &sharding.State{
				Physical: map[string]sharding.Physical{
					tt.shardName: {
						Name:           tt.shardName,
						BelongsToNodes: []string{"test-node"},
						Status:         models.TenantActivityStatusHOT,
					},
				},
			}
			shardState.SetLocalName("test-node")

			// Test class; multi-tenancy mirrors the sharding state (disabled here).
			class := &models.Class{
				Class: tt.className,
				Properties: []*models.Property{
					{
						Name:         "name",
						DataType:     schema.DataTypeText.PropString(),
						Tokenization: models.PropertyTokenizationWhitespace,
					},
					{
						Name:         "description",
						DataType:     schema.DataTypeText.PropString(),
						Tokenization: models.PropertyTokenizationWhitespace,
					},
					{
						Name:     "count",
						DataType: schema.DataTypeInt.PropString(),
					},
				},
				InvertedIndexConfig: &models.InvertedIndexConfig{},
				MultiTenancyConfig: &models.MultiTenancyConfig{
					Enabled: shardState.PartitioningEnabled,
				},
			}

			// Fake schema containing only the test class.
			fakeSchema := schema.Schema{
				Objects: &models.Schema{
					Classes: []*models.Class{class},
				},
			}

			scheduler := queue.NewScheduler(queue.SchedulerOptions{
				Logger:  logger,
				Workers: 1,
			})

			// The schema reader always hands the class and sharding state built
			// above to the supplied callback. Registered once: an expectation
			// without a call limit keeps matching, so a second identical
			// registration would never be used.
			mockSchemaReader := schemaUC.NewMockSchemaReader(t)
			mockSchemaReader.EXPECT().Read(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(className string, retryIfClassNotFound bool, readerFunc func(*models.Class, *sharding.State) error) error {
				return readerFunc(class, shardState)
			}).Maybe()

			// Schema getter stubs; all optional so that unused ones do not fail the test.
			mockSchema := schemaUC.NewMockSchemaGetter(t)
			mockSchema.EXPECT().GetSchemaSkipAuth().Maybe().Return(fakeSchema)
			mockSchema.EXPECT().ReadOnlyClass(tt.className).Maybe().Return(class)
			mockSchema.EXPECT().NodeName().Maybe().Return("test-node")
			mockSchema.EXPECT().ShardFromUUID(tt.className, mock.Anything).Return(tt.shardName).Maybe()
			mockSchema.EXPECT().ShardOwner(tt.className, tt.shardName).Maybe().Return("test-node", nil)

			mockRouter := types.NewMockRouter(t)
			mockRouter.EXPECT().GetWriteReplicasLocation(tt.className, mock.Anything, tt.shardName).
				Return(types.WriteReplicaSet{Replicas: []types.Replica{{NodeName: "test-node", ShardName: tt.shardName, HostAddr: "110.12.15.23"}}}, nil).Maybe()
			shardResolver := resolver.NewShardResolver(class.Class, class.MultiTenancyConfig.Enabled, mockSchema)

			// Create index
			index, err := NewIndex(ctx, IndexConfig{
				RootPath:              dirName,
				ClassName:             schema.ClassName(tt.className),
				ReplicationFactor:     1,
				ShardLoadLimiter:      loadlimiter.NewLoadLimiter(monitoring.NoopRegisterer, "dummy", 1),
				TrackVectorDimensions: true,
			}, inverted.ConfigFromModel(class.InvertedIndexConfig),
				enthnsw.UserConfig{
					VectorCacheMaxObjects: 1000,
				}, nil, mockRouter, shardResolver, mockSchema, mockSchemaReader, nil, logger, nil, nil, nil, &replication.GlobalConfig{}, nil, class, nil, scheduler, nil, nil, NewShardReindexerV3Noop(), roaringset.NewBitmapBufPoolNoop(), false)
			require.NoError(t, err)
			defer index.Shutdown(ctx)

			// Add properties
			for _, prop := range class.Properties {
				err = index.addProperty(ctx, prop)
				require.NoError(t, err)
			}

			if tt.setupData {
				for i := range tt.objectCount {
					// The description carries most of the payload; 50 bytes are
					// left for the remaining properties.
					obj := &models.Object{
						Class: tt.className,
						ID:    strfmt.UUID(fmt.Sprintf("00000000-0000-0000-0000-%012d", i)),
						Properties: map[string]any{
							"name":        fmt.Sprintf("test-object-%d", i),
							"description": generateStringOfSize(tt.objectSize - 50),
							"count":       i,
						},
					}
					storageObj := storobj.FromObject(obj, nil, nil, nil)
					require.NoError(t, index.putObject(ctx, storageObj, nil, obj.Tenant, 0))
				}

				// Wait for indexing to complete
				time.Sleep(2 * time.Second)
			}

			// Fetch and load the shard; this path is identical for empty and
			// populated cases.
			shard, release, err := index.GetShard(ctx, tt.shardName)
			require.NoError(t, err)
			require.NotNil(t, shard)
			defer release()

			lazyShard, ok := shard.(*LazyLoadShard)
			require.True(t, ok)
			require.NoError(t, lazyShard.Load(ctx))

			objectStorageSize, err := lazyShard.shard.ObjectStorageSize(ctx)
			require.NoError(t, err)
			objectCount, err := lazyShard.shard.ObjectCount(ctx)
			require.NoError(t, err)

			// Verify object count
			assert.Equal(t, tt.expectedObjectCount, objectCount, "Object count should match expected")

			if tt.setupData {
				// Verify storage size is within expected range
				assert.GreaterOrEqual(t, objectStorageSize, tt.expectedStorageSizeMin,
					"Storage size should be at least the minimum expected size")
				assert.LessOrEqual(t, objectStorageSize, tt.expectedStorageSizeMax,
					"Storage size should not exceed the maximum expected size")
			} else {
				// expectedStorageSizeMin is left at its zero value for the empty case.
				assert.Equal(t, tt.expectedStorageSizeMin, objectStorageSize, "Empty shard should have 0 storage size")
			}

			mockSchema.AssertExpectations(t)
		})
	}
}

// TestIndex_CalculateUnloadedObjectsMetrics_ActiveVsUnloaded compares the
// object metrics of a loaded shard with the metrics reported for the same
// shard once it is no longer held in memory.
//
// Flow:
//  1. Build a multi-tenant index with lazy loading disabled, write objectCount
//     objects into one tenant and flush the objects bucket.
//  2. Record ObjectStorageSize and ObjectCount from the loaded *Shard.
//  3. Shut down the shards and the index, then reopen an index on the same
//     root path with lazy loading enabled, shut its shards down and drop the
//     tenant from the in-memory shard map.
//  4. Call usageForCollection and require that the tenant is reported with the
//     same object count and a storage size within 1024 bytes of the value
//     recorded in step 2.
func TestIndex_CalculateUnloadedObjectsMetrics_ActiveVsUnloaded(t *testing.T) {
	ctx := context.Background()
	dirName := t.TempDir()
	logger, _ := test.NewNullLogger()

	className := "TestClass"
	tenantNamePopulated := "test-tenant"
	tenantNameEmpty := "empty-tenant"
	objectCount := 50
	objectSize := 500 // ~500 bytes per object

	// Create sharding state with multi-tenancy enabled
	shardState := &sharding.State{
		Physical: map[string]sharding.Physical{
			tenantNamePopulated: {
				Name:           tenantNamePopulated,
				BelongsToNodes: []string{"test-node"},
				Status:         models.TenantActivityStatusHOT,
			},
		},
		PartitioningEnabled: true,
	}
	shardState.SetLocalName("test-node")

	// Create test class with multi-tenancy enabled
	class := &models.Class{
		Class: className,
		Properties: []*models.Property{
			{
				Name:         "name",
				DataType:     schema.DataTypeText.PropString(),
				Tokenization: models.PropertyTokenizationWhitespace,
			},
			{
				Name:         "description",
				DataType:     schema.DataTypeText.PropString(),
				Tokenization: models.PropertyTokenizationWhitespace,
			},
		},
		InvertedIndexConfig: &models.InvertedIndexConfig{},
		MultiTenancyConfig: &models.MultiTenancyConfig{
			Enabled: shardState.PartitioningEnabled,
		},
	}

	// Create fake schema
	fakeSchema := schema.Schema{
		Objects: &models.Schema{
			Classes: []*models.Class{class},
		},
	}

	// Create scheduler
	scheduler := queue.NewScheduler(queue.SchedulerOptions{
		Logger:  logger,
		Workers: 1,
	})

	// The schema reader always hands the class and sharding state built above
	// to the supplied callback. Registered once: an expectation without a call
	// limit keeps matching, so a second identical registration would never be
	// used.
	mockSchemaReader := schemaUC.NewMockSchemaReader(t)
	mockSchemaReader.EXPECT().Read(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(className string, retryIfClassNotFound bool, readerFunc func(*models.Class, *sharding.State) error) error {
		return readerFunc(class, shardState)
	}).Maybe()

	// Create mock schema getter
	mockSchema := schemaUC.NewMockSchemaGetter(t)
	mockSchema.EXPECT().GetSchemaSkipAuth().Maybe().Return(fakeSchema)
	mockSchema.EXPECT().ReadOnlyClass(className).Maybe().Return(class)
	mockSchema.EXPECT().NodeName().Maybe().Return("test-node")
	mockSchema.EXPECT().ShardOwner(className, tenantNamePopulated).Maybe().Return("test-node", nil)
	mockSchema.EXPECT().TenantsShards(ctx, className, tenantNamePopulated).Maybe().
		Return(map[string]string{tenantNamePopulated: models.TenantActivityStatusHOT}, nil)

	shardResolver := resolver.NewShardResolver(class.Class, class.MultiTenancyConfig.Enabled, mockSchema)

	// Create index with lazy loading disabled to test active calculation methods.
	// No router is injected (nil), matching the original setup.
	index, err := NewIndex(ctx, IndexConfig{
		RootPath:              dirName,
		ClassName:             schema.ClassName(className),
		ReplicationFactor:     1,
		ShardLoadLimiter:      loadlimiter.NewLoadLimiter(monitoring.NoopRegisterer, "dummy", 1),
		TrackVectorDimensions: true,
		DisableLazyLoadShards: true, // we have to make sure lazyload shard disabled to load directly
	}, inverted.ConfigFromModel(class.InvertedIndexConfig),
		enthnsw.UserConfig{
			VectorCacheMaxObjects: 1000,
		}, nil, nil, shardResolver, mockSchema, mockSchemaReader, nil, logger, nil, nil, nil, &replication.GlobalConfig{}, nil, class, nil, scheduler, nil, nil, NewShardReindexerV3Noop(), roaringset.NewBitmapBufPoolNoop(), false)
	require.NoError(t, err)

	// Add properties
	for _, prop := range class.Properties {
		err = index.addProperty(ctx, prop)
		require.NoError(t, err)
	}

	// Add test objects
	for i := range objectCount {
		obj := &models.Object{
			Class:  className,
			ID:     strfmt.UUID(fmt.Sprintf("00000000-0000-0000-0000-%012d", i)),
			Tenant: tenantNamePopulated,
			Properties: map[string]any{
				"name":        fmt.Sprintf("test-object-%d", i),
				"description": generateStringOfSize(objectSize - 50), // Leave room for other properties
			},
		}
		storageObj := storobj.FromObject(obj, nil, nil, nil)
		require.NoError(t, index.putObject(ctx, storageObj, nil, obj.Tenant, 0))
	}

	// Wait for indexing to complete
	time.Sleep(1 * time.Second)

	// Test active shard object storage size
	activeShard, release, err := index.GetShard(ctx, tenantNamePopulated)
	require.NoError(t, err)
	require.NotNil(t, activeShard)

	// Force flush to ensure .cna files are created
	objectsBucket := activeShard.Store().Bucket(helpers.ObjectsBucketLSM)
	require.NotNil(t, objectsBucket)
	require.NoError(t, objectsBucket.FlushMemtable())

	// With lazy loading disabled the index is expected to hand out a *Shard directly.
	loadedShard, ok := activeShard.(*Shard)
	require.True(t, ok)

	activeObjectStorageSize, err := loadedShard.ObjectStorageSize(ctx)
	require.NoError(t, err)
	activeObjectCount, err := loadedShard.ObjectCount(ctx)
	require.NoError(t, err)
	assert.Greater(t, activeObjectStorageSize, int64(0), "Active shard calculation should have object storage size > 0")

	// Test that active calculations are correct
	assert.Equal(t, objectCount, activeObjectCount, "Active shard object count should match")
	assert.Greater(t, activeObjectStorageSize, int64(objectCount*objectSize/2), "Active object storage size should be reasonable")

	// Release the shard reference before the shards are shut down.
	release()

	// Explicitly shutdown all shards to ensure data is flushed to disk
	err = index.ForEachShard(func(name string, shard ShardLike) error {
		return shard.Shutdown(ctx)
	})
	require.NoError(t, err)

	// Wait a bit for all shards to complete shutdown and data to be flushed
	time.Sleep(1 * time.Second)

	// Unload the shard from memory to test inactive calculation methods
	index.shards.LoadAndDelete(tenantNamePopulated)

	// Shut down the entire index to ensure all store metadata is persisted
	require.NoError(t, index.Shutdown(ctx))

	// Create a new index instance to test inactive calculation methods
	// This ensures we're testing the inactive methods on a fresh index that reads from disk
	newIndex, err := NewIndex(ctx, IndexConfig{
		RootPath:              dirName,
		ClassName:             schema.ClassName(className),
		ReplicationFactor:     1,
		ShardLoadLimiter:      loadlimiter.NewLoadLimiter(monitoring.NoopRegisterer, "dummy", 1),
		TrackVectorDimensions: true,
		DisableLazyLoadShards: false, // we have to make sure lazyload enabled
	}, inverted.ConfigFromModel(class.InvertedIndexConfig),
		enthnsw.UserConfig{
			VectorCacheMaxObjects: 1000,
		}, index.GetVectorIndexConfigs(), nil, shardResolver, mockSchema, mockSchemaReader, nil, logger, nil, nil, nil, &replication.GlobalConfig{}, nil, class, nil, scheduler, nil, nil, NewShardReindexerV3Noop(), roaringset.NewBitmapBufPoolNoop(), false)
	require.NoError(t, err)
	defer newIndex.Shutdown(ctx)

	// Shut down the new index's shards and drop the tenant from its shard map
	// so that the usage calculation below cannot use an in-memory shard.
	require.NoError(t, newIndex.ForEachShard(func(name string, shard ShardLike) error {
		return shard.Shutdown(ctx)
	}))
	newIndex.shards.LoadAndDelete(tenantNamePopulated)

	usage, err := newIndex.usageForCollection(ctx, time.Nanosecond, true, class.VectorConfig)
	require.NoError(t, err)

	// Track whether the populated tenant shows up at all; without this the
	// comparison below would pass vacuously on an empty usage report.
	foundPopulated := false
	for _, shardUsage := range usage.Shards {
		if shardUsage.Name == tenantNamePopulated {
			foundPopulated = true
			assert.Equal(t, int64(activeObjectCount), shardUsage.ObjectsCount, "Active and inactive object count should match")
			assert.InDelta(t, activeObjectStorageSize, shardUsage.ObjectsStorageBytes, 1024, "Active and inactive object storage size should be close")
		} else {
			// Any other reported shard must be the (empty) secondary tenant.
			assert.Equal(t, tenantNameEmpty, shardUsage.Name)
			assert.Equal(t, int64(0), shardUsage.ObjectsCount)
			assert.Equal(t, uint64(0), shardUsage.ObjectsStorageBytes)
		}
	}
	require.Truef(t, foundPopulated, "usage report should contain tenant %q", tenantNamePopulated)

	// Verify all mock expectations were met
	mockSchema.AssertExpectations(t)
}

// generateStringOfSize returns a string that is exactly size bytes long, made
// by cycling through a fixed 36-character pattern of lowercase letters
// followed by digits. A size of zero or less yields the empty string.
func generateStringOfSize(size int) string {
	const pattern = "abcdefghijklmnopqrstuvwxyz0123456789"

	if size <= 0 {
		return ""
	}

	var sb strings.Builder
	sb.Grow(size)
	for written := 0; written < size; written += len(pattern) {
		chunk := pattern
		// The final chunk is truncated so the total never exceeds size.
		if left := size - written; left < len(pattern) {
			chunk = pattern[:left]
		}
		sb.WriteString(chunk)
	}
	return sb.String()
}
