mirror of
https://github.com/unclshura/splitter.git
synced 2026-06-21 16:12:01 +00:00
74 lines
1.6 KiB
C#
74 lines
1.6 KiB
C#
namespace splitter.algo;
|
|
|
|
/// <summary>
/// Assigns numeric ids to embedding vectors by matching each new vector against
/// the identities seen so far. A vector whose cosine distance to the closest
/// known identity is within the caller's threshold reuses that identity's id and
/// is blended into its running average; otherwise a new identity is created.
/// </summary>
/// <remarks>
/// The class does no synchronization of its own. Lookup is a linear scan over
/// all known identities.
/// </remarks>
public sealed class IdentityCache
{
    /// <summary>One known identity together with its running embedding.</summary>
    private sealed class Identity
    {
        public ulong Id;
        public float[] Embedding = null!; // exponential moving average of matched samples
        public int Samples;               // number of vectors folded into this identity
    }

    /// <summary>Weight given to the newest sample when updating the running average.</summary>
    private const float EmaAlpha = 0.2f;

    private readonly List<Identity> _ids = new();
    private ulong _nextId = 1;

    /// <summary>
    /// Returns the id of the known identity closest to <paramref name="embedding"/>,
    /// or registers a new identity when none is within <paramref name="threshold"/>.
    /// </summary>
    /// <param name="embedding">
    /// Query vector. It is not modified and not retained; a copy is stored when a
    /// new identity is created.
    /// </param>
    /// <param name="threshold">
    /// Largest cosine distance (1 - cosine similarity) at which the query is still
    /// treated as the closest known identity.
    /// </param>
    /// <returns>The id of the matched or newly created identity. Ids start at 1.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="embedding"/> is null.</exception>
    /// <exception cref="ArgumentException">
    /// <paramref name="embedding"/> has a different length than the embeddings already cached.
    /// </exception>
    public ulong ResolveId(float[] embedding, float threshold)
    {
        if (embedding is null)
        {
            throw new ArgumentNullException(nameof(embedding));
        }

        if (_ids.Count == 0)
        {
            return CreateNew(embedding);
        }

        // Every stored embedding has the length of the first one, so checking
        // against a single entry is enough to reject a mismatched query up front
        // instead of reading out of range or silently ignoring trailing components.
        int expectedLength = _ids[0].Embedding.Length;
        if (embedding.Length != expectedLength)
        {
            throw new ArgumentException(
                $"Embedding has {embedding.Length} components but cached identities have {expectedLength}.",
                nameof(embedding));
        }

        int bestIndex = -1;
        float bestDist = float.MaxValue;

        for (int i = 0; i < _ids.Count; i++)
        {
            float d = CosineDistance(_ids[i].Embedding, embedding);
            if (d < bestDist)
            {
                bestDist = d;
                bestIndex = i;
            }
        }

        // bestIndex stays -1 when every distance is NaN; never index with it.
        if (bestIndex >= 0 && bestDist <= threshold)
        {
            Identity match = _ids[bestIndex];
            UpdateEma(match.Embedding, embedding);
            match.Samples++;
            return match.Id;
        }

        return CreateNew(embedding);
    }

    /// <summary>Registers a new identity seeded with a private copy of <paramref name="embedding"/>.</summary>
    private ulong CreateNew(float[] embedding)
    {
        ulong id = _nextId++;

        _ids.Add(new Identity
        {
            Id = id,
            // Copy so later in-place EMA updates never touch the caller's array.
            Embedding = (float[])embedding.Clone(),
            Samples = 1
        });

        return id;
    }

    /// <summary>
    /// Cosine distance, <c>1 - (a·b) / (|a||b|)</c>, between two equal-length vectors.
    /// </summary>
    /// <remarks>
    /// The norms are divided out explicitly because the stored running average is
    /// a blend of vectors and is generally not unit length, so a bare dot product
    /// would overstate the distance. When either vector has zero length the
    /// similarity is treated as 0 and the distance is 1.
    /// </remarks>
    private static float CosineDistance(float[] a, float[] b)
    {
        double dot = 0.0;
        double normSqA = 0.0;
        double normSqB = 0.0;

        for (int i = 0; i < a.Length; i++)
        {
            double x = a[i];
            double y = b[i];
            dot += x * y;
            normSqA += x * x;
            normSqB += y * y;
        }

        double denom = Math.Sqrt(normSqA * normSqB);

        // Exact zero test on purpose: only a genuinely zero-length vector is
        // special-cased; NaN inputs keep propagating as NaN and never match.
        if (denom == 0.0)
        {
            return 1f;
        }

        return (float)(1.0 - dot / denom);
    }

    /// <summary>
    /// Blends <paramref name="v"/> into <paramref name="ema"/> in place:
    /// <c>ema = ema * (1 - alpha) + v * alpha</c> with alpha = <see cref="EmaAlpha"/>.
    /// </summary>
    private static void UpdateEma(float[] ema, float[] v)
    {
        for (int i = 0; i < ema.Length; i++)
        {
            ema[i] = ema[i] * (1 - EmaAlpha) + v[i] * EmaAlpha;
        }
    }
}
|