Files
PdfMarker_Web/PdfMarker/Services/QuotaClusterer.cs
2026-01-23 09:57:52 +01:00

77 lines
1.9 KiB
C#

using PdfMarker.Models;
namespace PdfMarker.Services;
/// <summary>
/// Groups <see cref="QuotaCandidate"/> items into <see cref="FeatureCandidate"/> clusters.
/// </summary>
/// <remarks>
/// The grouping is greedy and seed-based: the first not-yet-assigned quota in list order
/// starts a cluster, and every later unassigned quota that matches the seed (same type,
/// same orientation flags, closer than <see cref="MaxDistance"/>) joins it. Candidates are
/// compared against the seed only, not against other members of the cluster, so the
/// result depends on the order of the input list.
/// </remarks>
public class QuotaClusterer
{
    // Maximum distance between two quotas for them to be considered the same feature.
    // Original note: "5% of the page" — presumably X/Y are normalized to the page size;
    // TODO confirm against the code that produces QuotaCandidate.
    private const double MaxDistance = 0.05;

    /// <summary>
    /// Partitions <paramref name="quotes"/> into feature candidates.
    /// </summary>
    /// <param name="quotes">Quotas to cluster. May be empty; must not be null.</param>
    /// <returns>
    /// One <see cref="FeatureCandidate"/> per cluster, in the order the cluster seeds
    /// appear in <paramref name="quotes"/>. Empty when the input is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="quotes"/> is null.</exception>
    public List<FeatureCandidate> Cluster(List<QuotaCandidate> quotes)
    {
        if (quotes is null)
        {
            throw new ArgumentNullException(nameof(quotes));
        }

        var features = new List<FeatureCandidate>();
        var used = new HashSet<QuotaCandidate>();

        for (var i = 0; i < quotes.Count; i++)
        {
            var seed = quotes[i];

            // Add returns false when the quota was already assigned to an earlier cluster.
            if (!used.Add(seed))
            {
                continue;
            }

            var cluster = new List<QuotaCandidate> { seed };

            // Everything before index i is already in `used` (it was either a seed or a
            // member of an earlier cluster), so only the tail of the list can still join.
            for (var j = i + 1; j < quotes.Count; j++)
            {
                var other = quotes[j];
                if (used.Contains(other) || !IsSameFeature(seed, other))
                {
                    continue;
                }

                cluster.Add(other);
                used.Add(other);
            }

            features.Add(BuildFeature(cluster));
        }

        return features;
    }

    /// <summary>
    /// Decides whether two quotas belong to the same feature: identical type, identical
    /// horizontal/vertical flags, and Euclidean distance strictly below
    /// <see cref="MaxDistance"/>.
    /// </summary>
    private static bool IsSameFeature(QuotaCandidate a, QuotaCandidate b)
    {
        // Same type: the most important criterion.
        if (a.Type != b.Type)
        {
            return false;
        }

        // Compatible orientation: both flags must agree.
        if (a.IsHorizontal != b.IsHorizontal || a.IsVertical != b.IsVertical)
        {
            return false;
        }

        // Straight-line distance between the two quota positions.
        var dx = a.X - b.X;
        var dy = a.Y - b.Y;
        var distance = Math.Sqrt(dx * dx + dy * dy);

        return distance < MaxDistance;
    }

    /// <summary>
    /// Builds a feature from a non-empty cluster: the type comes from the first quota
    /// (the seed), while the center coordinates and the confidence are the arithmetic
    /// means over all members. The passed list instance is stored in the feature as-is.
    /// </summary>
    private static FeatureCandidate BuildFeature(List<QuotaCandidate> cluster)
    {
        var centerX = cluster.Average(q => q.X);
        var centerY = cluster.Average(q => q.Y);
        var confidence = cluster.Average(q => q.Confidence);

        return new FeatureCandidate
        {
            Type = cluster.First().Type,
            Quotes = cluster,
            CenterX = centerX,
            CenterY = centerY,
            Confidence = confidence
        };
    }
}