77 lines
1.9 KiB
C#
77 lines
1.9 KiB
C#
using PdfMarker.Models;
|
|
|
|
namespace PdfMarker.Services;
|
|
|
|
/// <summary>
/// Groups individual quota candidates into feature candidates, merging quotas
/// that share type and orientation and lie close to each other.
/// </summary>
/// <remarks>
/// The grouping is greedy and seed-based: each not-yet-assigned quota starts a new
/// cluster and absorbs every remaining quota that matches <em>the seed itself</em>.
/// Quotas are never compared against other members of the cluster, so the grouping
/// is not transitive and the result depends on the order of the input list.
/// </remarks>
public class QuotaClusterer
{
    /// <summary>
    /// Maximum distance between two quotas for them to be considered the same feature.
    /// </summary>
    private const double MaxDistance = 0.05; // 5% of the page

    /// <summary>
    /// Partitions <paramref name="quotes"/> into clusters and builds one
    /// <see cref="FeatureCandidate"/> per cluster.
    /// </summary>
    /// <param name="quotes">Quota candidates to group. May be empty.</param>
    /// <returns>
    /// One feature per cluster, in the order the cluster seeds appear in
    /// <paramref name="quotes"/>. Empty when the input is empty.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="quotes"/> is null.</exception>
    public List<FeatureCandidate> Cluster(List<QuotaCandidate> quotes)
    {
        if (quotes is null)
        {
            throw new ArgumentNullException(nameof(quotes));
        }

        var features = new List<FeatureCandidate>();

        // NOTE(review): membership uses QuotaCandidate's default equality. If that type
        // has value equality (e.g. it is a record), two distinct but equal candidates
        // collapse into one here - confirm this is intended.
        var used = new HashSet<QuotaCandidate>();

        foreach (var seed in quotes)
        {
            // Add returns false when the quota already belongs to an earlier cluster.
            if (!used.Add(seed))
            {
                continue;
            }

            var cluster = new List<QuotaCandidate> { seed };

            foreach (var other in quotes)
            {
                // Candidates are matched against the seed only, not against
                // quotas that were absorbed into the cluster later.
                if (!used.Contains(other) && IsSameFeature(seed, other))
                {
                    cluster.Add(other);
                    used.Add(other);
                }
            }

            features.Add(BuildFeature(cluster));
        }

        return features;
    }

    /// <summary>
    /// Decides whether two quotas describe the same feature: same type, same
    /// horizontal/vertical flags, and Euclidean distance strictly below
    /// <see cref="MaxDistance"/>.
    /// </summary>
    private static bool IsSameFeature(QuotaCandidate a, QuotaCandidate b)
    {
        // Same type - the most important criterion.
        if (a.Type != b.Type)
        {
            return false;
        }

        // Compatible orientation: both flags must agree.
        if (a.IsHorizontal != b.IsHorizontal ||
            a.IsVertical != b.IsVertical)
        {
            return false;
        }

        // Euclidean distance between the two quota positions.
        var dx = a.X - b.X;
        var dy = a.Y - b.Y;
        var distance = Math.Sqrt(dx * dx + dy * dy);

        return distance < MaxDistance;
    }

    /// <summary>
    /// Builds a feature from a non-empty cluster: the type is taken from the first
    /// quota, while the center and confidence are the arithmetic means over all members.
    /// </summary>
    /// <param name="cluster">
    /// Quotas belonging to the feature; must contain at least one element. The list
    /// instance itself is stored in the resulting feature, not a copy.
    /// </param>
    private static FeatureCandidate BuildFeature(List<QuotaCandidate> cluster)
    {
        var centerX = cluster.Average(q => q.X);
        var centerY = cluster.Average(q => q.Y);
        var confidence = cluster.Average(q => q.Confidence);

        return new FeatureCandidate
        {
            Type = cluster.First().Type,
            Quotes = cluster,
            CenterX = centerX,
            CenterY = centerY,
            Confidence = confidence
        };
    }
}