This commit is contained in:
2026-01-23 09:57:52 +01:00
commit 831badd188
136 changed files with 7705 additions and 0 deletions

View File

@ -0,0 +1,238 @@
using System.Globalization;
using PdfMarker.Models;
using UglyToad.PdfPig;
using UglyToad.PdfPig.Content;
namespace PdfMarker.Services;
/// <summary>
/// Scans the words of a PDF document and returns those that look like drawing
/// dimensions ("quote"): diameters, radii, angles, chamfers and plain numeric values.
/// Positions and sizes are normalized to the page size (0..1), with Y measured
/// from the top edge of the page.
/// </summary>
public class PdfQuotaExtractor
{
    // Fraction of the page height, measured from the bottom edge, that is
    // excluded because it is expected to hold the title block ("cartiglio").
    private const double CartiglioCutoffY = 0.15;

    // Fraction of the page width excluded along the left and right edges.
    private const double CartiglioMarginX = 0.05;

    // Maximum normalized distance, on each axis, for two candidates of the
    // same type to be treated as fragments of one split text.
    private const double MergeDistance = 0.02;

    // Confidence multiplier applied when the word box is taller than wide.
    private const double VerticalTextPenalty = 0.7;

    // Confidence multiplier applied to numeric texts without a decimal separator.
    private const double IntegerPenalty = 0.7;

    /// <summary>
    /// Opens the PDF at <paramref name="filePath"/> and extracts the dimension
    /// candidates of every page.
    /// </summary>
    /// <param name="filePath">Path of the PDF file, passed to <c>PdfDocument.Open</c>.</param>
    /// <returns>
    /// The candidates in page order; nearby fragments of the same type on the
    /// same page are merged into a single candidate.
    /// </returns>
    public List<QuotaCandidate> Extract(string filePath)
    {
        var result = new List<QuotaCandidate>();

        using var document = PdfDocument.Open(filePath);

        foreach (var page in document.GetPages())
        {
            // Merge page by page: coordinates are normalized per page, so merging
            // the pooled list would join fragments that merely share a position
            // on different pages.
            result.AddRange(MergeSplitTexts(ExtractPageCandidates(page)));
        }

        return result;
    }

    /// <summary>
    /// Collects the unmerged dimension candidates of a single page, applying
    /// the title-block and short-text filters.
    /// </summary>
    private static List<QuotaCandidate> ExtractPageCandidates(Page page)
    {
        var candidates = new List<QuotaCandidate>();

        var pageWidth = page.Width;
        var pageHeight = page.Height;

        // The page size is used as a divisor below; a degenerate page cannot
        // produce meaningful normalized coordinates.
        if (pageWidth <= 0 || pageHeight <= 0)
            return candidates;

        foreach (var word in page.GetWords())
        {
            if (!IsQuotaLikeText(word.Text))
                continue;

            var type = ClassifyQuota(word.Text);
            if (type == QuotaType.Unknown)
                continue;

            var box = word.BoundingBox;

            // Normalize to 0..1 and flip Y so that 0 is the top of the page.
            // Y is derived from the bottom edge of the word box.
            var x = box.Left / pageWidth;
            var y = 1 - (box.Bottom / pageHeight);
            var w = box.Width / pageWidth;
            var h = box.Height / pageHeight;

            // --- TITLE BLOCK FILTERS ---

            // Lower part of the sheet: with the flipped Y axis the bottom band
            // is the one where y approaches 1.
            if (y > 1 - CartiglioCutoffY)
                continue;

            // Side margins.
            if (x < CartiglioMarginX || x > 1 - CartiglioMarginX)
                continue;

            // A minimum-height filter (h < 0.003) for very small texts was
            // tried here and is currently disabled.

            // Texts that are too short to be meaningful.
            var t = word.Text.Trim();
            if (t.Length <= 1 && !t.Contains("Ø") && !t.Contains("R") && !t.Contains("0"))
                continue;

            // --- ADD CANDIDATE ---
            candidates.Add(new QuotaCandidate
            {
                RawText = word.Text,
                Type = type,
                X = x,
                Y = y,
                Width = w,
                Height = h,
                IsHorizontal = box.Width >= box.Height,
                IsVertical = box.Height > box.Width,
                Confidence = ComputeConfidence(type, word)
            });
        }

        return candidates;
    }

    /// <summary>
    /// Returns true when <paramref name="text"/> contains at least one digit and
    /// matches one of the dimension patterns recognized by <see cref="ClassifyQuota"/>.
    /// </summary>
    private static bool IsQuotaLikeText(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
            return false;

        text = text.Replace(" ", "");

        // Must contain at least one digit.
        if (!text.Any(char.IsDigit))
            return false;

        // Single source of truth for the patterns: anything the classifier
        // recognizes counts as dimension-like.
        return ClassifyQuota(text) != QuotaType.Unknown;
    }

    /// <summary>
    /// Maps a text to a dimension type. Rules are checked in this order:
    /// contains "Ø", starts with "R", contains "°", contains "x" or "×", numeric.
    /// </summary>
    private static QuotaType ClassifyQuota(string text)
    {
        text = text.Replace(" ", "");

        if (text.Contains("Ø"))
            return QuotaType.Diameter;

        // Ordinal comparison: the prefix is a symbol, not linguistic text.
        if (text.StartsWith("R", StringComparison.Ordinal))
            return QuotaType.Radius;

        if (text.Contains("°"))
            return QuotaType.Angle;

        if (text.Contains("x") || text.Contains("×"))
            return QuotaType.Chamfer;

        if (IsNumeric(text))
            return QuotaType.Linear;

        return QuotaType.Unknown;
    }

    /// <summary>
    /// Returns true when <paramref name="text"/> parses as a number, accepting
    /// either "," or "." as decimal separator.
    /// </summary>
    private static bool IsNumeric(string text)
    {
        return double.TryParse(
            text.Replace(",", "."),
            NumberStyles.Any,
            CultureInfo.InvariantCulture,
            out _);
    }

    /// <summary>
    /// Computes a heuristic confidence: a base value per dimension type,
    /// reduced for boxes taller than wide and for numbers without a decimal separator.
    /// </summary>
    private static double ComputeConfidence(QuotaType type, Word word)
    {
        var confidence = type switch
        {
            QuotaType.Diameter => 0.9,
            QuotaType.Radius => 0.85,
            QuotaType.Angle => 0.85,
            QuotaType.Chamfer => 0.8,
            QuotaType.Linear => 0.6,
            _ => 0.3
        };

        var box = word.BoundingBox;
        var text = word.Text.Replace(" ", "");

        // Penalize vertical texts.
        if (box.Height > box.Width)
            confidence *= VerticalTextPenalty;

        // Penalize integers (1, 2, 3...): any numeric text without a decimal
        // separator, regardless of its length.
        if (IsNumeric(text) && !IsDecimalNumber(text))
            confidence *= IntegerPenalty;

        return confidence;
    }

    /// <summary>
    /// Returns true for numeric texts that contain a decimal separator,
    /// e.g. "37,50", "12.7", "1,5".
    /// </summary>
    private static bool IsDecimalNumber(string text)
    {
        return (text.Contains(",") || text.Contains(".")) && IsNumeric(text);
    }

    /// <summary>
    /// Merges candidates of the same type that lie within <see cref="MergeDistance"/>
    /// of a seed candidate on both axes. Each candidate is compared with the seed
    /// of the cluster only, not with the other members.
    /// </summary>
    /// <param name="input">Candidates sharing one coordinate space (a single page).</param>
    /// <returns>Unclustered candidates as they are, plus one merged candidate per cluster.</returns>
    private static List<QuotaCandidate> MergeSplitTexts(List<QuotaCandidate> input)
    {
        var merged = new List<QuotaCandidate>(input.Count);

        // Track consumed items by index so the result does not depend on how
        // QuotaCandidate implements equality.
        var used = new bool[input.Count];

        for (var i = 0; i < input.Count; i++)
        {
            if (used[i])
                continue;

            var current = input[i];
            used[i] = true;

            var cluster = new List<QuotaCandidate> { current };

            // Every index below i has already been consumed, so only later
            // items can still join this cluster.
            for (var j = i + 1; j < input.Count; j++)
            {
                if (used[j])
                    continue;

                var other = input[j];

                // Same type only.
                if (other.Type != current.Type)
                    continue;

                // Very close to the seed.
                var dx = Math.Abs(current.X - other.X);
                var dy = Math.Abs(current.Y - other.Y);

                if (dx < MergeDistance && dy < MergeDistance)
                {
                    cluster.Add(other);
                    used[j] = true;
                }
            }

            if (cluster.Count == 1)
            {
                merged.Add(current);
                continue;
            }

            // Join the fragment texts, shortest first.
            // NOTE(review): fragments are ordered by text length, not by position
            // on the page - confirm this is the intended reading order.
            var mergedText = string.Join(" ",
                cluster
                    .Select(q => q.RawText.Trim())
                    .OrderBy(t => t.Length));

            // NOTE(review): position is the average of the fragments and size is
            // the largest fragment, not the union of the boxes - confirm with consumers.
            merged.Add(new QuotaCandidate
            {
                RawText = mergedText,
                Type = current.Type,
                X = cluster.Average(q => q.X),
                Y = cluster.Average(q => q.Y),
                Width = cluster.Max(q => q.Width),
                Height = cluster.Max(q => q.Height),
                IsHorizontal = cluster.Any(q => q.IsHorizontal),
                IsVertical = cluster.Any(q => q.IsVertical),
                Confidence = cluster.Average(q => q.Confidence)
            });
        }

        return merged;
    }
}