using System.Globalization;
using PdfMarker.Models;
using UglyToad.PdfPig;
using UglyToad.PdfPig.Content;

namespace PdfMarker.Services;

/// <summary>
/// Scans the words of a PDF drawing and returns those that look like
/// dimensions ("quotas"): diameters, radii, angles, chamfers and plain
/// linear values. Positions and sizes are normalised to the page size.
/// </summary>
public class PdfQuotaExtractor
{
    // Fraction of the page height, measured from the bottom edge, that is
    // treated as title block ("cartiglio") and excluded.
    private const double CartiglioCutoffY = 0.15;

    // Fraction of the page width excluded along the left and right borders (5%).
    private const double CartiglioMarginX = 0.05;

    // Maximum normalised distance on each axis for two candidates of the same
    // type to be considered fragments of one text.
    private const double MergeTolerance = 0.02;

    /// <summary>
    /// Opens the PDF at <paramref name="filePath"/> and extracts the quota
    /// candidates of every page.
    /// </summary>
    /// <param name="filePath">Path of the PDF file to analyse.</param>
    /// <returns>
    /// The candidates found, page by page, after nearby fragments of the same
    /// type have been merged. Empty when nothing matches.
    /// </returns>
    public List<QuotaCandidate> Extract(string filePath)
    {
        var result = new List<QuotaCandidate>();

        using var document = PdfDocument.Open(filePath);

        foreach (var page in document.GetPages())
        {
            var pageWidth = page.Width;
            var pageHeight = page.Height;

            // Candidates are collected per page: coordinates are normalised to
            // the page, so merging across pages would fuse unrelated texts that
            // merely share the same relative position.
            var pageCandidates = new List<QuotaCandidate>();

            foreach (var word in page.GetWords())
            {
                if (!IsQuotaLikeText(word.Text))
                {
                    continue;
                }

                var type = ClassifyQuota(word.Text);
                if (type == QuotaType.Unknown)
                {
                    continue;
                }

                var box = word.BoundingBox;

                // PdfPig reports boxes in PDF space (origin at the bottom-left
                // corner); x/y below are normalised with y measured from the top.
                var bottomFraction = box.Bottom / pageHeight;
                var x = box.Left / pageWidth;
                var y = 1 - bottomFraction;
                var w = box.Width / pageWidth;
                var h = box.Height / pageHeight;

                // --- TITLE BLOCK FILTERS ---

                // Lower part of the sheet. The test is done on the distance from
                // the bottom edge; comparing the top-based y against the cutoff
                // would discard the top of the page instead.
                if (bottomFraction < CartiglioCutoffY)
                {
                    continue;
                }

                // Left and right borders.
                if (x < CartiglioMarginX || x > 1 - CartiglioMarginX)
                {
                    continue;
                }

                // NOTE: a minimum-height filter (h < 0.003) for tiny title-block
                // text was tried here and is currently disabled.

                // Single-character texts are not meaningful. Because the text
                // must already contain a digit, this effectively drops lone
                // digits other than "0".
                var t = word.Text.Trim();
                if (t.Length <= 1 && !t.Contains("Ø") && !t.Contains("R") && !t.Contains("0"))
                {
                    continue;
                }

                // --- ADD CANDIDATE ---
                pageCandidates.Add(new QuotaCandidate
                {
                    RawText = word.Text,
                    Type = type,
                    X = x,
                    Y = y,
                    Width = w,
                    Height = h,
                    IsHorizontal = box.Width >= box.Height,
                    IsVertical = box.Height > box.Width,
                    Confidence = ComputeConfidence(type, word)
                });
            }

            result.AddRange(MergeSplitTexts(pageCandidates));
        }

        return result;
    }

    /// <summary>
    /// Returns true when <paramref name="text"/> contains at least one digit
    /// and matches one of the patterns recognised by <see cref="ClassifyQuota"/>.
    /// </summary>
    private static bool IsQuotaLikeText(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return false;
        }

        var compact = text.Replace(" ", "");

        // Must contain at least one digit.
        if (!compact.Any(char.IsDigit))
        {
            return false;
        }

        // Single source of truth for the recognised patterns.
        return ClassifyQuota(compact) != QuotaType.Unknown;
    }

    /// <summary>
    /// Maps a text to a quota type. Rules are evaluated in order and the first
    /// match wins: "Ø" anywhere, leading "R", "°" anywhere, "x"/"×" anywhere,
    /// then a plain number.
    /// </summary>
    /// <remarks>
    /// NOTE(review): because "°" is tested before "x", a text such as "2x45°"
    /// is classified as Angle rather than Chamfer — confirm this is intended.
    /// </remarks>
    private static QuotaType ClassifyQuota(string text)
    {
        var compact = text.Replace(" ", "");

        if (compact.Contains("Ø"))
        {
            return QuotaType.Diameter;
        }

        // Ordinal comparison: the single-argument StartsWith(string) overload
        // is culture-sensitive.
        if (compact.StartsWith("R", StringComparison.Ordinal))
        {
            return QuotaType.Radius;
        }

        if (compact.Contains("°"))
        {
            return QuotaType.Angle;
        }

        if (compact.Contains("x") || compact.Contains("×"))
        {
            return QuotaType.Chamfer;
        }

        return IsNumeric(compact) ? QuotaType.Linear : QuotaType.Unknown;
    }

    /// <summary>
    /// Returns true when <paramref name="text"/> parses as a number, accepting
    /// either "," or "." as decimal separator.
    /// </summary>
    private static bool IsNumeric(string text)
    {
        return double.TryParse(
            text.Replace(",", "."),
            NumberStyles.Any,
            CultureInfo.InvariantCulture,
            out _);
    }

    /// <summary>
    /// Computes a heuristic confidence for a candidate: a base value per type,
    /// reduced for vertical boxes and for integer-only texts.
    /// </summary>
    private double ComputeConfidence(QuotaType type, Word word)
    {
        var confidence = type switch
        {
            QuotaType.Diameter => 0.9,
            QuotaType.Radius => 0.85,
            QuotaType.Angle => 0.85,
            QuotaType.Chamfer => 0.8,
            QuotaType.Linear => 0.6,
            _ => 0.3
        };

        var box = word.BoundingBox;
        var text = word.Text.Replace(" ", "");

        // Penalise vertical texts.
        if (box.Height > box.Width)
        {
            confidence *= 0.7;
        }

        // Penalise numbers written without a decimal separator (1, 2, 30, ...).
        // The check applies to every integer, regardless of its length.
        if (IsNumeric(text) && !IsDecimalNumber(text))
        {
            confidence *= 0.7;
        }

        return confidence;
    }

    /// <summary>
    /// Returns true for numbers written with a decimal separator,
    /// e.g. "37,50", "12.7", "1,5".
    /// </summary>
    private static bool IsDecimalNumber(string text)
    {
        return IsNumeric(text) && (text.Contains(",") || text.Contains("."));
    }

    /// <summary>
    /// Greedily groups candidates of the same type whose X and Y both differ
    /// by less than <see cref="MergeTolerance"/> from the first member of the
    /// group, and replaces each group with a single merged candidate.
    /// </summary>
    /// <param name="input">Candidates to merge; expected to come from one page.</param>
    /// <returns>A new list; candidates without neighbours are returned unchanged.</returns>
    private List<QuotaCandidate> MergeSplitTexts(List<QuotaCandidate> input)
    {
        var merged = new List<QuotaCandidate>();
        var used = new HashSet<QuotaCandidate>();

        foreach (var current in input)
        {
            if (used.Contains(current))
            {
                continue;
            }

            var cluster = new List<QuotaCandidate> { current };
            used.Add(current);

            foreach (var other in input)
            {
                if (used.Contains(other))
                {
                    continue;
                }

                // Same type only.
                if (other.Type != current.Type)
                {
                    continue;
                }

                // Very close to the seed of the cluster.
                var dx = Math.Abs(current.X - other.X);
                var dy = Math.Abs(current.Y - other.Y);

                if (dx < MergeTolerance && dy < MergeTolerance)
                {
                    cluster.Add(other);
                    used.Add(other);
                }
            }

            if (cluster.Count == 1)
            {
                merged.Add(current);
                continue;
            }

            // Join the fragments, shortest first.
            // NOTE(review): ordering by length, not by position, may not
            // reproduce the reading order — confirm this is intended.
            var mergedText = string.Join(
                " ",
                cluster
                    .Select(q => q.RawText.Trim())
                    .OrderBy(t => t.Length));

            merged.Add(new QuotaCandidate
            {
                RawText = mergedText,
                Type = current.Type,
                X = cluster.Average(q => q.X),
                Y = cluster.Average(q => q.Y),
                Width = cluster.Max(q => q.Width),
                Height = cluster.Max(q => q.Height),
                IsHorizontal = cluster.Any(q => q.IsHorizontal),
                IsVertical = cluster.Any(q => q.IsVertical),
                Confidence = cluster.Average(q => q.Confidence)
            });
        }

        return merged;
    }
}