This commit is contained in:
2026-01-23 09:57:52 +01:00
commit 831badd188
136 changed files with 7705 additions and 0 deletions

View File

@ -0,0 +1,68 @@
using PdfMarker.Models;
namespace PdfMarker.Services;
/// <summary>
/// Produces automatic balloon annotations for a PDF stored under the web root's
/// <c>pdf</c> folder: dimension candidates are extracted, clustered into features,
/// and one numbered balloon is emitted per feature.
/// </summary>
public class BallooningService
{
    private readonly PdfQuotaExtractor _quotaExtractor;
    private readonly QuotaClusterer _clusterer;
    private readonly IWebHostEnvironment _env;

    /// <summary>Creates the service with its extraction and clustering collaborators.</summary>
    /// <param name="quotaExtractor">Extracts dimension candidates from a PDF file.</param>
    /// <param name="clusterer">Groups dimension candidates into features.</param>
    /// <param name="env">Hosting environment; its <c>WebRootPath</c> locates the <c>pdf</c> folder.</param>
    public BallooningService(
        PdfQuotaExtractor quotaExtractor,
        QuotaClusterer clusterer,
        IWebHostEnvironment env)
    {
        _quotaExtractor = quotaExtractor;
        _clusterer = clusterer;
        _env = env;
    }

    /// <summary>
    /// Generates the balloon list for the PDF named <paramref name="fileName"/>.
    /// </summary>
    /// <param name="fileName">
    /// Name of a PDF inside <c>{WebRootPath}/pdf</c>. Any directory component is discarded
    /// so the lookup can never leave that folder.
    /// </param>
    /// <returns>
    /// A result whose <c>Balloons</c> list holds one entry per clustered feature, numbered
    /// from 1 in cluster order; the list is empty when no dimension was extracted.
    /// </returns>
    /// <exception cref="ArgumentException">
    /// <paramref name="fileName"/> is null, blank, or does not contain a usable file name.
    /// </exception>
    public async Task<AutoBalloonResult> GenerateAsync(string fileName)
    {
        // NOTE: the body is synchronous; 'async' is kept so that failures are still
        // reported through the returned Task exactly as callers observed before.
        if (string.IsNullOrWhiteSpace(fileName))
        {
            throw new ArgumentException("A PDF file name is required.", nameof(fileName));
        }

        // The name may come from a request: keep only the leaf so that "../" segments
        // or a rooted path cannot redirect the read outside the pdf folder.
        var safeName = Path.GetFileName(fileName);
        if (string.IsNullOrEmpty(safeName) || safeName == "." || safeName == "..")
        {
            throw new ArgumentException("The PDF file name is not valid.", nameof(fileName));
        }

        // 1) Physical path of the PDF.
        var pdfPath = Path.Combine(_env.WebRootPath, "pdf", safeName);

        // 2) Dimension extraction.
        var quotes = _quotaExtractor.Extract(pdfPath);

        // Critical check: when nothing was extracted the problem is upstream of the
        // clustering step, so return an empty result right away.
        if (quotes.Count == 0)
        {
            return new AutoBalloonResult
            {
                Balloons = new List<BalloonVm>()
            };
        }

        // 3) Clustering: dimensions -> features.
        var features = _clusterer.Cluster(quotes);

        // 4) Features -> balloons, numbered sequentially starting at 1.
        var balloons = features
            .Select((feature, position) => new BalloonVm
            {
                Number = position + 1,
                Description = string.Join(" | ", feature.Quotes.Select(q => q.RawText)),
                X = feature.CenterX,
                Y = feature.CenterY,
                Selected = false
            })
            .ToList();

        // 5) Final output.
        return new AutoBalloonResult
        {
            Balloons = balloons
        };
    }
}

View File

@ -0,0 +1,238 @@
using System.Globalization;
using PdfMarker.Models;
using UglyToad.PdfPig;
using UglyToad.PdfPig.Content;
namespace PdfMarker.Services;
/// <summary>
/// Scans the words of a PDF and returns those that look like drawing dimensions
/// (diameters, radii, angles, chamfers, plain linear values), with page-normalised
/// coordinates and a heuristic confidence.
/// </summary>
public class PdfQuotaExtractor
{
    // Fraction of the page height, measured from the bottom edge, that is skipped
    // because it is expected to hold the title block.
    private const double CartiglioCutoffY = 0.15;

    // Fraction of the page width skipped along the left and right borders (5%).
    private const double CartiglioMarginX = 0.05;

    // Maximum per-axis distance (normalised units) for two candidates of the same
    // type to be considered fragments of one split text.
    private const double MergeTolerance = 0.02;

    /// <summary>
    /// Extracts the dimension candidates of every page of <paramref name="filePath"/>.
    /// </summary>
    /// <param name="filePath">Path of the PDF document to open.</param>
    /// <returns>
    /// Candidates in page order. <c>X</c>/<c>Width</c> are fractions of the page width;
    /// <c>Y</c> is <c>1 - Bottom / pageHeight</c>, i.e. it grows towards the bottom of
    /// the page; <c>Height</c> is a fraction of the page height. Nearby fragments of the
    /// same type are merged, separately for each page.
    /// </returns>
    public List<QuotaCandidate> Extract(string filePath)
    {
        var result = new List<QuotaCandidate>();
        using var document = PdfDocument.Open(filePath);

        foreach (var page in document.GetPages())
        {
            var pageWidth = page.Width;
            var pageHeight = page.Height;
            var pageCandidates = new List<QuotaCandidate>();

            foreach (var word in page.GetWords())
            {
                if (!IsQuotaLikeText(word.Text))
                {
                    continue;
                }

                var type = ClassifyQuota(word.Text);
                if (type == QuotaType.Unknown)
                {
                    continue;
                }

                var box = word.BoundingBox;
                var x = box.Left / pageWidth;
                var y = 1 - (box.Bottom / pageHeight);
                var w = box.Width / pageWidth;
                var h = box.Height / pageHeight;

                // --- TITLE BLOCK FILTERS ---
                // Lower part of the sheet. y grows downward (see the formula above), so
                // the bottom strip is y > 1 - cutoff; testing y < cutoff would discard the
                // top of the page instead.
                if (y > 1 - CartiglioCutoffY)
                {
                    continue;
                }

                // Side borders.
                if (x < CartiglioMarginX || x > 1 - CartiglioMarginX)
                {
                    continue;
                }

                // Texts that are too short to be meaningful.
                var t = word.Text.Trim();
                if (t.Length <= 1 && !t.Contains('Ø') && !t.Contains('R') && !t.Contains('0'))
                {
                    continue;
                }

                pageCandidates.Add(new QuotaCandidate
                {
                    RawText = word.Text,
                    Type = type,
                    X = x,
                    Y = y,
                    Width = w,
                    Height = h,
                    IsHorizontal = box.Width >= box.Height,
                    IsVertical = box.Height > box.Width,
                    Confidence = ComputeConfidence(type, word)
                });
            }

            // Coordinates are normalised per page, so merging has to stay inside one
            // page: otherwise words at similar positions on different pages would be
            // fused into a single candidate.
            result.AddRange(MergeSplitTexts(pageCandidates));
        }

        return result;
    }

    /// <summary>
    /// Returns true when <paramref name="text"/> contains at least one digit and matches
    /// one of the dimension shapes recognised by <see cref="ClassifyQuota"/>.
    /// </summary>
    private static bool IsQuotaLikeText(string text)
    {
        if (string.IsNullOrWhiteSpace(text))
        {
            return false;
        }

        text = text.Replace(" ", "");

        // Must contain at least one digit.
        if (!text.Any(char.IsDigit))
        {
            return false;
        }

        return ClassifyQuota(text) != QuotaType.Unknown;
    }

    /// <summary>
    /// Maps a text to a dimension type. Rules are evaluated in order: contains 'Ø' ->
    /// Diameter, starts with 'R' -> Radius, contains '°' -> Angle, contains 'x' or '×' ->
    /// Chamfer, parses as a number -> Linear, otherwise Unknown.
    /// </summary>
    private static QuotaType ClassifyQuota(string text)
    {
        text = text.Replace(" ", "");

        if (text.Contains('Ø'))
        {
            return QuotaType.Diameter;
        }

        // Char overload: ordinal comparison, independent of the current culture.
        if (text.StartsWith('R'))
        {
            return QuotaType.Radius;
        }

        if (text.Contains('°'))
        {
            return QuotaType.Angle;
        }

        if (text.Contains('x') || text.Contains('×'))
        {
            return QuotaType.Chamfer;
        }

        return IsNumeric(text) ? QuotaType.Linear : QuotaType.Unknown;
    }

    /// <summary>
    /// True when <paramref name="text"/> parses as a number with the invariant culture
    /// after replacing every comma with a dot (so "37,50" is accepted).
    /// </summary>
    private static bool IsNumeric(string text)
    {
        return double.TryParse(
            text.Replace(",", "."),
            NumberStyles.Any,
            CultureInfo.InvariantCulture,
            out _);
    }

    /// <summary>
    /// True for numeric texts written with a decimal separator, e.g. "37,50", "12.7", "1,5".
    /// </summary>
    private static bool IsDecimalNumber(string text)
    {
        return IsNumeric(text) && (text.Contains(',') || text.Contains('.'));
    }

    /// <summary>
    /// Heuristic confidence: a base value per type, multiplied by 0.7 for words taller
    /// than wide and again by 0.7 for numbers written without a decimal separator.
    /// </summary>
    private static double ComputeConfidence(QuotaType type, Word word)
    {
        var confidence = type switch
        {
            QuotaType.Diameter => 0.9,
            QuotaType.Radius => 0.85,
            QuotaType.Angle => 0.85,
            QuotaType.Chamfer => 0.8,
            QuotaType.Linear => 0.6,
            _ => 0.3
        };

        var box = word.BoundingBox;
        var text = word.Text.Replace(" ", "");

        // Penalise vertical texts.
        if (box.Height > box.Width)
        {
            confidence *= 0.7;
        }

        // Penalise plain integers (1, 2, 3...).
        if (IsNumeric(text) && !IsDecimalNumber(text))
        {
            confidence *= 0.7;
        }

        return confidence;
    }

    /// <summary>
    /// Greedily merges candidates of the same type lying within
    /// <see cref="MergeTolerance"/> of a seed candidate on both axes. A merged candidate
    /// joins the trimmed texts with spaces (shortest first), averages position and
    /// confidence, and takes the largest width/height of the group.
    /// </summary>
    /// <param name="input">Candidates that share one coordinate space (a single page).</param>
    private static List<QuotaCandidate> MergeSplitTexts(List<QuotaCandidate> input)
    {
        var merged = new List<QuotaCandidate>();
        var used = new HashSet<QuotaCandidate>();

        foreach (var current in input)
        {
            if (used.Contains(current))
            {
                continue;
            }

            var cluster = new List<QuotaCandidate> { current };
            used.Add(current);

            foreach (var other in input)
            {
                if (used.Contains(other))
                {
                    continue;
                }

                // Same type only.
                if (other.Type != current.Type)
                {
                    continue;
                }

                // Very close to the seed on both axes.
                var dx = Math.Abs(current.X - other.X);
                var dy = Math.Abs(current.Y - other.Y);
                if (dx < MergeTolerance && dy < MergeTolerance)
                {
                    cluster.Add(other);
                    used.Add(other);
                }
            }

            if (cluster.Count == 1)
            {
                merged.Add(current);
                continue;
            }

            // Merge the texts of the group.
            var mergedText = string.Join(" ",
                cluster
                    .Select(q => q.RawText.Trim())
                    .OrderBy(t => t.Length));

            merged.Add(new QuotaCandidate
            {
                RawText = mergedText,
                Type = current.Type,
                X = cluster.Average(q => q.X),
                Y = cluster.Average(q => q.Y),
                Width = cluster.Max(q => q.Width),
                Height = cluster.Max(q => q.Height),
                IsHorizontal = cluster.Any(q => q.IsHorizontal),
                IsVertical = cluster.Any(q => q.IsVertical),
                Confidence = cluster.Average(q => q.Confidence)
            });
        }

        return merged;
    }
}

View File

@ -0,0 +1,22 @@
namespace PdfMarker.Services;
/// <summary>
/// Stores uploaded PDF files in the <c>pdf</c> folder under the web root.
/// </summary>
public class PdfStorageService
{
    private readonly IWebHostEnvironment _env;

    /// <summary>Creates the service.</summary>
    /// <param name="env">Hosting environment; its <c>WebRootPath</c> locates the target folder.</param>
    public PdfStorageService(IWebHostEnvironment env)
    {
        _env = env;
    }

    /// <summary>
    /// Writes <paramref name="file"/> to <c>{WebRootPath}/pdf</c>, creating the folder if
    /// needed and overwriting an existing file with the same name.
    /// </summary>
    /// <param name="file">
    /// The uploaded file. Only the leaf part of its <c>FileName</c> is used, because that
    /// value is supplied by the client and may carry directory segments.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
    /// <exception cref="ArgumentException">The upload does not carry a usable file name.</exception>
    public async Task SaveAsync(IFormFile file)
    {
        if (file is null)
        {
            throw new ArgumentNullException(nameof(file));
        }

        // FileName is untrusted input: strip any directory component so that values such
        // as "../../appsettings.json" cannot make the write escape the pdf folder.
        var safeName = Path.GetFileName(file.FileName);
        if (string.IsNullOrWhiteSpace(safeName) || safeName == "." || safeName == "..")
        {
            throw new ArgumentException("The uploaded file has no valid file name.", nameof(file));
        }

        var path = Path.Combine(_env.WebRootPath, "pdf");
        Directory.CreateDirectory(path);

        var filePath = Path.Combine(path, safeName);

        // FileStream supports asynchronous disposal; await it so the final flush does
        // not block the calling thread.
        await using var stream = File.Create(filePath);
        await file.CopyToAsync(stream);
    }
}

View File

@ -0,0 +1,40 @@
using PdfMarker.Models;
using UglyToad.PdfPig;
namespace PdfMarker.Services;
/// <summary>
/// Collects the words of a PDF whose text contains one of a few technical markers,
/// together with their page-normalised position.
/// </summary>
public class PdfTextExtractor
{
    /// <summary>
    /// Opens <paramref name="filePath"/> and returns one <c>TextFeature</c> per matching
    /// word, in page order then word order.
    /// </summary>
    /// <param name="filePath">Path of the PDF document to open.</param>
    /// <returns>
    /// Features with <c>X = Left / page.Width</c>, <c>Y = 1 - Bottom / page.Height</c>
    /// and the unmodified word text.
    /// </returns>
    public List<TextFeature> Extract(string filePath)
    {
        using var pdf = PdfDocument.Open(filePath);

        // ToList() materialises the query before the document is disposed.
        return pdf.GetPages()
            .SelectMany(page => page.GetWords()
                .Where(candidate => IsTechnicalFeature(candidate.Text))
                .Select(candidate => new TextFeature
                {
                    X = candidate.BoundingBox.Left / page.Width,
                    Y = 1 - (candidate.BoundingBox.Bottom / page.Height),
                    Text = candidate.Text
                }))
            .ToList();
    }

    /// <summary>
    /// True when the text contains any of the markers "Ø", "R", "x45" or "H7".
    /// </summary>
    private static bool IsTechnicalFeature(string text)
    {
        if (text.Contains("Ø"))
        {
            return true;
        }

        if (text.Contains("R"))
        {
            return true;
        }

        if (text.Contains("x45"))
        {
            return true;
        }

        return text.Contains("H7");
    }
}

View File

@ -0,0 +1,77 @@
using PdfMarker.Models;
namespace PdfMarker.Services;
/// <summary>
/// Groups dimension candidates into features with a greedy, seed-based pass: every
/// still-unassigned candidate starts a cluster and absorbs the unassigned candidates
/// that are compatible with it.
/// </summary>
public class QuotaClusterer
{
    // Maximum distance between two dimensions for them to belong to the same feature
    // (0.05 = 5% of the page in normalised units).
    private const double MaxDistance = 0.05;

    /// <summary>
    /// Clusters <paramref name="quotes"/> into features.
    /// </summary>
    /// <param name="quotes">Candidates to group; the list is not modified.</param>
    /// <returns>One feature per cluster, in the order the cluster seeds appear in the input.</returns>
    public List<FeatureCandidate> Cluster(List<QuotaCandidate> quotes)
    {
        var assigned = new HashSet<QuotaCandidate>();
        var output = new List<FeatureCandidate>();

        foreach (var seed in quotes)
        {
            // Add returns false when the candidate already belongs to a cluster.
            if (!assigned.Add(seed))
            {
                continue;
            }

            var members = new List<QuotaCandidate> { seed };

            foreach (var candidate in quotes)
            {
                // Compatibility is always evaluated against the seed, never against
                // members absorbed later.
                if (!assigned.Contains(candidate) && IsSameFeature(seed, candidate))
                {
                    members.Add(candidate);
                    assigned.Add(candidate);
                }
            }

            output.Add(BuildFeature(members));
        }

        return output;
    }

    /// <summary>
    /// Two candidates belong to the same feature when they have the same type, the same
    /// orientation flags, and a Euclidean distance below <see cref="MaxDistance"/>.
    /// </summary>
    private bool IsSameFeature(QuotaCandidate a, QuotaCandidate b)
    {
        // Same type: the most important criterion.
        var sameType = a.Type == b.Type;
        if (!sameType)
        {
            return false;
        }

        // Compatible orientation: both flags must match.
        var sameOrientation =
            a.IsHorizontal == b.IsHorizontal &&
            a.IsVertical == b.IsVertical;
        if (!sameOrientation)
        {
            return false;
        }

        // Distance between the two positions.
        var deltaX = a.X - b.X;
        var deltaY = a.Y - b.Y;
        return Math.Sqrt(deltaX * deltaX + deltaY * deltaY) < MaxDistance;
    }

    /// <summary>
    /// Builds a feature from a cluster: centre and confidence are the averages over
    /// the members, the type is taken from the first member.
    /// </summary>
    private FeatureCandidate BuildFeature(List<QuotaCandidate> cluster)
    {
        var meanX = cluster.Average(member => member.X);
        var meanY = cluster.Average(member => member.Y);
        var meanConfidence = cluster.Average(member => member.Confidence);

        var feature = new FeatureCandidate
        {
            Type = cluster[0].Type,
            Quotes = cluster,
            CenterX = meanX,
            CenterY = meanY,
            Confidence = meanConfidence
        };
        return feature;
    }
}