Files
PdfMarker_Web/PdfMarker/Services/PdfTextExtractor.cs
2026-01-23 09:57:52 +01:00

40 lines
1010 B
C#

using PdfMarker.Models;
using UglyToad.PdfPig;
namespace PdfMarker.Services;
/// <summary>
/// Collects the words of a PDF whose text contains one of a fixed set of
/// "technical feature" markers, together with a page-relative position for each.
/// </summary>
public class PdfTextExtractor
{
    // Substrings that qualify a word as a technical feature. Matching is a plain
    // case-sensitive substring test.
    // NOTE(review): presumably diameter (Ø), radius (R), 45° chamfer (x45) and H7 fit
    // callouts — confirm. "R" matches ANY word containing an uppercase R; verify that
    // this breadth is intended.
    private static readonly string[] FeatureMarkers = { "Ø", "R", "x45", "H7" };

    /// <summary>
    /// Opens the PDF at <paramref name="filePath"/>, walks every word on every page and
    /// returns one <see cref="TextFeature"/> per word accepted by <c>IsTechnicalFeature</c>.
    /// </summary>
    /// <param name="filePath">Path handed directly to <c>PdfDocument.Open</c>.</param>
    /// <returns>
    /// The matching words in page order, then word order. <c>X</c> is the left edge of the
    /// word's bounding box divided by the page width; <c>Y</c> is one minus the bottom edge
    /// divided by the page height.
    /// </returns>
    public List<TextFeature> Extract(string filePath)
    {
        // The document is disposed when this method exits; the returned list is fully
        // populated before that happens.
        using var document = PdfDocument.Open(filePath);
        var features = new List<TextFeature>();

        foreach (var currentPage in document.GetPages())
        {
            foreach (var candidate in currentPage.GetWords())
            {
                var content = candidate.Text;
                if (!IsTechnicalFeature(content))
                {
                    continue;
                }

                var box = candidate.BoundingBox;

                // Scale both coordinates by the page dimensions.
                // NOTE(review): the "1 - ..." presumably flips PDF's bottom-up Y axis into
                // a top-down one for the consumer — confirm against the caller.
                var relativeX = box.Left / currentPage.Width;
                var relativeY = 1 - (box.Bottom / currentPage.Height);

                var feature = new TextFeature
                {
                    X = relativeX,
                    Y = relativeY,
                    Text = content
                };
                features.Add(feature);
            }
        }

        return features;
    }

    /// <summary>
    /// Tells whether <paramref name="text"/> contains at least one of the feature markers
    /// ("Ø", "R", "x45" or "H7").
    /// </summary>
    /// <param name="text">Word text to inspect.</param>
    /// <returns><c>true</c> as soon as any marker is found; otherwise <c>false</c>.</returns>
    private static bool IsTechnicalFeature(string text)
    {
        foreach (var marker in FeatureMarkers)
        {
            if (text.Contains(marker))
            {
                return true;
            }
        }

        return false;
    }
}