Initial
This commit is contained in:
40
PdfMarker/Services/PdfTextExtractor.cs
Normal file
40
PdfMarker/Services/PdfTextExtractor.cs
Normal file
@ -0,0 +1,40 @@
|
||||
using PdfMarker.Models;
|
||||
using UglyToad.PdfPig;
|
||||
|
||||
namespace PdfMarker.Services;
|
||||
|
||||
/// <summary>
/// Scans a PDF for words that look like technical annotations and reports
/// each match together with a position expressed as a fraction of the page size.
/// </summary>
public class PdfTextExtractor
{
    /// <summary>
    /// Opens the PDF at <paramref name="filePath"/> and collects every word
    /// accepted by <see cref="IsTechnicalFeature"/>.
    /// </summary>
    /// <param name="filePath">Path of the PDF file to read.</param>
    /// <returns>
    /// One <see cref="TextFeature"/> per matching word, in the order the pages
    /// and their words are enumerated. The list is empty when nothing matches.
    /// </returns>
    public List<TextFeature> Extract(string filePath)
    {
        using var doc = PdfDocument.Open(filePath);

        var result = new List<TextFeature>();

        foreach (var page in doc.GetPages())
        {
            // The coordinates below are divided by the page size; a page with
            // a non-positive dimension cannot yield a meaningful fraction, so skip it.
            if (page.Width <= 0 || page.Height <= 0)
            {
                continue;
            }

            foreach (var word in page.GetWords())
            {
                if (!IsTechnicalFeature(word.Text))
                {
                    continue;
                }

                result.Add(new TextFeature
                {
                    // Left edge of the word as a fraction of the page width.
                    X = word.BoundingBox.Left / page.Width,
                    // Bottom edge of the word as a fraction of the page height, inverted.
                    // NOTE(review): presumably this converts to a top-down axis for the consumer — confirm.
                    Y = 1 - (word.BoundingBox.Bottom / page.Height),
                    Text = word.Text
                });
            }
        }

        return result;
    }

    /// <summary>
    /// Returns <c>true</c> when <paramref name="text"/> contains any of the
    /// markers "Ø", "R", "x45" or "H7" (case-sensitive, ordinal match).
    /// </summary>
    /// <param name="text">The word text to test.</param>
    // NOTE(review): the bare "R" test accepts every word containing a capital R,
    // not only values such as "R5" — confirm whether a stricter pattern is intended.
    private static bool IsTechnicalFeature(string text)
    {
        return text.Contains("Ø", StringComparison.Ordinal)
            || text.Contains("R", StringComparison.Ordinal)
            || text.Contains("x45", StringComparison.Ordinal)
            || text.Contains("H7", StringComparison.Ordinal);
    }
}
|
||||
Reference in New Issue
Block a user