Beispiele-Übersicht

Barcodes | Text extrahierbar machen

Barcodes

Strichcodes extrahieren

Extrahiere Strichcodes aus einem PDF-Dokument und schreibe sie in eine XML-Datei.

C# Beispiel:
// Open the input PDF and create the engine used for barcode recognition
using (Stream inStream = File.OpenRead(inPath))
using (Document inDoc = Document.Open(inStream, null))
using (Engine barcodeEngine = Engine.Create("BARCODES"))
{
    // Processing settings: engine, page mode "All", barcode mode "Extract"
    OcrParams ocrSettings = new OcrParams { Engine = barcodeEngine };
    PageOcrParams pageSettings = new PageOcrParams { Mode = PageOcrMode.All };
    BarcodeParams barcodeSettings = new BarcodeParams { Mode = BarcodeMode.Extract };

    // The barcode XML is written to this file
    using (Stream xmlStream = File.Create(barcodesPath))
    {
        barcodeSettings.XmlOutput = xmlStream;

        // Process the document; the first argument is a memory stream that is not used afterwards
        inDoc.Process(new MemoryStream(), null, ocrSettings, null, null, pageSettings, barcodeSettings);
    }
}
Java Beispiel:
try (// Open input document
    FileStream inStream = new FileStream(inPath, "r");
    Document inDoc = Document.open(inStream, null)) {

    // Create OCR engine
    try (Engine engine = Engine.create("barcodes")) {
        // Set process parameters
        OcrParams ocr = new OcrParams();
        ocr.setEngine(engine);

        PageOcrParams pageOcr = new PageOcrParams();
        pageOcr.setMode(PageOcrMode.ALL);

        BarcodeParams barcodes = new BarcodeParams();
        barcodes.setMode(BarcodeMode.EXTRACT);

        try (// Open the file that receives the barcode XML
            FileStream barcodesStream = new FileStream(barcodesPath, "rw")) {
            // Start from an empty file so that the content of an already
            // existing, longer file cannot remain behind the new XML
            barcodesStream.setLength(0);
            barcodes.setXmlOutput(barcodesStream);

            // Process document
            inDoc.process(new MemoryStream(), null, ocr, null, null, pageOcr, barcodes);
        }
    }
}

Swiss Payments Code (SPC) erkennen

Erkenne Swiss QR Codes in einem PDF-Dokument und gib den dekodierten Inhalt auf der Konsole aus.

C# Beispiel:
// In-memory buffer that holds the barcode XML between the two steps
using (Stream xmlBuffer = new MemoryStream())
{
    // Step 1: recognize the barcodes of the input file
    using (Stream pdfStream = File.OpenRead(inPath))
    {
        Recognize(pdfStream, null, xmlBuffer);
    }

    // Step 2: parse the barcodes from the XML written in step 1
    ParseBarcodes(xmlBuffer);
}
/// <summary>
/// Processes a PDF with the "barcodes" engine restricted to QR codes and
/// directs the barcode XML output to <paramref name="barcodesStream"/>.
/// </summary>
/// <param name="inStream">Stream of the input PDF.</param>
/// <param name="password">Password passed on when opening the input PDF; may be null.</param>
/// <param name="barcodesStream">Stream assigned as XML output of the barcode extraction.</param>
static void Recognize(Stream inStream, string password, Stream barcodesStream)
{
    // Forward the caller's password instead of a hard-coded null
    using (Document document = Document.Open(inStream, password))
    using (Stream outStream = new MemoryStream())
    {
        // Create OCR engine
        using (Engine engine = Engine.Create("barcodes"))
        {
            // Set process parameters
            engine.SetParameters("BarcodeTypes=QRCode");

            OcrParams ocr = new OcrParams();
            ocr.Engine = engine;

            PageOcrParams pageOcr = new PageOcrParams();
            pageOcr.Mode = PageOcrMode.All;

            BarcodeParams barcodes = new BarcodeParams();
            barcodes.Mode = BarcodeMode.Extract;
            barcodes.XmlOutput = barcodesStream;

            // The processed PDF goes to a memory stream that is not used
            // further; the returned warnings are not evaluated here
            document.Process(outStream, null, ocr, null, null, pageOcr, barcodes);
        }
    }
}
/// <summary>
/// Reads the barcode XML from the start of <paramref name="barcodesStream"/> and
/// prints every barcode whose first line is "SPC" to the console.
/// </summary>
/// <param name="barcodesStream">Seekable stream containing the barcode XML.</param>
static void ParseBarcodes(Stream barcodesStream)
{
    barcodesStream.Seek(0, SeekOrigin.Begin);
    XElement root = XElement.Load(barcodesStream, LoadOptions.PreserveWhitespace);
    XName barcodeName = root.Name.Namespace + "barcode";

    // Visit the "barcode" children of the root element
    foreach (XElement entry in root.Elements(barcodeName))
    {
        string content = entry.Value;

        // Entries with an "encoding" attribute carry hex digits: strip all
        // whitespace and turn each pair of digits into one byte
        if (entry.Attribute("encoding") != null)
        {
            try
            {
                content = Regex.Replace(content, @"\s+", "");
                byte[] raw = new byte[content.Length / 2];
                for (int pos = 0; pos < content.Length; pos += 2)
                {
                    raw[pos / 2] = Convert.ToByte(content.Substring(pos, 2), 16);
                }
                content = System.Text.Encoding.GetEncoding("iso-8859-1").GetString(raw);
            }
            catch (Exception)
            {
                // Skip entries that cannot be decoded
                continue;
            }
        }

        // Only barcodes whose first line is "SPC" are printed
        StringReader reader = new StringReader(content);
        string current = reader.ReadLine();
        if (current != "SPC")
        {
            continue;
        }

        // Print the page number followed by all lines of the barcode
        Console.WriteLine("Code on page {0}:", entry.Attribute("page").Value);
        Console.WriteLine("-------------------------------------------");
        while (current != null)
        {
            Console.WriteLine(current);
            current = reader.ReadLine();
        }
        Console.WriteLine("-------------------------------------------");
    }
}
Java Beispiel:
try (MemoryStream xmlBuffer = new MemoryStream()) {
    // Step 1: recognize the barcodes of the input file
    try (FileStream pdfStream = new FileStream(inPath, "r")) {
        recognize(pdfStream, null, xmlBuffer);
    }

    // Step 2: parse the barcodes from the XML written in step 1
    parseBarcodes(xmlBuffer);
}
/**
 * Processes a PDF with the "barcodes" engine restricted to QR codes and
 * directs the barcode XML output to the given stream.
 *
 * @param inStream       stream of the input PDF
 * @param password       password passed on when opening the input PDF; may be null
 * @param barcodesStream stream set as XML output of the barcode extraction
 */
static void recognize(Stream inStream, String password, Stream barcodesStream)
        throws ErrorCodeException, IOException {
    try (
        // Forward the caller's password instead of a hard-coded null
        Document document = Document.open(inStream, password);
        Stream outStream = new MemoryStream()) {

        // Create OCR engine
        try (Engine engine = Engine.create("barcodes")) {
            // Set process parameters
            engine.setParameters("BarcodeTypes=QRCode");

            // Use the setters, as the other Java samples do
            OcrParams ocr = new OcrParams();
            ocr.setEngine(engine);

            PageOcrParams pageOcr = new PageOcrParams();
            pageOcr.setMode(PageOcrMode.ALL);

            BarcodeParams barcodes = new BarcodeParams();
            barcodes.setMode(BarcodeMode.EXTRACT);
            barcodes.setXmlOutput(barcodesStream);

            // The processed PDF goes to a memory stream that is not used
            // further; the returned warnings are not evaluated here
            document.process(outStream, null, ocr, null, null, pageOcr, barcodes);
        }
    }
}
/**
 * Reads the barcode XML from the start of the given stream and prints every
 * barcode whose first line is "SPC" to the console.
 *
 * @param barcodesStream stream containing the barcode XML
 * @throws IOException                  if reading the XML fails
 * @throws SAXException                 if the XML cannot be parsed
 * @throws ParserConfigurationException if no XML parser can be created
 */
static void parseBarcodes(MemoryStream barcodesStream)
        throws IOException, SAXException, ParserConfigurationException {
    barcodesStream.seek(0);

    // Copy the stream content into a byte array the XML parser can read.
    // NOTE(review): assumes read() fills the whole buffer in one call - confirm
    int length = (int) barcodesStream.getLength();
    byte[] barcodesArray = new byte[length];
    barcodesStream.read(barcodesArray, 0, length);
    InputStream barcodesInputStream = new ByteArrayInputStream(barcodesArray);

    // The parser must be namespace-aware, otherwise getNamespaceURI() and
    // getElementsByTagNameNS() cannot be used to look up the elements
    DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
    dbFactory.setNamespaceAware(true);
    DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
    org.w3c.dom.Document barcodesDoc = dBuilder.parse(barcodesInputStream);

    Element barcodes = barcodesDoc.getDocumentElement();

    // Load all barcodes (elements named "barcode" in the root's namespace)
    NodeList barcodeList = barcodes.getElementsByTagNameNS(barcodes.getNamespaceURI(), "barcode");
    for (int i = 0; i < barcodeList.getLength(); i++) {
        Element barcode = (Element) barcodeList.item(i);

        // getNodeValue() is always null for elements; the text is the content
        String value = barcode.getTextContent();

        // Decode binary barcodes: hex digits, two per byte
        if (barcode.hasAttribute("encoding")) {
            String hex = value.replaceAll("\\s+", "");
            if (hex.length() % 2 != 0) {
                // Incomplete last byte: skip this barcode
                continue;
            }

            byte[] bb = new byte[hex.length() / 2];
            boolean valid = true;
            for (int j = 0; j < hex.length(); j += 2) {
                int high = Character.digit(hex.charAt(j), 16);
                int low = Character.digit(hex.charAt(j + 1), 16);
                if (high < 0 || low < 0) {
                    valid = false;
                    break;
                }
                bb[j / 2] = (byte) ((high << 4) + low);
            }
            if (!valid) {
                // Not a hex string: skip this barcode
                continue;
            }
            value = new String(bb, StandardCharsets.ISO_8859_1);
        }

        try (Scanner scanner = new Scanner(value)) {
            // Verify first line is "SPC" (compare content, not references)
            if (!scanner.hasNextLine()) {
                continue;
            }
            String line = scanner.nextLine();
            if (!"SPC".equals(line)) {
                continue;
            }

            // Print barcode to console, including the "SPC" line
            System.out.println("Code on page " + barcode.getAttribute("page") + ":");
            System.out.println("-------------------------------------------");
            System.out.println(line);
            while (scanner.hasNextLine()) {
                System.out.println(scanner.nextLine());
            }
            System.out.println("-------------------------------------------");
        }
    }
}

Text extrahierbar machen

Text in einem Dokument extrahierbar machen

Erkenne Text in einem PDF-Dokument mittels OCR und bette diesen ins Dokument ein. Setze die OCR-Engine und die zugehörigen Parameter.

C# Beispiel:
// Open the input document, create the output file and the OCR engine
using (Stream inStream = File.OpenRead(inPath))
using (Document inDoc = Document.Open(inStream, null))
using (Stream outStream = File.Create(outPath))
using (Engine ocrEngine = Engine.Create(engineName))
{
    // Configure the engine before it is handed to the processing parameters
    ocrEngine.SetParameters(engineParams);

    // Processing settings: engine, image mode "UpdateText", text mode "Update"
    OcrParams ocrSettings = new OcrParams { Engine = ocrEngine };
    ImageOcrParams imageSettings = new ImageOcrParams { Mode = ImageOcrMode.UpdateText };
    TextOcrParams textSettings = new TextOcrParams { Mode = TextOcrMode.Update };

    // Process the document into the output stream
    WarningList warnings = inDoc.Process(outStream, null, ocrSettings, imageSettings, textSettings, null, null);
}
Java Beispiel:
try (// Open the input document and the output file
    FileStream inStream = new FileStream(inPath, "r");
    Document inDoc = Document.open(inStream, null);
    FileStream outStream = new FileStream(outPath, "rw")) {
    // Set the output file's length to zero before writing
    outStream.setLength(0);

    try (Engine ocrEngine = Engine.create(engineName)) {
        // Configure the engine before it is handed to the processing parameters
        ocrEngine.setParameters(engineParams);

        // Processing settings: engine, image mode UPDATE_TEXT, text mode UPDATE
        OcrParams ocrSettings = new OcrParams();
        ocrSettings.setEngine(ocrEngine);

        ImageOcrParams imageSettings = new ImageOcrParams();
        imageSettings.setMode(ImageOcrMode.UPDATE_TEXT);

        TextOcrParams textSettings = new TextOcrParams();
        textSettings.setMode(TextOcrMode.UPDATE);

        // Process the document into the output stream
        inDoc.process(outStream, null, ocrSettings, imageSettings, textSettings, null, null);
    }
}