User bio
404 bio not found
Member since Jun 1, 2021
Replies:

I found an answer while searching the community pages.

I have written the code below, which reads all the text from a PDF:

/// Extracts the text of a PDF file using Embedded Python and the PyPDF2 package.
/// If PyPDF2 cannot be imported, GetText() attempts a one-time install of it
/// into the "python" subdirectory of the instance manager directory.
Class PRASHANTHNSPKG.ReadPDF Extends %RegisteredObject
{

/// Reads the text of every page of the PDF at <var>file</var> and returns it
/// in <var>text</var> (one trailing newline is appended per page).<br/>
/// <var>file</var> - path of the PDF to read.<br/>
/// <var>text</var> - output; killed on entry and set only when extraction succeeds.<br/>
/// Returns $$$OK on success, otherwise an error %Status describing the failure
/// (import/installation problem or any exception raised while reading the PDF).<br/>
/// Usage: zw ##class(PRASHANTHNSPKG.ReadPDF).GetText("/tmp/example.pdf", .text)
ClassMethod GetText(file, Output text) As %Status
{
  #dim sc As %Status = $$$OK
  Kill text
  Try {
    // Target directory for the package install: <mgr>/python
    // (presumably on the Embedded Python search path -- confirm for your instance).
    Set dir = $SYSTEM.Util.ManagerDirectory()_ "python"
    Do ##class(%File).CreateDirectoryChain(dir)

    // Manual equivalent of the automatic install below:
    // C:\InterSystems\IRISHealth\bin>irispip install --target C:\InterSystems\IRISHealth\mgr\python PyPDF2
    Try {
      Set pypdf2 = $SYSTEM.Python.Import("PyPDF2")
    } Catch {
      // PyPDF2 is not importable yet: try to install it, then import again.
      Set cmd = "irispip"
      Set args($INCREMENT(args)) = "install"
      Set args($INCREMENT(args)) = "--target"
      Set args($INCREMENT(args)) = dir
      Set args($INCREMENT(args)) = "PyPDF2"

      // $ZF(-100) returns the process exit code, NOT a %Status. Keep it in its
      // own variable so a successful install (exit code 0) is not returned to
      // the caller as a failing status value of 0.
      Set rc = $ZF(-100,"", cmd, .args)
      If rc '= 0 {
        $$$ThrowStatus($$$ERROR($$$GeneralError, "irispip install of PyPDF2 failed (exit code "_rc_")"))
      }

      // Any failure here propagates to the outer Catch and becomes the returned status.
      Set pypdf2 = $SYSTEM.Python.Import("PyPDF2")
    }
    Return:'$DATA(pypdf2) $$$ERROR($$$GeneralError, "Unable to load PyPDF2")

    // The imported module was only needed as an availability check;
    // GetTextPy() performs its own import.
    Kill pypdf2

    Set text = ..GetTextPy(file)
  } Catch ex {
    Set sc = ex.AsStatus()
  }
  Quit sc
}

/// Embedded Python helper: opens the PDF at <var>file</var> with PyPDF2's PdfReader
/// and returns the extracted text of all pages, each page followed by a newline.
/// Python exceptions are not handled here; GetText() converts them to a %Status.
ClassMethod GetTextPy(file) [ Language = python ]
{
  from PyPDF2 import PdfReader

  reader = PdfReader(file)
  # "or ''" guards against a page for which extract_text() yields None,
  # which would otherwise raise TypeError on string concatenation.
  return "".join((page.extract_text() or "") + "\n" for page in reader.pages)
}

}

Certifications & Credly badges:
Global Masters badges:
Followers:
Following: