Retrieve Text, Metadata, or XMP From PDFs
Extract text, retrieve metadata, or retrieve XMP metadata from PDF documents.
Extract text from a PDF using the `pdf-text` endpoint. Extract metadata from a PDF using the `pdf-info` endpoint. Extract XMP metadata from a PDF using the `pdf-xmp` endpoint.
Check out Getting Started and Task Roadmap if you are new to The DynamicPDF API.
Extract Text
Extract text from a PDF using the pdf-text
endpoint. The following illustrates how easy it is to extract text from a PDF using this endpoint.
You can also specify the start page and page count properties to limit the pages to extract text from. Refer to the API documentation and the client library documentation for the pdf-text
endpoint.
Calling Endpoint Directly
Call the endpoint directly by passing the API key in the request header and specifying the PDF's path as the data.
# POST the PDF to the pdf-text endpoint.
# --data-binary sends the file bytes unmodified; plain --data strips CR/LF
# characters when reading from a file, which corrupts binary PDF content.
curl --location 'https://api.dynamicpdf.com/v1.0/pdf-text' \
  --header 'Authorization: Bearer DP--api-key--' \
  --header 'Content-Type: application/pdf' \
  --data-binary '@/C:/temp/solutions/text-metadata-xmp/fw4.pdf'
Calling Endpoint Using Client Library
You can also call the endpoint using a client library rather than directly. The processing and syntax are similar for all six languages.
- Create a new `PdfText` instance and pass a `PdfResource` instance to the constructor.
- Call the `PdfText` instance's `Process` method and get the results as a `PdfTextResponse`, which contains the extracted text as a JSON document.
- C# (.NET)
- Java
- Node.js
- PHP
- Go
- Python
using DynamicPDF.Api;
using System;
namespace PdfTextExample
{
    /// <summary>
    /// Sends a local PDF to the pdf-text endpoint and writes the returned JSON to the console.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            Run("DP.xxx-api-key-xxx", "C:/temp/dynamicpdf-api-usersguide-examples/");
        }

        /// <summary>
        /// Extracts text from <c>fw4.pdf</c> and prints the prettified JSON response.
        /// </summary>
        /// <param name="apiKey">API key assigned to the endpoint before processing.</param>
        /// <param name="basePath">Folder that contains <c>fw4.pdf</c>; a trailing separator is optional.</param>
        public static void Run(String apiKey, String basePath)
        {
            // Path.Combine avoids the doubled separator ("...examples//fw4.pdf") that
            // concatenating "/fw4.pdf" onto a base path ending in "/" produced.
            PdfResource resource = new PdfResource(System.IO.Path.Combine(basePath, "fw4.pdf"));
            PdfText pdfText = new PdfText(resource);
            pdfText.ApiKey = apiKey;
            PdfTextResponse response = pdfText.Process();
            // PrettyPrintUtil is a helper defined outside this sample.
            Console.WriteLine(PrettyPrintUtil.JsonPrettify(response.JsonContent));
        }
    }
}
import {
PdfResource,
PdfText
} from "@dynamicpdf/api"
/**
 * Sends fw4.pdf to the pdf-text endpoint and logs the parsed JSON response.
 */
export class PdfTextExample {
    /**
     * Builds the endpoint, processes it, and logs the result only when the
     * call reports success.
     */
    static async Run() {
        const folder = "C:/temp/dynamicpdf-api-usersguide-examples/";
        const key = "DP.xxx-api-key-xxx";

        const endpoint = new PdfText(new PdfResource(folder + "fw4.pdf"));
        endpoint.apiKey = key;

        const result = await endpoint.process();
        if (!result.isSuccessful) {
            return;
        }
        console.log(JSON.parse(result.content));
    }
}
await PdfTextExample.Run();
package com.dynamicpdf.api.examples;
import com.dynamicpdf.api.PdfResource;
import com.dynamicpdf.api.PdfText;
import com.dynamicpdf.api.PdfTextResponse;
import com.dynamicpdf.api.util.PrettyPrintUtility;
/**
 * Sends a local PDF to the pdf-text endpoint and prints the JSON it returns.
 */
public class PdfTextExample {

    public static void main(String[] args) {
        PdfTextExample.Run("DP.xxx--api-key--xxx",
                "C:/temp/dynamicpdf-api-usersguide-examples/");
    }

    /**
     * Extracts text from {@code fw4.pdf} and prints the pretty-printed JSON response.
     *
     * @param apiKey   API key set on the endpoint before processing
     * @param basePath folder prefix prepended to {@code fw4.pdf}
     */
    public static void Run(String apiKey, String basePath) {
        PdfText endpoint = new PdfText(new PdfResource(basePath + "fw4.pdf"));
        endpoint.setApiKey(apiKey);

        PdfTextResponse result = endpoint.process();
        System.out.println(PrettyPrintUtility.prettyPrintJSON(result.getJsonContent()));
    }
}
<?php
use DynamicPDF\Api\PdfResource;
use DynamicPDF\Api\PdfText;
require __DIR__ . '/vendor/autoload.php';
/**
 * Sends fw4.pdf to the pdf-text endpoint and echoes the JSON content of the response.
 */
class PdfTextExample
{
    private static string $BasePath = "C:/temp/dynamicpdf-api-usersguide-examples/";
    private static string $ApiKey = "DP.xxx-api-key-xxx";

    /**
     * Builds the PdfText endpoint from the configured path and key, processes it,
     * and writes the response's JsonContent to output.
     */
    public static function Run()
    {
        $pdf = new PdfResource(self::$BasePath . "fw4.pdf");
        $endpoint = new PdfText($pdf);
        $endpoint->ApiKey = self::$ApiKey;

        $result = $endpoint->Process();
        echo $result->JsonContent;
    }
}
PdfTextExample::Run();
package main
import (
"fmt"
"github.com/dynamicpdf-api/go-client/endpoint"
"github.com/dynamicpdf-api/go-client/resource"
)
// main sends fw4.pdf to the pdf-text endpoint and prints the response body
// when the call succeeds.
func main() {
	pdf := resource.NewPdfResourceWithResourcePath("C:/temp/dynamicpdf-api-samples/fw4.pdf", "fw4.pdf")

	// NOTE(review): the trailing 1 and 3 are presumably start page and page count — confirm against the Go client docs.
	textEndpoint := endpoint.NewPdfText(pdf, 1, 3)
	textEndpoint.Endpoint.BaseUrl = "https://api.dynamicpdf.com/"
	textEndpoint.Endpoint.ApiKey = "DP.xxx-api-key-xxx"

	// Process hands back a channel; receive from it to obtain the response.
	result := <-textEndpoint.Process()
	if !result.IsSuccessful() {
		return
	}
	fmt.Print(string(result.Content().Bytes()))
}
from dynamicpdf_api.pdf_text import PdfText
from dynamicpdf_api.pdf_resource import PdfResource
from Shared import *
def pdf_text_example(apikey, full_path):
    """Send ``fw4.pdf`` to the pdf-text endpoint and print the JSON content returned.

    Args:
        apikey: API key assigned to the endpoint before processing.
        full_path: Folder prefix that is prepended to ``fw4.pdf``.
    """
    extractor = PdfText(PdfResource(full_path + "fw4.pdf"))
    extractor.api_key = apikey
    # Restrict extraction using the start page / page count properties.
    extractor.start_page = 1
    extractor.page_count = 2
    result = extractor.process()
    print(result.json_content)


if __name__ == "__main__":
    # api_key and base_path come from the Shared module's star import.
    pdf_text_example(api_key, base_path + "/pdf-info/")
Retrieve Metadata
Retrieve metadata from a PDF using the pdf-info
endpoint. The following illustrates how easy it is to retrieve metadata from a PDF using this endpoint.
Refer to the endpoint documentation and client library documentation for the pdf-info
endpoint.
Calling Endpoint Directly
Call the endpoint directly by passing the API key in the request header and specifying the PDF's path as the data.
# POST the PDF to the pdf-info endpoint.
# --data-binary sends the file bytes unmodified; plain --data strips CR/LF
# characters when reading from a file, which corrupts binary PDF content.
curl --location 'https://api.dynamicpdf.com/v1.0/pdf-info' \
  --header 'Authorization: Bearer DP--api-key--' \
  --header 'Content-Type: application/pdf' \
  --data-binary '@/C:/temp/solutions/text-metadata-xmp/fw4.pdf'
Calling Endpoint Using Client Library
You can also call the endpoint using a client library rather than directly. The processing and syntax are similar for all six languages.
- Create a `PdfInfo` instance and pass a `PdfResource` instance to its constructor.
- Call the `PdfInfo` instance's `Process` method to return the PDF's metadata as JSON.
- C# (.NET)
- Java
- Node.js
- PHP
- Go
- Python
using DynamicPDF.Api;
using System;
namespace PdfInfoExample
{
    /// <summary>
    /// Sends a local PDF to the pdf-info endpoint and writes the returned JSON metadata to the console.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            Run("DP.xxx-api-key-xxx", "C:/temp/dynamicpdf-api-usersguide-examples/");
        }

        /// <summary>
        /// Retrieves metadata for <c>DocumentA.pdf</c> and prints the prettified JSON response.
        /// </summary>
        /// <param name="key">API key assigned to the endpoint before processing.</param>
        /// <param name="basePath">Folder that contains <c>DocumentA.pdf</c>; a trailing separator is optional.</param>
        public static void Run(string key, string basePath)
        {
            // Path.Combine avoids the doubled separator ("...examples//DocumentA.pdf") that
            // concatenating "/DocumentA.pdf" onto a base path ending in "/" produced.
            PdfResource resource = new PdfResource(System.IO.Path.Combine(basePath, "DocumentA.pdf"));
            PdfInfo pdfInfo = new PdfInfo(resource);
            pdfInfo.ApiKey = key;
            PdfInfoResponse response = pdfInfo.Process();
            // PrettyPrintUtil is a helper defined outside this sample.
            Console.WriteLine(PrettyPrintUtil.JsonPrettify(response.JsonContent));
        }
    }
}
import {
PdfResource,
PdfInfo
} from "@dynamicpdf/api"
/**
 * Sends DocumentA.pdf to the pdf-info endpoint and logs the parsed JSON metadata.
 */
export class PdfInfoExample {
    /**
     * Builds the endpoint, processes it, and logs the result only when the
     * call reports success.
     */
    static async Run() {
        var basePath = "C:/temp/dynamicpdf-api-usersguide-examples/";
        var apiKey = "DP.xxx-api-key-xxx";
        var resource = new PdfResource(basePath + "DocumentA.pdf");
        var pdfInfo = new PdfInfo(resource);
        pdfInfo.apiKey = apiKey;
        var res = await pdfInfo.process();
        if (res.isSuccessful) {
            console.log(JSON.parse(res.content));
        }
    }
}
// Await the async entry point (as the pdf-text and pdf-xmp samples do) so a
// rejection surfaces here instead of as a floating, unhandled promise.
await PdfInfoExample.Run();
package com.dynamicpdf.api.examples;
import com.dynamicpdf.api.PdfInfo;
import com.dynamicpdf.api.PdfInfoResponse;
import com.dynamicpdf.api.PdfResource;
import com.dynamicpdf.api.util.PrettyPrintUtility;
/**
 * Sends a local PDF to the pdf-info endpoint and prints the JSON metadata it returns.
 */
public class PdfInfoExample {

    public static void main(String[] args) {
        PdfInfoExample.Run("DP.xxx-api-key-xxx",
                "C:/temp/dynamicpdf-api-usersguide-examples/");
    }

    /**
     * Retrieves metadata for {@code DocumentA.pdf} and prints the pretty-printed JSON response.
     *
     * @param key      API key set on the endpoint before processing
     * @param basePath folder prefix prepended to {@code DocumentA.pdf}
     */
    public static void Run(String key, String basePath) {
        PdfInfo endpoint = new PdfInfo(new PdfResource(basePath + "DocumentA.pdf"));
        endpoint.setApiKey(key);

        PdfInfoResponse result = endpoint.process();
        System.out.println(PrettyPrintUtility.prettyPrintJSON(result.getJsonContent()));
    }
}
<?php
require __DIR__ . '/vendor/autoload.php';
use DynamicPDF\Api\PdfResource;
use DynamicPDF\Api\PdfInfo;
/**
 * Sends DocumentA.pdf to the pdf-info endpoint and echoes the JSON-encoded response.
 */
class PdfInfoExample
{
    private static string $BasePath = "C:/temp/dynamicpdf-api-usersguide-examples/";
    private static string $ApiKey = "DP.xxx-api-key-xxx";

    /**
     * Builds the PdfInfo endpoint from the configured path and key, processes it,
     * and writes the whole response object, JSON-encoded, to output.
     */
    public static function Run()
    {
        $pdf = new PdfResource(self::$BasePath . "DocumentA.pdf");
        $endpoint = new PdfInfo($pdf);
        $endpoint->ApiKey = self::$ApiKey;

        $result = $endpoint->Process();
        echo json_encode($result);
    }
}
PdfInfoExample::Run();
package main
import (
"fmt"
"github.com/dynamicpdf-api/go-client/endpoint"
"github.com/dynamicpdf-api/go-client/resource"
)
// main sends fw4.pdf to the pdf-info endpoint and prints the response body
// when the call succeeds.
func main() {
	pdf := resource.NewPdfResourceWithResourcePath("C:/temp/dynamicpdf-api-samples/pdf-info/fw4.pdf", "fw4.pdf")

	infoEndpoint := endpoint.NewPdfInfoResource(pdf)
	infoEndpoint.Endpoint.BaseUrl = "https://api.dynamicpdf.com/"
	infoEndpoint.Endpoint.ApiKey = "DP.xxx-api-key-xxx"

	// Process hands back a channel; receive from it to obtain the response.
	result := <-infoEndpoint.Process()
	if !result.IsSuccessful() {
		return
	}
	fmt.Print(string(result.Content().Bytes()))
}
from dynamicpdf_api.pdf_info import PdfInfo
from dynamicpdf_api.pdf_resource import PdfResource
import pprint
import json
from Shared import *
def pdf_info_example(api_key, full_path):
    """Send ``fw4.pdf`` to the pdf-info endpoint and pretty-print the JSON metadata.

    Args:
        api_key: API key assigned to the endpoint before processing.
        full_path: Folder prefix that is prepended to ``fw4.pdf``.
    """
    info_endpoint = PdfInfo(PdfResource(full_path + "fw4.pdf"))
    info_endpoint.api_key = api_key
    result = info_endpoint.process()
    # Parse the JSON text first so pprint formats it as a Python structure.
    metadata = json.loads(result.json_content)
    print(pprint.pformat(metadata))


if __name__ == "__main__":
    # api_key and base_path come from the Shared module's star import.
    pdf_info_example(api_key, base_path + "/pdf-info/")
Retrieve XMP Metadata
Retrieve a PDF document's XMP metadata using the pdf-xmp
endpoint.
Refer to the endpoint documentation and client library documentation for the pdf-xmp
endpoint.
Calling Endpoint Directly
Call the endpoint directly by passing the API key in the request header and specifying the PDF's path as the data.
# POST the PDF to the pdf-xmp endpoint.
# --data-binary sends the file bytes unmodified; plain --data strips CR/LF
# characters when reading from a file, which corrupts binary PDF content.
curl --location 'https://api.dynamicpdf.com/v1.0/pdf-xmp' \
  --header 'Authorization: Bearer DP--api-key--' \
  --header 'Content-Type: application/pdf' \
  --data-binary '@/C:/temp/solutions/text-metadata-xmp/fw4.pdf'
Calling Endpoint Using Client Library
You can also call the endpoint using a client library rather than directly. The processing and syntax are similar for all six languages.
- Create a new `PdfXmp` instance and pass a `PdfResource` instance containing the PDF.
- Call the `PdfXmp` instance's `Process` method; the PDF's XMP metadata is returned as XML.
- C# (.NET)
- Java
- Node.js
- PHP
- Go
- Python
using DynamicPDF.Api;
using System;
namespace PdfXmpExample
{
    /// <summary>
    /// Sends a local PDF to the pdf-xmp endpoint and writes the returned content to the console.
    /// </summary>
    class Program
    {
        static void Main(string[] args)
        {
            Run("DP.xxx-api-key-xxx", "C:/temp/dynamicpdf-api-usersguide-examples/");
        }

        /// <summary>
        /// Retrieves the XMP metadata of <c>fw4.pdf</c> and prints the response content.
        /// </summary>
        /// <param name="apiKey">API key assigned to the endpoint before processing.</param>
        /// <param name="basePath">Folder that contains <c>fw4.pdf</c>; a trailing separator is optional.</param>
        public static void Run(String apiKey, String basePath)
        {
            // Path.Combine avoids the doubled separator ("...examples//fw4.pdf") that
            // concatenating "/fw4.pdf" onto a base path ending in "/" produced.
            PdfResource resource = new PdfResource(System.IO.Path.Combine(basePath, "fw4.pdf"));
            PdfXmp pdfXmp = new PdfXmp(resource);
            pdfXmp.ApiKey = apiKey;
            XmlResponse response = pdfXmp.Process();
            Console.WriteLine(response.Content);
        }
    }
}
import {
PdfResource,
PdfXmp
} from "@dynamicpdf/api"
/**
 * Sends fw4.pdf to the pdf-xmp endpoint and logs the returned content.
 */
export class PdfXmpExample {
    /**
     * Builds the endpoint, processes it, and logs the raw response content
     * only when the call reports success.
     */
    static async Run() {
        const folder = "C:/temp/dynamicpdf-api-usersguide-examples/";
        const key = "DP.xxx-api-key-xxx";

        const endpoint = new PdfXmp(new PdfResource(folder + "fw4.pdf"));
        endpoint.apiKey = key;

        const result = await endpoint.process();
        if (!result.isSuccessful) {
            return;
        }
        console.log(result.content);
    }
}
await PdfXmpExample.Run();
package com.dynamicpdf.api.examples;
import com.dynamicpdf.api.PdfResource;
import com.dynamicpdf.api.PdfXmp;
import com.dynamicpdf.api.XmlResponse;
import com.dynamicpdf.api.util.PrettyPrintUtility;
/**
 * Sends a local PDF to the pdf-xmp endpoint and prints the XMP metadata it returns.
 */
public class PdfXmpExample {

    /**
     * Retrieves the XMP metadata of {@code fw4.pdf} and prints the response content.
     *
     * @param apiKey   API key set on the endpoint before processing
     * @param basePath folder prefix prepended to {@code fw4.pdf}
     */
    public static void Run(String apiKey, String basePath)
    {
        PdfResource resource = new PdfResource(basePath + "fw4.pdf");
        PdfXmp pdfXmp = new PdfXmp(resource);
        pdfXmp.setApiKey(apiKey);
        XmlResponse response = pdfXmp.process();
        // The pdf-xmp endpoint returns XML, so print it as-is rather than
        // routing it through the JSON pretty-printer.
        System.out.println(response.getContent());
    }

    public static void main(String[] args) {
        PdfXmpExample.Run("DP.xxx-api-key-xxx",
                "C:/temp/dynamicpdf-api-usersguide-examples/");
    }
}
<?php
require __DIR__ . '/vendor/autoload.php';
use DynamicPDF\Api\PdfResource;
use DynamicPDF\Api\PdfXmp;
/**
 * Sends fw4.pdf to the pdf-xmp endpoint and echoes the content of the response.
 */
class PdfXmpExample
{
    private static string $BasePath = "C:/temp/dynamicpdf-api-usersguide-examples/";
    private static string $ApiKey = "DP.xxx-api-key-xxx";

    /**
     * Builds the PdfXmp endpoint from the configured path and key, processes it,
     * and writes the response's Content to output.
     */
    public static function Run()
    {
        $pdf = new PdfResource(self::$BasePath . "fw4.pdf");
        $endpoint = new PdfXmp($pdf);
        $endpoint->ApiKey = self::$ApiKey;

        $result = $endpoint->Process();
        echo $result->Content;
    }
}
PdfXmpExample::Run();
package main
import (
"fmt"
"github.com/dynamicpdf-api/go-client/endpoint"
"github.com/dynamicpdf-api/go-client/resource"
)
// main sends fw4.pdf to the pdf-xmp endpoint and prints the response body
// when the call succeeds.
func main() {
	pdf := resource.NewPdfResourceWithResourcePath("C:/temp/dynamicpdf-api-samples/fw4.pdf", "fw4.pdf")

	xmpEndpoint := endpoint.NewPdfXmp(pdf)
	xmpEndpoint.Endpoint.BaseUrl = "https://api.dynamicpdf.com/"
	xmpEndpoint.Endpoint.ApiKey = "DP.xxx-api-key-xxx"

	// Process hands back a channel; receive from it to obtain the response.
	result := <-xmpEndpoint.Process()
	if !result.IsSuccessful() {
		return
	}
	fmt.Print(string(result.Content().Bytes()))
}
from dynamicpdf_api.pdf_xmp import PdfXmp
from dynamicpdf_api.pdf_resource import PdfResource
from Shared import *
def pdf_xmp_info(api_key, full_path):
    """Send ``fw4.pdf`` to the pdf-xmp endpoint and print the returned content.

    Args:
        api_key: API key assigned to the endpoint before processing.
        full_path: Folder prefix that is prepended to ``fw4.pdf``.
    """
    xmp_endpoint = PdfXmp(PdfResource(full_path + "fw4.pdf"))
    xmp_endpoint.api_key = api_key
    result = xmp_endpoint.process()
    print(result.content)


if __name__ == "__main__":
    # api_key and base_path come from the Shared module's star import.
    pdf_xmp_info(api_key, base_path + "/get-xmp-metadata-pdf-xmp-endpoint/")