Run this command to install the SDK:
npm install cloudmersive-ocr-api-client --save
Or add this snippet to your package.json:
"dependencies": { "cloudmersive-ocr-api-client": "^1.3.3" }
Run this command to install the SDK:
npm install cloudmersive-ocr-api-client --save
Or add this snippet to your package.json:
"dependencies": { "cloudmersive-ocr-api-client": "^1.3.3" }
var CloudmersiveOcrApiClient = require('cloudmersive-ocr-api-client');
var defaultClient = CloudmersiveOcrApiClient.ApiClient.instance;
// Configure API key authorization: Apikey
var Apikey = defaultClient.authentications['Apikey'];
Apikey.apiKey = 'YOUR API KEY';
var apiInstance = new CloudmersiveOcrApiClient.PdfOcrApi();
var imageFile = Buffer.from(fs.readFileSync("C:\\temp\\inputfile").buffer); // File | PDF file to perform OCR on.
var opts = {
'recognitionMode': "recognitionMode_example", // String | Optional; possible values are 'Basic' which provides basic recognition and is not resillient to page rotation, skew or low quality images uses 1-2 API calls per page; 'Normal' which provides highly fault tolerant OCR recognition uses 14-16 API calls per page; and 'Advanced' which provides the highest quality and most fault-tolerant recognition uses 28-30 API calls per page. Default recognition mode is 'Basic'
'language': "language_example", // String | Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
'preprocessing': "preprocessing_example" // String | Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
};
var callback = function(error, data, response) {
if (error) {
console.error(error);
} else {
console.log('API called successfully. Returned data: ' + data);
}
};
apiInstance.pdfOcrPost(imageFile, opts, callback);
Run this command to install the SDK:
pip install cloudmersive-ocr-api-client
from __future__ import print_function
import time
import cloudmersive_ocr_api_client
from cloudmersive_ocr_api_client.rest import ApiException
from pprint import pprint
# Configure API key authorization: Apikey
configuration = cloudmersive_ocr_api_client.Configuration()
configuration.api_key['Apikey'] = 'YOUR_API_KEY'
# create an instance of the API class
api_instance = cloudmersive_ocr_api_client.PdfOcrApi(cloudmersive_ocr_api_client.ApiClient(configuration))
image_file = '/path/to/file' # file | PDF file to perform OCR on.
recognition_mode = 'recognition_mode_example' # str | Optional; possible values are 'Basic' which provides basic recognition and is not resillient to page rotation, skew or low quality images uses 1-2 API calls per page; 'Normal' which provides highly fault tolerant OCR recognition uses 26-30 API calls per page; and 'Advanced' which provides the highest quality and most fault-tolerant recognition uses 28-30 API calls per page. Default recognition mode is 'Basic' (optional)
language = 'language_example' # str | Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish) (optional)
preprocessing = 'preprocessing_example' # str | Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended). (optional)
try:
# Converts an uploaded PDF file into text via Optical Character Recognition.
api_response = api_instance.pdf_ocr_post(image_file, recognition_mode=recognition_mode, language=language, preprocessing=preprocessing)
pprint(api_response)
except ApiException as e:
print("Exception when calling PdfOcrApi->pdf_ocr_post: %s\n" % e)
Run this command to install the SDK:
Install-Package Cloudmersive.APIClient.NET.OCR -Version 3.0.5
Run this command to install the SDK:
Install-Package Cloudmersive.APIClient.NETCore.OCR -Version 2.0.2
using System;
using System.Diagnostics;
using Cloudmersive.APIClient.NET.OCR.Api;
using Cloudmersive.APIClient.NET.OCR.Client;
using Cloudmersive.APIClient.NET.OCR.Model;
namespace Example
{
public class PdfOcrPostExample
{
public void main()
{
// Configure API key authorization: Apikey
Configuration.Default.AddApiKey("Apikey", "YOUR_API_KEY");
var apiInstance = new PdfOcrApi();
var imageFile = new System.IO.FileStream("C:\\temp\\inputfile", System.IO.FileMode.Open); // System.IO.Stream | PDF file to perform OCR on.
var recognitionMode = recognitionMode_example; // string | Optional; possible values are 'Basic' which provides basic recognition and is not resillient to page rotation, skew or low quality images uses 1-2 API calls per page; 'Normal' which provides highly fault tolerant OCR recognition uses 26-30 API calls per page; and 'Advanced' which provides the highest quality and most fault-tolerant recognition uses 28-30 API calls per page. Default recognition mode is 'Basic' (optional)
var language = language_example; // string | Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish) (optional)
var preprocessing = preprocessing_example; // string | Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended). (optional)
try
{
// Converts an uploaded PDF file into text via Optical Character Recognition.
PdfToTextResponse result = apiInstance.PdfOcrPost(imageFile, recognitionMode, language, preprocessing);
Debug.WriteLine(result);
}
catch (Exception e)
{
Debug.Print("Exception when calling PdfOcrApi.PdfOcrPost: " + e.Message );
}
}
}
}
using System;
using System.Diagnostics;
using Cloudmersive.APIClient.NETCore.OCR.Api;
using Cloudmersive.APIClient.NETCore.OCR.Client;
using Cloudmersive.APIClient.NETCore.OCR.Model;
namespace Example
{
public class PdfOcrPostExample
{
public void main()
{
// Configure API key authorization: Apikey
Configuration.Default.AddApiKey("Apikey", "YOUR_API_KEY");
var apiInstance = new PdfOcrApi();
var imageFile = new System.IO.FileStream("C:\\temp\\inputfile", System.IO.FileMode.Open); // System.IO.Stream | PDF file to perform OCR on.
var recognitionMode = recognitionMode_example; // string | Optional; possible values are 'Basic' which provides basic recognition and is not resillient to page rotation, skew or low quality images uses 1-2 API calls per page; 'Normal' which provides highly fault tolerant OCR recognition uses 26-30 API calls per page; and 'Advanced' which provides the highest quality and most fault-tolerant recognition uses 28-30 API calls per page. Default recognition mode is 'Basic' (optional)
var language = language_example; // string | Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish) (optional)
var preprocessing = preprocessing_example; // string | Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended). (optional)
try
{
// Converts an uploaded PDF file into text via Optical Character Recognition.
PdfToTextResponse result = apiInstance.PdfOcrPost(imageFile, recognitionMode, language, preprocessing);
Debug.WriteLine(result);
}
catch (Exception e)
{
Debug.Print("Exception when calling PdfOcrApi.PdfOcrPost: " + e.Message );
}
}
}
}
To install with Maven, add a reference to the repository in pom.xml:
<repositories> <repository> <id>jitpack.io</id> <url>https://jitpack.io</url> </repository> </repositories>
And add a reference to the dependency in pom.xml:
<dependencies> <dependency> <groupId>com.github.Cloudmersive</groupId> <artifactId>Cloudmersive.APIClient.Java</artifactId> <version>v4.25</version> </dependency> </dependencies>
To install with Gradle, add it in your root build.gradle at the end of repositories:
allprojects { repositories { ... maven { url 'https://jitpack.io' } } }
And add the dependency in build.gradle:
dependencies { implementation 'com.github.Cloudmersive:Cloudmersive.APIClient.Java:v4.25' }
// Import classes:
//import com.cloudmersive.client.invoker.ApiClient;
//import com.cloudmersive.client.invoker.ApiException;
//import com.cloudmersive.client.invoker.Configuration;
//import com.cloudmersive.client.invoker.auth.*;
//import com.cloudmersive.client.PdfOcrApi;
ApiClient defaultClient = Configuration.getDefaultApiClient();
// Configure API key authorization: Apikey
ApiKeyAuth Apikey = (ApiKeyAuth) defaultClient.getAuthentication("Apikey");
Apikey.setApiKey("YOUR API KEY");
// Uncomment the following line to set a prefix for the API key, e.g. "Token" (defaults to null)
//Apikey.setApiKeyPrefix("Token");
PdfOcrApi apiInstance = new PdfOcrApi();
File imageFile = new File("/path/to/file"); // File | PDF file to perform OCR on.
String recognitionMode = "recognitionMode_example"; // String | Optional; possible values are 'Basic' which provides basic recognition and is not resillient to page rotation, skew or low quality images uses 1-2 API calls per page; 'Normal' which provides highly fault tolerant OCR recognition uses 26-30 API calls per page; and 'Advanced' which provides the highest quality and most fault-tolerant recognition uses 28-30 API calls per page. Default recognition mode is 'Basic'
String language = "language_example"; // String | Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
String preprocessing = "preprocessing_example"; // String | Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
try {
PdfToTextResponse result = apiInstance.pdfOcrPost(imageFile, recognitionMode, language, preprocessing);
System.out.println(result);
} catch (ApiException e) {
System.err.println("Exception when calling PdfOcrApi#pdfOcrPost");
e.printStackTrace();
}
Run this command to install the SDK:
composer require cloudmersive/cloudmersive_ocr_api_client
<?php
require_once(__DIR__ . '/vendor/autoload.php');
// Configure API key authorization: Apikey
$config = Swagger\Client\Configuration::getDefaultConfiguration()->setApiKey('Apikey', 'YOUR_API_KEY');
$apiInstance = new Swagger\Client\Api\PdfOcrApi(
new GuzzleHttp\Client(),
$config
);
$image_file = "/path/to/file"; // \SplFileObject | PDF file to perform OCR on.
$recognition_mode = "recognition_mode_example"; // string | Optional; possible values are 'Basic' which provides basic recognition and is not resillient to page rotation, skew or low quality images uses 1-2 API calls per page; 'Normal' which provides highly fault tolerant OCR recognition uses 14-16 API calls per page; and 'Advanced' which provides the highest quality and most fault-tolerant recognition uses 28-30 API calls per page. Default recognition mode is 'Basic'
$language = "language_example"; // string | Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
$preprocessing = "preprocessing_example"; // string | Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
try {
$result = $apiInstance->pdfOcrPost($image_file, $recognition_mode, $language, $preprocessing);
print_r($result);
} catch (Exception $e) {
echo 'Exception when calling PdfOcrApi->pdfOcrPost: ', $e->getMessage(), PHP_EOL;
}
?>
Add the Objective-C client to your Podfile:
pod 'CloudmersiveOCRApiClient', '~> 1.0'
CMDefaultConfiguration *apiConfig = [CMDefaultConfiguration sharedConfig];
// Configure API key authorization: (authentication scheme: Apikey)
[apiConfig setApiKey:@"YOUR_API_KEY" forApiKeyIdentifier:@"Apikey"];
NSURL* imageFile = [NSURL fileURLWithPath:@"/path/to/file"]; // PDF file to perform OCR on.
NSString* recognitionMode = @"recognitionMode_example"; // Optional; possible values are 'Basic' which provides basic recognition and is not resillient to page rotation, skew or low quality images uses 1-2 API calls per page; 'Normal' which provides highly fault tolerant OCR recognition uses 26-30 API calls per page; and 'Advanced' which provides the highest quality and most fault-tolerant recognition uses 28-30 API calls per page. Default recognition mode is 'Basic' (optional)
NSString* language = @"language_example"; // Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish) (optional)
NSString* preprocessing = @"preprocessing_example"; // Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended). (optional)
CMPdfOcrApi*apiInstance = [[CMPdfOcrApi alloc] init];
// Converts an uploaded PDF file into text via Optical Character Recognition.
[apiInstance pdfOcrPostWithImageFile:imageFile
recognitionMode:recognitionMode
language:language
preprocessing:preprocessing
completionHandler: ^(CMPdfToTextResponse* output, NSError* error) {
if (output) {
NSLog(@"%@", output);
}
if (error) {
NSLog(@"Error calling CMPdfOcrApi->pdfOcrPost: %@", error);
}
}];
Add the Ruby client to your Gemfile:
gem 'cloudmersive-ocr-api-client', '~> 2.0.2'
# load the gem
require 'cloudmersive-ocr-api-client'
# setup authorization
CloudmersiveOcrApiClient.configure do |config|
# Configure API key authorization: Apikey
config.api_key['Apikey'] = 'YOUR API KEY'
# Uncomment the following line to set a prefix for the API key, e.g. 'Bearer' (defaults to nil)
#config.api_key_prefix['Apikey'] = 'Bearer'
end
api_instance = CloudmersiveOcrApiClient::PdfOcrApi.new
image_file = File.new('/path/to/inputfile') # File | PDF file to perform OCR on.
opts = {
recognition_mode: 'recognition_mode_example', # String | Optional; possible values are 'Basic' which provides basic recognition and is not resillient to page rotation, skew or low quality images uses 1-2 API calls per page; 'Normal' which provides highly fault tolerant OCR recognition uses 26-30 API calls per page; and 'Advanced' which provides the highest quality and most fault-tolerant recognition uses 28-30 API calls per page. Default recognition mode is 'Basic'
language: 'language_example', # String | Optional, language of the input document, default is English (ENG). Possible values are ENG (English), ARA (Arabic), ZHO (Chinese - Simplified), ZHO-HANT (Chinese - Traditional), ASM (Assamese), AFR (Afrikaans), AMH (Amharic), AZE (Azerbaijani), AZE-CYRL (Azerbaijani - Cyrillic), BEL (Belarusian), BEN (Bengali), BOD (Tibetan), BOS (Bosnian), BUL (Bulgarian), CAT (Catalan; Valencian), CEB (Cebuano), CES (Czech), CHR (Cherokee), CYM (Welsh), DAN (Danish), DEU (German), DZO (Dzongkha), ELL (Greek), ENM (Archaic/Middle English), EPO (Esperanto), EST (Estonian), EUS (Basque), FAS (Persian), FIN (Finnish), FRA (French), FRK (Frankish), FRM (Middle-French), GLE (Irish), GLG (Galician), GRC (Ancient Greek), HAT (Hatian), HEB (Hebrew), HIN (Hindi), HRV (Croatian), HUN (Hungarian), IKU (Inuktitut), IND (Indonesian), ISL (Icelandic), ITA (Italian), ITA-OLD (Old - Italian), JAV (Javanese), JPN (Japanese), KAN (Kannada), KAT (Georgian), KAT-OLD (Old-Georgian), KAZ (Kazakh), KHM (Central Khmer), KIR (Kirghiz), KOR (Korean), KUR (Kurdish), LAO (Lao), LAT (Latin), LAV (Latvian), LIT (Lithuanian), MAL (Malayalam), MAR (Marathi), MKD (Macedonian), MLT (Maltese), MSA (Malay), MYA (Burmese), NEP (Nepali), NLD (Dutch), NOR (Norwegian), ORI (Oriya), PAN (Panjabi), POL (Polish), POR (Portuguese), PUS (Pushto), RON (Romanian), RUS (Russian), SAN (Sanskrit), SIN (Sinhala), SLK (Slovak), SLV (Slovenian), SPA (Spanish), SPA-OLD (Old Spanish), SQI (Albanian), SRP (Serbian), SRP-LAT (Latin Serbian), SWA (Swahili), SWE (Swedish), SYR (Syriac), TAM (Tamil), TEL (Telugu), TGK (Tajik), TGL (Tagalog), THA (Thai), TIR (Tigrinya), TUR (Turkish), UIG (Uighur), UKR (Ukrainian), URD (Urdu), UZB (Uzbek), UZB-CYR (Cyrillic Uzbek), VIE (Vietnamese), YID (Yiddish)
preprocessing: 'preprocessing_example' # String | Optional, preprocessing mode, default is 'Auto'. Possible values are None (no preprocessing of the image), and Auto (automatic image enhancement of the image before OCR is applied; this is recommended).
}
begin
#Converts an uploaded PDF file into text via Optical Character Recognition.
result = api_instance.pdf_ocr_post(image_file, opts)
p result
rescue CloudmersiveOcrApiClient::ApiError => e
puts "Exception when calling PdfOcrApi->pdf_ocr_post: #{e}"
end
SwagPdfOcrApi api = new SwagPdfOcrApi();
SwagClient client = api.getClient();
// Configure API key authorization: Apikey
ApiKeyAuth Apikey = (ApiKeyAuth) client.getAuthentication('Apikey');
Apikey.setApiKey('YOUR API KEY');
Map<String, Object> params = new Map<String, Object>{
'imageFile' => Blob.valueOf('Sample text file\nContents'),
'recognitionMode' => 'recognitionMode_example',
'language' => 'language_example',
'preprocessing' => 'preprocessing_example'
};
try {
// cross your fingers
SwagPdfToTextResponse result = api.pdfOcrPost(params);
System.debug(result);
} catch (Swagger.ApiException e) {
// ...handle your exceptions
}
Install libcurl in your C/C++ project:
libcurl/7.75.0
CURL *curl;
CURLcode res;
curl = curl_easy_init();
if(curl) {
curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, "POST");
curl_easy_setopt(curl, CURLOPT_URL, "https://api.cloudmersive.com/ocr/pdf/toText");
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl, CURLOPT_DEFAULT_PROTOCOL, "https");
struct curl_slist *headers = NULL;
headers = curl_slist_append(headers, "recognitionMode: <string>");
headers = curl_slist_append(headers, "language: <string>");
headers = curl_slist_append(headers, "preprocessing: <string>");
headers = curl_slist_append(headers, "Content-Type: multipart/form-data");
headers = curl_slist_append(headers, "Apikey: YOUR-API-KEY-HERE");
curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
curl_mime *mime;
curl_mimepart *part;
mime = curl_mime_init(curl);
part = curl_mime_addpart(mime);
curl_mime_name(part, "imageFile");
curl_mime_filedata(part, "/path/to/file");
curl_easy_setopt(curl, CURLOPT_MIMEPOST, mime);
res = curl_easy_perform(curl);
curl_mime_free(mime);
}
curl_easy_cleanup(curl);
curl --location --request POST 'https://api.cloudmersive.com/ocr/pdf/toText' \
--header 'recognitionMode: <string>' \
--header 'language: <string>' \
--header 'preprocessing: <string>' \
--header 'Content-Type: multipart/form-data' \
--header 'Apikey: YOUR-API-KEY-HERE' \
--form 'imageFile=@"/path/to/file"'
import Foundation
#if canImport(FoundationNetworking)
import FoundationNetworking
#endif
var semaphore = DispatchSemaphore (value: 0)
let parameters = [
[
"key": "imageFile",
"src": "/path/to/file",
"type": "file"
]] as [[String : Any]]
let boundary = "Boundary-\(UUID().uuidString)"
var body = ""
var error: Error? = nil
for param in parameters {
if param["disabled"] == nil {
let paramName = param["key"]!
body += "--\(boundary)\r\n"
body += "Content-Disposition:form-data; name=\"\(paramName)\""
if param["contentType"] != nil {
body += "\r\nContent-Type: \(param["contentType"] as! String)"
}
let paramType = param["type"] as! String
if paramType == "text" {
let paramValue = param["value"] as! String
body += "\r\n\r\n\(paramValue)\r\n"
} else {
let paramSrc = param["src"] as! String
let fileData = try NSData(contentsOfFile:paramSrc, options:[]) as Data
let fileContent = String(data: fileData, encoding: .utf8)!
body += "; filename=\"\(paramSrc)\"\r\n"
+ "Content-Type: \"content-type header\"\r\n\r\n\(fileContent)\r\n"
}
}
}
body += "--\(boundary)--\r\n";
let postData = body.data(using: .utf8)
var request = URLRequest(url: URL(string: "https://api.cloudmersive.com/ocr/pdf/toText")!,timeoutInterval: Double.infinity)
request.addValue("<string>", forHTTPHeaderField: "recognitionMode")
request.addValue("<string>", forHTTPHeaderField: "language")
request.addValue("<string>", forHTTPHeaderField: "preprocessing")
request.addValue("multipart/form-data", forHTTPHeaderField: "Content-Type")
request.addValue("YOUR-API-KEY-HERE", forHTTPHeaderField: "Apikey")
request.addValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")
request.httpMethod = "POST"
request.httpBody = postData
let task = URLSession.shared.dataTask(with: request) { data, response, error in
guard let data = data else {
print(String(describing: error))
semaphore.signal()
return
}
print(String(data: data, encoding: .utf8)!)
semaphore.signal()
}
task.resume()
semaphore.wait()
This code snippet uses the built-in JavaScript XHR request capability
Run this command to install jQuery:
bower install jquery
var data = new FormData();
data.append("imageFile", fileInput.files[0], "file");
var xhr = new XMLHttpRequest();
xhr.withCredentials = true;
xhr.addEventListener("readystatechange", function() {
if(this.readyState === 4) {
console.log(this.responseText);
}
});
xhr.open("POST", "https://api.cloudmersive.com/ocr/pdf/toText");
xhr.setRequestHeader("recognitionMode", "<string>");
xhr.setRequestHeader("language", "<string>");
xhr.setRequestHeader("preprocessing", "<string>");
xhr.setRequestHeader("Apikey", "YOUR-API-KEY-HERE");
xhr.send(data);
var form = new FormData();
form.append("imageFile", fileInput.files[0], "file");
var settings = {
"url": "https://api.cloudmersive.com/ocr/pdf/toText",
"method": "POST",
"timeout": 0,
"headers": {
"recognitionMode": "<string>",
"language": "<string>",
"preprocessing": "<string>",
"Content-Type": "multipart/form-data",
"Apikey": "YOUR-API-KEY-HERE"
},
"processData": false,
"mimeType": "multipart/form-data",
"contentType": false,
"data": form
};
$.ajax(settings).done(function (response) {
console.log(response);
});
package main
import (
"fmt"
"bytes"
"mime/multipart"
"os"
"path/filepath"
"io"
"net/http"
"io/ioutil"
)
func main() {
url := "https://api.cloudmersive.com/ocr/pdf/toText"
method := "POST"
payload := &bytes.Buffer{}
writer := multipart.NewWriter(payload)
file, errFile1 := os.Open("/path/to/file")
defer file.Close()
part1,
errFile1 := writer.CreateFormFile("imageFile",filepath.Base("/path/to/file"))
_, errFile1 = io.Copy(part1, file)
if errFile1 != nil {
fmt.Println(errFile1)
return
}
err := writer.Close()
if err != nil {
fmt.Println(err)
return
}
client := &http.Client {
}
req, err := http.NewRequest(method, url, payload)
if err != nil {
fmt.Println(err)
return
}
req.Header.Add("recognitionMode", "<string>")
req.Header.Add("language", "<string>")
req.Header.Add("preprocessing", "<string>")
req.Header.Add("Content-Type", "multipart/form-data")
req.Header.Add("Apikey", "YOUR-API-KEY-HERE")
req.Header.Set("Content-Type", writer.FormDataContentType())
res, err := client.Do(req)
if err != nil {
fmt.Println(err)
return
}
defer res.Body.Close()
body, err := ioutil.ReadAll(res.Body)
if err != nil {
fmt.Println(err)
return
}
fmt.Println(string(body))
}