---
# OpenAPI 3.0 definition of the Presidio REST API.
openapi: 3.0.0

# General metadata about the API: name, version, contact and license.
info:
  title: Presidio
  # Quoted so the version stays the string "2.0" instead of the float 2.0.
  version: "2.0"
  description: >-
    Context aware, pluggable and customizable PII anonymization service
    for text and images.
  contact:
    name: Presidio support
    email: presidio@microsoft.com
    url: "https://github.com/microsoft/presidio"
  license:
    name: MIT
    url: "https://github.com/microsoft/presidio/blob/main/LICENSE"
  # Specification extension (`x-` prefix) carrying the URL of a logo image.
  x-logo:
    url: "https://upload.wikimedia.org/wikipedia/commons/4/44/Microsoft_logo.svg"
# Link to the long-form project documentation.
externalDocs:
  url: "https://microsoft.github.io/presidio/"
  description: Presidio documentation.
# Tags used to group the operations under `paths`; each operation lists
# one or both of these names in its own `tags` entry.
tags:
  - name: Analyzer
    description: "Detecting PII entities in text"
  - name: Anonymizer
    description: "Anonymizing detected PII text entities with desired values."

# Operations of the Presidio REST API.
#
# Each operation declares its own `servers` entry: the Analyzer operations
# point at the analyzer host and the Anonymizer operations at the anonymizer
# host.
#
# Response status codes are written as quoted strings ("200", "400", "422").
# Unquoted, YAML would load them as integers, and the OpenAPI 3.0
# specification requires these keys to be quoted for JSON/YAML compatibility.
#
# Example payloads are kept in JSON (flow) notation on purpose, because they
# document JSON bodies exactly as they appear on the wire.
paths:
  /analyze:
    post:
      servers:
        - url: https://presidio-analyzer-prod.azurewebsites.net
      tags:
        - Analyzer
      summary: "Analyze Text"
      description: "Recognizes PII entities in a given text and returns their types, locations and score"
      requestBody:
        $ref: "#/components/requestBodies/AnalyzeRequest"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                description: "A list of analysis results"
                type: array
                items:
                  # The schema name is spelled "Anaysis" (sic) where it is
                  # defined under components; the reference must match it.
                  $ref: "#/components/schemas/RecognizerResultWithAnaysisExplanation"
              examples:
                # Response that includes the `analysis_explanation` object.
                # NOTE(review): in the second result's "pattern", the
                # alternative `A-Z]{2}[0-9]{2,5}` appears to be missing its
                # opening `[`. It is reproduced verbatim as sample output;
                # confirm against the recognizer before changing it.
                Enhanced response:
                  value:
                    [
                      { "entity_type": "PERSON", "start": 0, "end": 10, "score": 0.85,
                        "analysis_explanation": {
                          "recognizer": "SpacyRecognizer", "pattern_name": null, "pattern": null, "original_score": 0.85,
                          "score": 0.85, "textual_explanation": "Identified as PERSON by Spacy's Named Entity Recognition",
                          "score_context_improvement": 0, "supportive_context_word": "", "validation_result": null
                        },
                        "recognition_metadata": {
                          "recognizer_name": "SpacyRecognizer"
                        }
                      },
                      { "entity_type": "US_DRIVER_LICENSE", "start": 30, "end": 38, "score": 0.6499999999999999,
                        "analysis_explanation": {
                          "recognizer": "UsLicenseRecognizer", "pattern_name": "Driver License - Alphanumeric (weak)",
                          "pattern": "\\\\b([A-Z][0-9]{3,6}|[A-Z][0-9]{5,9}|[A-Z][0-9]{6,8}|[A-Z][0-9]{4,8}|[A-Z][0-9]{9,11}|[A-Z]{1,2}[0-9]{5,6}|H[0-9]{8}|V[0-9]{6}|X[0-9]{8}|A-Z]{2}[0-9]{2,5}|[A-Z]{2}[0-9]{3,7}|[0-9]{2}[A-Z]{3}[0-9]{5,6}|[A-Z][0-9]{13,14}|[A-Z][0-9]{18}|[A-Z][0-9]{6}R|[A-Z][0-9]{9}|[A-Z][0-9]{1,12}|[0-9]{9}[A-Z]|[A-Z]{2}[0-9]{6}[A-Z]|[0-9]{8}[A-Z]{2}|[0-9]{3}[A-Z]{2}[0-9]{4}|[A-Z][0-9][A-Z][0-9][A-Z]|[0-9]{7,8}[A-Z])\\\\b",
                          "original_score": 0.3, "score": 0.6499999999999999, "textual_explanation": null,
                          "score_context_improvement": 0.3499999999999999, "supportive_context_word": "driver",
                          "validation_result": null
                        },
                        "recognition_metadata": {
                          "recognizer_name": "UsLicenseRecognizer"
                        }
                      }
                    ]
                # Response in which `analysis_explanation` is null.
                Lean response:
                  value:
                    [
                      {
                        "analysis_explanation": null,
                        "end": 38,
                        "entity_type": "US_DRIVER_LICENSE",
                        "score": 0.6499999999999999,
                        "start": 30,
                        "recognition_metadata": {
                          "recognizer_name": "UsLicenseRecognizer"
                        }
                      }
                    ]

  /recognizers:
    get:
      servers:
        - url: https://presidio-analyzer-prod.azurewebsites.net
      tags:
        - Analyzer
      summary: "Get Recognizers"
      description: "Get the available PII recognizers for a given language"
      parameters:
        - name: language
          in: query
          description: "Two characters for the desired language in ISO_639-1 format"
          schema:
            type: string
          example: en
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                description: "A list of supported recognizers"
                type: array
                items:
                  type: string
                  description: "Recognizer name"
                example:
                  [
                    "CryptoRecognizer", "CreditCardRecognizer", "IbanRecognizer", "UsPhoneRecognizer",
                    "EmailRecognizer", "UsPassportRecognizer", "NhsRecognizer", "IpRecognizer",
                    "SpacyRecognizer", "SgFinRecognizer", "UsSsnRecognizer", "UsBankRecognizer",
                    "DomainRecognizer", "UsLicenseRecognizer", "UsItinRecognizer"
                  ]

  /supportedentities:
    get:
      servers:
        - url: https://presidio-analyzer-prod.azurewebsites.net
      tags:
        - Analyzer
      summary: "Get supported entities"
      description: "Get the list of PII entities Presidio-Analyzer is capable of detecting"
      parameters:
        - name: language
          in: query
          description: "Two characters for the desired language in ISO_639-1 format"
          schema:
            type: string
          example: en
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                description: "A list of supported entities"
                type: array
                items:
                  $ref: "#/components/schemas/EntityTypes"
                example:
                  [
                    "PHONE_NUMBER", "US_DRIVER_LICENSE", "US_PASSPORT", "LOCATION", "CREDIT_CARD", "CRYPTO",
                    "UK_NHS", "US_SSN", "US_BANK_NUMBER", "EMAIL_ADDRESS", "DATE_TIME", "IP_ADDRESS", "PERSON", "IBAN_CODE",
                    "NRP", "US_ITIN", "MEDICAL_LICENSE", "URL"
                  ]

  /anonymize:
    post:
      servers:
        - url: https://presidio-anonymizer-prod.azurewebsites.net
      tags:
        - Anonymizer
      summary: "Anonymize Text"
      requestBody:
        $ref: "#/components/requestBodies/AnonymizeRequest"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/AnonymizeResponse"
              examples:
                Replace and Redact Anonymizers:
                  value:
                    {
                      "text": "hello world, my name is ANONYMIZED. My number is: ",
                      "items": [
                        { "operator": "redact", "entity_type": "PHONE_NUMBER", "start": 50, "end": 50, "text": "" },
                        { "operator": "replace", "entity_type": "NAME", "start": 24, "end": 34, "text": "ANONYMIZED" }
                      ]
                    }
                Replace as default Anonymizer:
                  value:
                    {
                      "text": "hello world, my name is ANONYMIZED. My number is: ANONYMIZED",
                      "items": [
                        { "operator": "replace", "entity_type": "PHONE_NUMBER", "start": 50, "end": 60, "text": "ANONYMIZED" },
                        { "operator": "replace", "entity_type": "NAME", "start": 24, "end": 34, "text": "ANONYMIZED" }
                      ]
                    }
        "400":
          $ref: "#/components/responses/400BadRequest"
        "422":
          $ref: "#/components/responses/422UnprocessableEntity"

  /anonymizers:
    get:
      servers:
        - url: https://presidio-anonymizer-prod.azurewebsites.net
      tags:
        - Anonymizer
      summary: "Get supported anonymizers"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                description: "A list of all built-in supported anonymizers"
                type: array
                items:
                  description: "The Anonymizer name"
                  type: string
                  example: "mask"
                example:
                  [ "hash", "mask", "redact", "replace", "encrypt" ]

  /deanonymize:
    post:
      servers:
        - url: https://presidio-anonymizer-prod.azurewebsites.net
      tags:
        - Anonymizer
      summary: "Deanonymize Text"
      requestBody:
        $ref: "#/components/requestBodies/DeanonymizeRequest"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/DeanonymizeResponse"
              examples:
                Decrypt Single PII:
                  value:
                    {
                      "text": "text_for_encryption",
                      "items": [
                        { "start": 0, "end": 19, "operator": "decrypt", "text": "text_for_encryption", "entity_type": "NUMBER" }
                      ]
                    }
        "400":
          $ref: "#/components/responses/400BadRequest"
        "422":
          $ref: "#/components/responses/422UnprocessableEntity"

  /deanonymizers:
    get:
      servers:
        - url: https://presidio-anonymizer-prod.azurewebsites.net
      tags:
        - Anonymizer
      summary: "Get supported deanonymizers"
      responses:
        "200":
          description: OK
          content:
            application/json:
              schema:
                description: "A list of all built-in supported deanonymizers"
                type: array
                items:
                  description: "The Deanonymizer name"
                  type: string
                  example: "decrypt"
                example:
                  [ "decrypt" ]

  # Tagged for both services, but the only server listed here (and the
  # example body) is the anonymizer's.
  /health:
    get:
      servers:
        - url: https://presidio-anonymizer-prod.azurewebsites.net
      tags:
        - Anonymizer
        - Analyzer
      summary: "Healthcheck"
      responses:
        "200":
          description: OK
          content:
            text/plain:
              schema:
                type: string
                example: Presidio Anonymizer service is up

# Reusable definitions referenced from `paths` through `$ref`:
#   requestBodies - request payload definitions with JSON examples
#   schemas       - data models
#   responses     - shared error responses
components:
  requestBodies:
    # Example payloads are kept in JSON (flow) notation because they document
    # JSON bodies; they contain no trailing commas, so they can be copied
    # into a JSON client unchanged.
    AnalyzeRequest:
      required: true
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/AnalyzeRequest"
          examples:
            Minimal Request:
              value:
                {
                  "text": "John Smith drivers license is AC432223",
                  "language": "en"
                }
            Enhanced Request:
              value:
                {
                  "text": "John Smith drivers license is AC432223 and the zip code is 12345",
                  "language": "en",
                  "return_decision_process": false,
                  "correlation_id": "123e4567-e89b-12d3-a456-426614174000",
                  "score_threshold": 0.6,
                  "entities": ["US_DRIVER_LICENSE", "ZIP"],
                  "trace": false,
                  "ad_hoc_recognizers": [
                    {
                      "name": "Zip code Recognizer",
                      "supported_language": "en",
                      "patterns": [
                        {
                          "name": "zip code (weak)",
                          "regex": "(\\b\\d{5}(?:\\-\\d{4})?\\b)",
                          "score": 0.01
                        }
                      ],
                      "context": ["zip", "code"],
                      "supported_entity": "ZIP"
                    }
                  ]
                }

    AnonymizeRequest:
      required: true
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/AnonymizeRequest"
          examples:
            Replace and Redact Anonymizers:
              value:
                {
                  "text": "hello world, my name is Jane Doe. My number is: 034453334",
                  "anonymizers": {
                    "PERSON": { "type": "redact" },
                    "PHONE_NUMBER": { "type": "replace", "new_value": "ANONYMIZED" }
                  },
                  "analyzer_results": [
                    { "start": 24, "end": 32, "score": 0.8, "entity_type": "PERSON" },
                    { "start": 48, "end": 57, "score": 0.95, "entity_type": "PHONE_NUMBER" }
                  ]
                }
            Replace as default Anonymizer:
              value:
                {
                  "text": "hello world, my name is Jane Doe.",
                  "anonymizers": {
                    "DEFAULT": { "type": "replace", "new_value": "ANONYMIZED" }
                  },
                  "analyzer_results": [
                    { "start": 24, "end": 32, "score": 0.8, "entity_type": "PERSON" }
                  ]
                }

    DeanonymizeRequest:
      required: true
      content:
        application/json:
          schema:
            $ref: "#/components/schemas/DeanonymizeRequest"
          examples:
            Deanonymize text:
              value:
                {
                  "text": "My name is S184CMt9Drj7QaKQ21JTrpYzghnboTF9pn/neN8JME0=",
                  "deanonymizers": {
                    "PERSON": {
                      "type": "decrypt",
                      "key": "WmZq4t7w!z%C&F)J"
                    }
                  },
                  "anonymizer_results": [
                    {
                      "start": 11,
                      "end": 55,
                      "entity_type": "PERSON"
                    }
                  ]
                }

  schemas:
    # ---- Request models ----------------------------------------------------

    AnalyzeRequest:
      type: object
      required:
        - text
        - language
      properties:
        text:
          type: string
          description: "The text to analyze"
          example: "hello world, my name is Jane Doe. My number is: 034453334"
        language:
          type: string
          description: "Two characters for the desired language in ISO_639-1 format"
          example: "en"
        correlation_id:
          type: string
          description: "A correlation id to append to headers and traces"
        score_threshold:
          type: number
          format: double
          description: "The minimal detection score threshold"
        entities:
          type: array
          items:
            $ref: "#/components/schemas/EntityTypes"
          description: "A list of entities to analyze"
        return_decision_process:
          type: boolean
          description: "Whether to include analysis explanation in the response"
        ad_hoc_recognizers:
          type: array
          description: "list of recognizers to be used in the context of this request only (ad-hoc)."
          items:
            $ref: "#/components/schemas/PatternRecognizer"
        context:
          type: array
          description: "list of context words which may help to raise recognized entities confidence"
          items:
            description: "The context word"
            type: string
            example: "address"

    AnonymizeRequest:
      type: object
      required:
        - text
        - analyzer_results
      properties:
        text:
          type: string
          description: "The text to anonymize"
          example: "hello world, my name is Jane Doe. My number is: 034453334"
        anonymizers:
          description: "Object where the key is DEFAULT or the ENTITY_TYPE and the value is the anonymizer definition"
          type: object
          additionalProperties:
            anyOf:
              - $ref: "#/components/schemas/Replace"
              - $ref: "#/components/schemas/Redact"
              - $ref: "#/components/schemas/Mask"
              - $ref: "#/components/schemas/Hash"
              - $ref: "#/components/schemas/Encrypt"
          default:
            DEFAULT:
              type: replace
              new_value: "<ENTITY_TYPE>"
        analyzer_results:
          type: array
          description: "Array of analyzer detections"
          items:
            $ref: "#/components/schemas/RecognizerResult"

    DeanonymizeRequest:
      type: object
      required:
        - text
        - anonymizer_results
        - deanonymizers
      properties:
        text:
          type: string
          description: "The anonymized text"
          example: "My name is S184CMt9Drj7QaKQ21JTrpYzghnboTF9pn/neN8JME0="
        deanonymizers:
          description: "Object where the key is DEFAULT or the ENTITY_TYPE and the value is decrypt since it is the only one supported"
          type: object
          additionalProperties:
            anyOf:
              - $ref: "#/components/schemas/Decrypt"
          default:
            DEFAULT:
              type: decrypt
              key: "3t6w9z$C&F)J@NcR"
        anonymizer_results:
          type: array
          description: "Array of anonymized PIIs"
          items:
            $ref: "#/components/schemas/OperatorResult"

    # ---- Analyzer result models --------------------------------------------

    RecognizerResult:
      type: object
      required:
        - start
        - end
        - score
        - entity_type
      properties:
        start:
          type: integer
          description: "Where the PII starts"
          example: 24
        end:
          type: integer
          description: "Where the PII ends"
          example: 32
        score:
          type: number
          format: double
          description: "The PII detection score"
          example: 0.8
        entity_type:
          $ref: "#/components/schemas/EntityTypes"
        recognition_metadata:
          # `$ref` stands alone: OpenAPI 3.0 ignores any keyword placed next
          # to it, and the referenced schema already declares `type: object`.
          $ref: "#/components/schemas/RecognizedMetadata"

    RecognizedMetadata:
      type: object
      properties:
        recognizer_name:
          type: string
          description: "Name of recognizer that made the decision"

    # The name is spelled "Anaysis" (sic). It is kept as is because the
    # /analyze response refers to this schema by exactly this name.
    RecognizerResultWithAnaysisExplanation:
      allOf:
        - $ref: "#/components/schemas/RecognizerResult"
        - type: object
          properties:
            analysis_explanation:
              $ref: "#/components/schemas/AnalysisExplanation"

    AnalysisExplanation:
      type: object
      # Nullable because the "Lean response" example of /analyze returns
      # `"analysis_explanation": null`.
      nullable: true
      properties:
        recognizer:
          type: string
          description: "Name of recognizer that made the decision"
        # pattern_name, pattern, textual_explanation and validation_result
        # are nullable: each is null in at least one /analyze response
        # example in this file.
        pattern_name:
          type: string
          nullable: true
          description: "name of pattern (if decision was made by a PatternRecognizer)"
        pattern:
          type: string
          nullable: true
          description: "Regex pattern that was applied (if PatternRecognizer)"
        original_score:
          type: number
          format: double
          description: "Recognizer's confidence in result"
        score:
          type: number
          format: double
          description: "The PII detection score"
        textual_explanation:
          type: string
          nullable: true
          description: "Free text for describing a decision of a logic or model"
        score_context_improvement:
          type: number
          format: double
          description: "Difference from the original score"
        supportive_context_word:
          type: string
          description: "The context word which helped increase the score"
        validation_result:
          type: number
          format: double
          nullable: true
          description: "Result of a validation (e.g. checksum)"

    # ---- Ad-hoc recognizer models ------------------------------------------

    Pattern:
      type: object
      properties:
        name:
          type: string
          description: "Name of regular expression pattern"
        regex:
          type: string
          description: "Regex pattern string"
        score:
          type: number
          format: double
          description: "Detection confidence of this pattern (0.01 if very noisy, 0.6-1.0 if very specific)"

    PatternRecognizer:
      type: object
      description: "A regular expressions or deny-list based recognizer"
      properties:
        name:
          type: string
          description: "Name of recognizer"
        supported_language:
          type: string
          description: "Language code supported by this recognizer"
        patterns:
          description: "List of type Pattern containing regex expressions with additional metadata."
          type: array
          items:
            $ref: "#/components/schemas/Pattern"
        deny_list:
          type: array
          description: "List of words to be returned as PII if found."
          items:
            type: string
        context:
          description: "List of words to be used to increase confidence if found in the vicinity of detected entities."
          type: array
          items:
            type: string
        supported_entity:
          type: string
          description: "The name of entity this ad hoc recognizer detects"

    EntityTypes:
      description: "The supported PII entity types."
      type: string
      example: PERSON

    # ---- Operator definitions ----------------------------------------------
    # Replace, Redact, Mask, Hash and Encrypt are the alternatives accepted
    # as values of AnonymizeRequest.anonymizers; Decrypt is the one accepted
    # by DeanonymizeRequest.deanonymizers. Every definition requires a
    # `type` field.

    Replace:
      title: Replace
      description: "Replace with a given value"
      type: object
      required:
        - type
        - new_value
      properties:
        type:
          type: string
          description: "replace"
          example: replace
        new_value:
          type: string
          description: "The string to replace with"
          example: VALUE

    Redact:
      title: Redact
      description: "Replace with an empty string"
      type: object
      required:
        - type
      properties:
        type:
          type: string
          description: "redact"
          example: redact

    Mask:
      title: Mask
      description: "Replace with a given character"
      type: object
      required:
        - type
        - masking_char
        - chars_to_mask
      properties:
        type:
          type: string
          description: "mask"
          example: mask
        masking_char:
          type: string
          description: "The replacement character"
          example: "*"
        chars_to_mask:
          type: integer
          description: "The amount of characters that should be replaced"
          example: 4
        from_end:
          type: boolean
          description: "Whether to mask the PII from its end"
          example: true
          default: false

    Hash:
      title: Hash
      description: "Replace with hashed value"
      type: object
      required:
        - type
      properties:
        type:
          type: string
          description: "hash"
          example: hash
        hash_type:
          type: string
          description: "The hashing algorithm"
          enum:
            - md5
            - sha256
            - sha512
          example: md5
          default: md5

    Encrypt:
      title: Encrypt
      description: "Replace with an encrypted value"
      type: object
      required:
        - type
        - key
      properties:
        type:
          type: string
          description: "encrypt"
          example: encrypt
        key:
          type: string
          description: "Cryptographic key of length 128, 192 or 256 bits, in a string format"
          example: "3t6w9z$C&F)J@NcR"

    Decrypt:
      title: Decrypt
      description: "Replace encrypted PII with decrypted text"
      type: object
      required:
        - type
        - key
      properties:
        type:
          type: string
          description: "decrypt"
          example: decrypt
        key:
          type: string
          description: "Cryptographic key of length 128, 192 or 256 bits, in a string format"
          example: "3t6w9z$C&F)J@NcR"

    # ---- Anonymizer response models ----------------------------------------

    AnonymizeResponse:
      type: object
      properties:
        text:
          type: string
        # The property is itself called `items`; the nested `items` keyword
        # below it is the array's element schema.
        items:
          type: array
          description: "Array of anonymized entities"
          items:
            $ref: "#/components/schemas/OperatorResult"

    OperatorResult:
      type: object
      required:
        - start
        - end
        - entity_type
      properties:
        operator:
          type: string
          description: "Name of the used operator"
        entity_type:
          type: string
          description: "Type of the PII entity"
        start:
          type: integer
          description: "Start index of the changed text"
        end:
          type: integer
          description: "End index in the changed text"
        text:
          type: string
          description: "The new text returned"

    DeanonymizeResponse:
      type: object
      properties:
        text:
          type: string
        # As in AnonymizeResponse: a property named `items` holding an array.
        items:
          type: array
          description: "Array of deanonymized entities"
          items:
            $ref: "#/components/schemas/OperatorResult"

  # Error responses shared by /anonymize and /deanonymize.
  responses:
    400BadRequest:
      description: Bad request
      content:
        application/json:
          schema:
            type: object
            properties:
              error:
                type: string
                example: "Invalid request json"

    422UnprocessableEntity:
      description: Unprocessable Entity
      content:
        application/json:
          schema:
            type: object
            properties:
              error:
                type: string
                example: "Invalid input, text can not be empty"