airbyte.SourceS3
Explore with Pulumi AI
SourceS3 Resource
Example Usage
Coming soon!
Coming soon!
Coming soon!
Coming soon!
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.airbyte.SourceS3;
import com.pulumi.airbyte.SourceS3Args;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationDeliveryMethodArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamFormatArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamFormatAvroFormatArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamFormatCsvFormatArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvidedArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamFormatParquetFormatArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatArgs;
import com.pulumi.airbyte.inputs.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
public static void main(String[] args) {
Pulumi.run(App::stack);
}
public static void stack(Context ctx) {
var mySourceS3 = new SourceS3("mySourceS3", SourceS3Args.builder()
.configuration(SourceS3ConfigurationArgs.builder()
.aws_access_key_id("...my_aws_access_key_id...")
.aws_secret_access_key("...my_aws_secret_access_key...")
.bucket("...my_bucket...")
.delivery_method(%!v(PANIC=Format method: runtime error: invalid memory address or nil pointer dereference))
.endpoint("my-s3-endpoint.com")
.region_name("...my_region_name...")
.role_arn("...my_role_arn...")
.start_date("2021-01-01T00:00:00.000000Z")
.streams(SourceS3ConfigurationStreamArgs.builder()
.daysToSyncIfHistoryIsFull(5)
.format(SourceS3ConfigurationStreamFormatArgs.builder()
.avroFormat(SourceS3ConfigurationStreamFormatAvroFormatArgs.builder()
.doubleAsString(true)
.build())
.csvFormat(SourceS3ConfigurationStreamFormatCsvFormatArgs.builder()
.delimiter("...my_delimiter...")
.doubleQuote(false)
.encoding("...my_encoding...")
.escapeChar("...my_escape_char...")
.falseValues("...")
.headerDefinition(SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionArgs.builder()
.autogenerated()
.fromCsv()
.userProvided(SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvidedArgs.builder()
.columnNames("...")
.build())
.build())
.ignoreErrorsOnFieldsMismatch(true)
.nullValues("...")
.quoteChar("...my_quote_char...")
.skipRowsAfterHeader(4)
.skipRowsBeforeHeader(7)
.stringsCanBeNull(true)
.trueValues("...")
.build())
.excelFormat()
.jsonlFormat()
.parquetFormat(SourceS3ConfigurationStreamFormatParquetFormatArgs.builder()
.decimalAsFloat(false)
.build())
.unstructuredDocumentFormat(SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatArgs.builder()
.processing(SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingArgs.builder()
.local()
.build())
.skipUnprocessableFiles(true)
.strategy("auto")
.build())
.build())
.globs("...")
.inputSchema("...my_input_schema...")
.name("...my_name...")
.recentNFilesToReadForSchemaDiscovery(10)
.schemaless(true)
.validationPolicy("Wait for Discover")
.build())
.build())
.definitionId("07ef8ae4-b6a4-4fd9-99ea-a368c6fc144c")
.secretId("...my_secret_id...")
.workspaceId("bba7dce0-5020-4916-bbd7-be8f298d5f78")
.build());
}
}
# Example Pulumi YAML program declaring one airbyte:SourceS3 resource with
# placeholder values for every input property.
resources:
  mySourceS3:
    type: airbyte:SourceS3
    properties:
      configuration:
        awsAccessKeyId: '...my_aws_access_key_id...'
        awsSecretAccessKey: '...my_aws_secret_access_key...'
        bucket: '...my_bucket...'
        deliveryMethod:
          copyRawFiles:
            preserveDirectoryStructure: false
          replicateRecords: {}
        endpoint: my-s3-endpoint.com
        regionName: '...my_region_name...'
        roleArn: '...my_role_arn...'
        startDate: 2021-01-01T00:00:00.000000Z
        streams:
          - daysToSyncIfHistoryIsFull: 5
            format:
              avroFormat:
                doubleAsString: true
              csvFormat:
                delimiter: '...my_delimiter...'
                doubleQuote: false
                encoding: '...my_encoding...'
                escapeChar: '...my_escape_char...'
                falseValues:
                  - '...'
                headerDefinition:
                  autogenerated: {}
                  fromCsv: {}
                  userProvided:
                    columnNames:
                      - '...'
                ignoreErrorsOnFieldsMismatch: true
                nullValues:
                  - '...'
                quoteChar: '...my_quote_char...'
                skipRowsAfterHeader: 4
                skipRowsBeforeHeader: 7
                stringsCanBeNull: true
                trueValues:
                  - '...'
              excelFormat: {}
              jsonlFormat: {}
              parquetFormat:
                decimalAsFloat: false
              unstructuredDocumentFormat:
                processing:
                  local: {}
                skipUnprocessableFiles: true
                strategy: auto
            globs:
              - '...'
            inputSchema: '...my_input_schema...'
            name: '...my_name...'
            recentNFilesToReadForSchemaDiscovery: 10
            schemaless: true
            validationPolicy: Wait for Discover
      definitionId: 07ef8ae4-b6a4-4fd9-99ea-a368c6fc144c
      secretId: '...my_secret_id...'
      workspaceId: bba7dce0-5020-4916-bbd7-be8f298d5f78
Create SourceS3 Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new SourceS3(name: string, args: SourceS3Args, opts?: CustomResourceOptions);
@overload
def SourceS3(resource_name: str,
args: SourceS3Args,
opts: Optional[ResourceOptions] = None)
@overload
def SourceS3(resource_name: str,
opts: Optional[ResourceOptions] = None,
configuration: Optional[SourceS3ConfigurationArgs] = None,
workspace_id: Optional[str] = None,
definition_id: Optional[str] = None,
name: Optional[str] = None,
secret_id: Optional[str] = None)
func NewSourceS3(ctx *Context, name string, args SourceS3Args, opts ...ResourceOption) (*SourceS3, error)
public SourceS3(string name, SourceS3Args args, CustomResourceOptions? opts = null)
public SourceS3(String name, SourceS3Args args)
public SourceS3(String name, SourceS3Args args, CustomResourceOptions options)
type: airbyte:SourceS3
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args SourceS3Args
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args SourceS3Args
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args SourceS3Args
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args SourceS3Args
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args SourceS3Args
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Constructor example
The following reference example uses placeholder values for all input properties.
var sourceS3Resource = new Airbyte.SourceS3("sourceS3Resource", new()
{
Configuration = new Airbyte.Inputs.SourceS3ConfigurationArgs
{
Bucket = "string",
Streams = new[]
{
new Airbyte.Inputs.SourceS3ConfigurationStreamArgs
{
Format = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatArgs
{
AvroFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatAvroFormatArgs
{
DoubleAsString = false,
},
CsvFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatCsvFormatArgs
{
Delimiter = "string",
DoubleQuote = false,
Encoding = "string",
EscapeChar = "string",
FalseValues = new[]
{
"string",
},
HeaderDefinition = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionArgs
{
Autogenerated = null,
FromCsv = null,
UserProvided = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvidedArgs
{
ColumnNames = new[]
{
"string",
},
},
},
IgnoreErrorsOnFieldsMismatch = false,
NullValues = new[]
{
"string",
},
QuoteChar = "string",
SkipRowsAfterHeader = 0,
SkipRowsBeforeHeader = 0,
StringsCanBeNull = false,
TrueValues = new[]
{
"string",
},
},
ExcelFormat = null,
JsonlFormat = null,
ParquetFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatParquetFormatArgs
{
DecimalAsFloat = false,
},
UnstructuredDocumentFormat = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatArgs
{
Processing = new Airbyte.Inputs.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingArgs
{
Local = null,
},
SkipUnprocessableFiles = false,
Strategy = "string",
},
},
Name = "string",
DaysToSyncIfHistoryIsFull = 0,
Globs = new[]
{
"string",
},
InputSchema = "string",
RecentNFilesToReadForSchemaDiscovery = 0,
Schemaless = false,
ValidationPolicy = "string",
},
},
AwsAccessKeyId = "string",
AwsSecretAccessKey = "string",
DeliveryMethod = new Airbyte.Inputs.SourceS3ConfigurationDeliveryMethodArgs
{
CopyRawFiles = new Airbyte.Inputs.SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs
{
PreserveDirectoryStructure = false,
},
ReplicateRecords = null,
},
Endpoint = "string",
RegionName = "string",
RoleArn = "string",
StartDate = "string",
},
WorkspaceId = "string",
DefinitionId = "string",
Name = "string",
SecretId = "string",
});
// Reference example: creates a SourceS3 resource with placeholder values for
// every input property. All argument types are qualified with the airbyte
// package, matching NewSourceS3 and SourceS3Args.
example, err := airbyte.NewSourceS3(ctx, "sourceS3Resource", &airbyte.SourceS3Args{
	Configuration: &airbyte.SourceS3ConfigurationArgs{
		Bucket: pulumi.String("string"),
		Streams: airbyte.SourceS3ConfigurationStreamArray{
			&airbyte.SourceS3ConfigurationStreamArgs{
				Format: &airbyte.SourceS3ConfigurationStreamFormatArgs{
					AvroFormat: &airbyte.SourceS3ConfigurationStreamFormatAvroFormatArgs{
						DoubleAsString: pulumi.Bool(false),
					},
					CsvFormat: &airbyte.SourceS3ConfigurationStreamFormatCsvFormatArgs{
						Delimiter:   pulumi.String("string"),
						DoubleQuote: pulumi.Bool(false),
						Encoding:    pulumi.String("string"),
						EscapeChar:  pulumi.String("string"),
						FalseValues: pulumi.StringArray{
							pulumi.String("string"),
						},
						HeaderDefinition: &airbyte.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionArgs{
							Autogenerated: &airbyte.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionAutogeneratedArgs{},
							FromCsv:       &airbyte.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionFromCsvArgs{},
							UserProvided: &airbyte.SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvidedArgs{
								ColumnNames: pulumi.StringArray{
									pulumi.String("string"),
								},
							},
						},
						IgnoreErrorsOnFieldsMismatch: pulumi.Bool(false),
						NullValues: pulumi.StringArray{
							pulumi.String("string"),
						},
						QuoteChar:            pulumi.String("string"),
						SkipRowsAfterHeader:  pulumi.Float64(0),
						SkipRowsBeforeHeader: pulumi.Float64(0),
						StringsCanBeNull:     pulumi.Bool(false),
						TrueValues: pulumi.StringArray{
							pulumi.String("string"),
						},
					},
					ExcelFormat: &airbyte.SourceS3ConfigurationStreamFormatExcelFormatArgs{},
					JsonlFormat: &airbyte.SourceS3ConfigurationStreamFormatJsonlFormatArgs{},
					ParquetFormat: &airbyte.SourceS3ConfigurationStreamFormatParquetFormatArgs{
						DecimalAsFloat: pulumi.Bool(false),
					},
					UnstructuredDocumentFormat: &airbyte.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatArgs{
						Processing: &airbyte.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingArgs{
							Local: &airbyte.SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingLocalArgs{},
						},
						SkipUnprocessableFiles: pulumi.Bool(false),
						Strategy:               pulumi.String("string"),
					},
				},
				Name:                      pulumi.String("string"),
				DaysToSyncIfHistoryIsFull: pulumi.Float64(0),
				Globs: pulumi.StringArray{
					pulumi.String("string"),
				},
				InputSchema:                          pulumi.String("string"),
				RecentNFilesToReadForSchemaDiscovery: pulumi.Float64(0),
				Schemaless:                           pulumi.Bool(false),
				ValidationPolicy:                     pulumi.String("string"),
			},
		},
		AwsAccessKeyId:     pulumi.String("string"),
		AwsSecretAccessKey: pulumi.String("string"),
		DeliveryMethod: &airbyte.SourceS3ConfigurationDeliveryMethodArgs{
			CopyRawFiles: &airbyte.SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs{
				PreserveDirectoryStructure: pulumi.Bool(false),
			},
			ReplicateRecords: &airbyte.SourceS3ConfigurationDeliveryMethodReplicateRecordsArgs{},
		},
		Endpoint:   pulumi.String("string"),
		RegionName: pulumi.String("string"),
		RoleArn:    pulumi.String("string"),
		StartDate:  pulumi.String("string"),
	},
	WorkspaceId:  pulumi.String("string"),
	DefinitionId: pulumi.String("string"),
	Name:         pulumi.String("string"),
	SecretId:     pulumi.String("string"),
})
var sourceS3Resource = new SourceS3("sourceS3Resource", SourceS3Args.builder()
.configuration(SourceS3ConfigurationArgs.builder()
.bucket("string")
.streams(SourceS3ConfigurationStreamArgs.builder()
.format(SourceS3ConfigurationStreamFormatArgs.builder()
.avroFormat(SourceS3ConfigurationStreamFormatAvroFormatArgs.builder()
.doubleAsString(false)
.build())
.csvFormat(SourceS3ConfigurationStreamFormatCsvFormatArgs.builder()
.delimiter("string")
.doubleQuote(false)
.encoding("string")
.escapeChar("string")
.falseValues("string")
.headerDefinition(SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionArgs.builder()
.autogenerated()
.fromCsv()
.userProvided(SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvidedArgs.builder()
.columnNames("string")
.build())
.build())
.ignoreErrorsOnFieldsMismatch(false)
.nullValues("string")
.quoteChar("string")
.skipRowsAfterHeader(0)
.skipRowsBeforeHeader(0)
.stringsCanBeNull(false)
.trueValues("string")
.build())
.excelFormat()
.jsonlFormat()
.parquetFormat(SourceS3ConfigurationStreamFormatParquetFormatArgs.builder()
.decimalAsFloat(false)
.build())
.unstructuredDocumentFormat(SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatArgs.builder()
.processing(SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingArgs.builder()
.local()
.build())
.skipUnprocessableFiles(false)
.strategy("string")
.build())
.build())
.name("string")
.daysToSyncIfHistoryIsFull(0)
.globs("string")
.inputSchema("string")
.recentNFilesToReadForSchemaDiscovery(0)
.schemaless(false)
.validationPolicy("string")
.build())
.awsAccessKeyId("string")
.awsSecretAccessKey("string")
.deliveryMethod(SourceS3ConfigurationDeliveryMethodArgs.builder()
.copyRawFiles(SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs.builder()
.preserveDirectoryStructure(false)
.build())
.replicateRecords()
.build())
.endpoint("string")
.regionName("string")
.roleArn("string")
.startDate("string")
.build())
.workspaceId("string")
.definitionId("string")
.name("string")
.secretId("string")
.build());
source_s3_resource = airbyte.SourceS3("sourceS3Resource",
configuration={
"bucket": "string",
"streams": [{
"format": {
"avro_format": {
"double_as_string": False,
},
"csv_format": {
"delimiter": "string",
"double_quote": False,
"encoding": "string",
"escape_char": "string",
"false_values": ["string"],
"header_definition": {
"autogenerated": {},
"from_csv": {},
"user_provided": {
"column_names": ["string"],
},
},
"ignore_errors_on_fields_mismatch": False,
"null_values": ["string"],
"quote_char": "string",
"skip_rows_after_header": 0,
"skip_rows_before_header": 0,
"strings_can_be_null": False,
"true_values": ["string"],
},
"excel_format": {},
"jsonl_format": {},
"parquet_format": {
"decimal_as_float": False,
},
"unstructured_document_format": {
"processing": {
"local": {},
},
"skip_unprocessable_files": False,
"strategy": "string",
},
},
"name": "string",
"days_to_sync_if_history_is_full": 0,
"globs": ["string"],
"input_schema": "string",
"recent_n_files_to_read_for_schema_discovery": 0,
"schemaless": False,
"validation_policy": "string",
}],
"aws_access_key_id": "string",
"aws_secret_access_key": "string",
"delivery_method": {
"copy_raw_files": {
"preserve_directory_structure": False,
},
"replicate_records": {},
},
"endpoint": "string",
"region_name": "string",
"role_arn": "string",
"start_date": "string",
},
workspace_id="string",
definition_id="string",
name="string",
secret_id="string")
const sourceS3Resource = new airbyte.SourceS3("sourceS3Resource", {
configuration: {
bucket: "string",
streams: [{
format: {
avroFormat: {
doubleAsString: false,
},
csvFormat: {
delimiter: "string",
doubleQuote: false,
encoding: "string",
escapeChar: "string",
falseValues: ["string"],
headerDefinition: {
autogenerated: {},
fromCsv: {},
userProvided: {
columnNames: ["string"],
},
},
ignoreErrorsOnFieldsMismatch: false,
nullValues: ["string"],
quoteChar: "string",
skipRowsAfterHeader: 0,
skipRowsBeforeHeader: 0,
stringsCanBeNull: false,
trueValues: ["string"],
},
excelFormat: {},
jsonlFormat: {},
parquetFormat: {
decimalAsFloat: false,
},
unstructuredDocumentFormat: {
processing: {
local: {},
},
skipUnprocessableFiles: false,
strategy: "string",
},
},
name: "string",
daysToSyncIfHistoryIsFull: 0,
globs: ["string"],
inputSchema: "string",
recentNFilesToReadForSchemaDiscovery: 0,
schemaless: false,
validationPolicy: "string",
}],
awsAccessKeyId: "string",
awsSecretAccessKey: "string",
deliveryMethod: {
copyRawFiles: {
preserveDirectoryStructure: false,
},
replicateRecords: {},
},
endpoint: "string",
regionName: "string",
roleArn: "string",
startDate: "string",
},
workspaceId: "string",
definitionId: "string",
name: "string",
secretId: "string",
});
type: airbyte:SourceS3
properties:
configuration:
awsAccessKeyId: string
awsSecretAccessKey: string
bucket: string
deliveryMethod:
copyRawFiles:
preserveDirectoryStructure: false
replicateRecords: {}
endpoint: string
regionName: string
roleArn: string
startDate: string
streams:
- daysToSyncIfHistoryIsFull: 0
format:
avroFormat:
doubleAsString: false
csvFormat:
delimiter: string
doubleQuote: false
encoding: string
escapeChar: string
falseValues:
- string
headerDefinition:
autogenerated: {}
fromCsv: {}
userProvided:
columnNames:
- string
ignoreErrorsOnFieldsMismatch: false
nullValues:
- string
quoteChar: string
skipRowsAfterHeader: 0
skipRowsBeforeHeader: 0
stringsCanBeNull: false
trueValues:
- string
excelFormat: {}
jsonlFormat: {}
parquetFormat:
decimalAsFloat: false
unstructuredDocumentFormat:
processing:
local: {}
skipUnprocessableFiles: false
strategy: string
globs:
- string
inputSchema: string
name: string
recentNFilesToReadForSchemaDiscovery: 0
schemaless: false
validationPolicy: string
definitionId: string
name: string
secretId: string
workspaceId: string
SourceS3 Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
The SourceS3 resource accepts the following input properties:
- Configuration SourceS3Configuration
- NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- WorkspaceId string
- DefinitionId string
- The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Requires replacement if changed.
- Name string
- Name of the source e.g. dev-mysql-instance.
- SecretId string
- Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- Configuration SourceS3ConfigurationArgs
- NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- WorkspaceId string
- DefinitionId string
- The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Requires replacement if changed.
- Name string
- Name of the source e.g. dev-mysql-instance.
- SecretId string
- Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- configuration SourceS3Configuration
- NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- workspaceId String
- definitionId String
- The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Requires replacement if changed.
- name String
- Name of the source e.g. dev-mysql-instance.
- secretId String
- Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- configuration SourceS3Configuration
- NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- workspaceId string
- definitionId string
- The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Requires replacement if changed.
- name string
- Name of the source e.g. dev-mysql-instance.
- secretId string
- Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- configuration SourceS3ConfigurationArgs
- NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- workspace_id str
- definition_id str
- The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Requires replacement if changed.
- name str
- Name of the source e.g. dev-mysql-instance.
- secret_id str
- Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- configuration Property Map
- NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- workspaceId String
- definitionId String
- The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Requires replacement if changed.
- name String
- Name of the source e.g. dev-mysql-instance.
- secretId String
- Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
Outputs
All input properties are implicitly available as output properties. Additionally, the SourceS3 resource produces the following output properties:
- CreatedAt double
- Id string
- The provider-assigned unique ID for this managed resource.
- SourceId string
- SourceType string
- CreatedAt float64
- Id string
- The provider-assigned unique ID for this managed resource.
- SourceId string
- SourceType string
- createdAt Double
- id String
- The provider-assigned unique ID for this managed resource.
- sourceId String
- sourceType String
- createdAt number
- id string
- The provider-assigned unique ID for this managed resource.
- sourceId string
- sourceType string
- created_at float
- id str
- The provider-assigned unique ID for this managed resource.
- source_id str
- source_type str
- createdAt Number
- id String
- The provider-assigned unique ID for this managed resource.
- sourceId String
- sourceType String
Look up Existing SourceS3 Resource
Get an existing SourceS3 resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: SourceS3State, opts?: CustomResourceOptions): SourceS3
@staticmethod
def get(resource_name: str,
id: str,
opts: Optional[ResourceOptions] = None,
configuration: Optional[SourceS3ConfigurationArgs] = None,
created_at: Optional[float] = None,
definition_id: Optional[str] = None,
name: Optional[str] = None,
secret_id: Optional[str] = None,
source_id: Optional[str] = None,
source_type: Optional[str] = None,
workspace_id: Optional[str] = None) -> SourceS3
func GetSourceS3(ctx *Context, name string, id IDInput, state *SourceS3State, opts ...ResourceOption) (*SourceS3, error)
public static SourceS3 Get(string name, Input<string> id, SourceS3State? state, CustomResourceOptions? opts = null)
public static SourceS3 get(String name, Output<String> id, SourceS3State state, CustomResourceOptions options)
# Looks up an existing airbyte:SourceS3 resource by its provider ID.
resources:
  _:
    type: airbyte:SourceS3
    get:
      id: ${id}
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- Configuration SourceS3Configuration
- NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- CreatedAt double
- DefinitionId string
- The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Requires replacement if changed.
- Name string
- Name of the source e.g. dev-mysql-instance.
- SecretId string
- Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- SourceId string
- SourceType string
- WorkspaceId string
- Configuration SourceS3ConfigurationArgs
- NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- CreatedAt float64
- DefinitionId string
- The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Requires replacement if changed.
- Name string
- Name of the source e.g. dev-mysql-instance.
- SecretId string
- Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- SourceId string
- SourceType string
- WorkspaceId string
- configuration SourceS3Configuration
- NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- createdAt Double
- definitionId String
- The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Requires replacement if changed.
- name String
- Name of the source e.g. dev-mysql-instance.
- secretId String
- Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- sourceId String
- sourceType String
- workspaceId String
- configuration SourceS3Configuration
- NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- createdAt number
- definitionId string
- The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Requires replacement if changed.
- name string
- Name of the source e.g. dev-mysql-instance.
- secretId string
- Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- sourceId string
- sourceType string
- workspaceId string
- configuration SourceS3ConfigurationArgs
- NOTE: When this Spec is changed, legacyconfigtransformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- created_at float
- definition_id str
- The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Requires replacement if changed.
- name str
- Name of the source e.g. dev-mysql-instance.
- secret_id str
- Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- source_id str
- source_type str
- workspace_id str
- configuration Property Map
- NOTE: When this Spec is changed, legacy_config_transformer.py must also be modified to uptake the changes because it is responsible for converting legacy S3 v3 configs into v4 configs using the File-Based CDK.
- created
At Number - definition
Id String - The UUID of the connector definition. One of configuration.sourceType or definitionId must be provided. Requires replacement if changed.
- name String
- Name of the source e.g. dev-mysql-instance.
- secret
Id String - Optional secretID obtained through the public API OAuth redirect flow. Requires replacement if changed.
- source
Id String - source
Type String - workspace
Id String
Supporting Types
SourceS3Configuration, SourceS3ConfigurationArgs
- Bucket string
- Name of the S3 bucket where the file(s) exist.
- Streams
List<Source
S3Configuration Stream> - Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
- Aws
Access stringKey Id - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- Aws
Secret stringAccess Key - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- Delivery
Method SourceS3Configuration Delivery Method - Endpoint string
- Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
- Region
Name string - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- Role
Arn string - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- Start
Date string - UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.
- Bucket string
- Name of the S3 bucket where the file(s) exist.
- Streams
[]Source
S3Configuration Stream - Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
- Aws
Access stringKey Id - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- Aws
Secret stringAccess Key - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- Delivery
Method SourceS3Configuration Delivery Method - Endpoint string
- Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
- Region
Name string - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- Role
Arn string - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- Start
Date string - UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.
- bucket String
- Name of the S3 bucket where the file(s) exist.
- streams
List<Source
S3Configuration Stream> - Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
- aws
Access StringKey Id - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- aws
Secret StringAccess Key - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- delivery
Method SourceS3Configuration Delivery Method - endpoint String
- Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
- region
Name String - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- role
Arn String - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- start
Date String - UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.
- bucket string
- Name of the S3 bucket where the file(s) exist.
- streams
Source
S3Configuration Stream[] - Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
- aws
Access stringKey Id - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- aws
Secret stringAccess Key - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- delivery
Method SourceS3Configuration Delivery Method - endpoint string
- Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
- region
Name string - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- role
Arn string - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- start
Date string - UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.
- bucket str
- Name of the S3 bucket where the file(s) exist.
- streams
Sequence[Source
S3Configuration Stream] - Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
- aws_
access_ strkey_ id - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- aws_
secret_ straccess_ key - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- delivery_
method SourceS3Configuration Delivery Method - endpoint str
- Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
- region_
name str - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- role_
arn str - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- start_
date str - UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.
- bucket String
- Name of the S3 bucket where the file(s) exist.
- streams List<Property Map>
- Each instance of this configuration defines a stream. Use this to define which files belong in the stream, their format, and how they should be parsed and validated. When sending data to a warehouse destination such as Snowflake or BigQuery, each stream is a separate table.
- aws
Access StringKey Id - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- aws
Secret StringAccess Key - In order to access private Buckets stored on AWS S3, this connector requires credentials with the proper permissions. If accessing publicly available data, this field is not necessary.
- delivery
Method Property Map - endpoint String
- Endpoint to an S3 compatible service. Leave empty to use AWS. Default: ""
- region
Name String - AWS region where the S3 bucket is located. If not provided, the region will be determined automatically.
- role
Arn String - Specifies the Amazon Resource Name (ARN) of an IAM role that you want to use to perform operations requested using this profile. Set the External ID to the Airbyte workspace ID, which can be found in the URL of this page.
- start
Date String - UTC date and time in the format 2017-01-25T00:00:00.000000Z. Any file modified before this date will not be replicated.
SourceS3ConfigurationDeliveryMethod, SourceS3ConfigurationDeliveryMethodArgs
- Copy
Raw SourceFiles S3Configuration Delivery Method Copy Raw Files - Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
- Replicate
Records SourceS3Configuration Delivery Method Replicate Records - Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.
- Copy
Raw SourceFiles S3Configuration Delivery Method Copy Raw Files - Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
- Replicate
Records SourceS3Configuration Delivery Method Replicate Records - Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.
- copy
Raw SourceFiles S3Configuration Delivery Method Copy Raw Files - Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
- replicate
Records SourceS3Configuration Delivery Method Replicate Records - Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.
- copy
Raw SourceFiles S3Configuration Delivery Method Copy Raw Files - Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
- replicate
Records SourceS3Configuration Delivery Method Replicate Records - Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.
- copy_
raw_ Sourcefiles S3Configuration Delivery Method Copy Raw Files - Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
- replicate_
records SourceS3Configuration Delivery Method Replicate Records - Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.
- copy
Raw Property MapFiles - Copy raw files without parsing their contents. Bits are copied into the destination exactly as they appeared in the source. Recommended for use with unstructured text data, non-text and compressed files.
- replicate
Records Property Map - Recommended - Extract and load structured records into your destination of choice. This is the classic method of moving data in Airbyte. It allows for blocking and hashing individual fields or files from a structured schema. Data can be flattened, typed and deduped depending on the destination.
SourceS3ConfigurationDeliveryMethodCopyRawFiles, SourceS3ConfigurationDeliveryMethodCopyRawFilesArgs
- Preserve
Directory boolStructure - If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true
- Preserve
Directory boolStructure - If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true
- preserve
Directory BooleanStructure - If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true
- preserve
Directory booleanStructure - If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true
- preserve_
directory_ boolstructure - If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true
- preserve
Directory BooleanStructure - If enabled, sends subdirectory folder structure along with source file names to the destination. Otherwise, files will be synced by their names only. This option is ignored when file-based replication is not enabled. Default: true
SourceS3ConfigurationStream, SourceS3ConfigurationStreamArgs
- Format
Source
S3Configuration Stream Format - The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
- Name string
- The name of the stream.
- Days
To doubleSync If History Is Full - When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
- Globs List<string>
- The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look here.
- Input
Schema string - The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
- Recent
NFiles doubleTo Read For Schema Discovery - The number of recent files which will be used to discover the schema for this stream.
- Schemaless bool
- When enabled, syncs will not validate or structure records against the stream's schema. Default: false
- Validation
Policy string - The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]
- Format
Source
S3Configuration Stream Format - The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
- Name string
- The name of the stream.
- Days
To float64Sync If History Is Full - When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
- Globs []string
- The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look here.
- Input
Schema string - The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
- Recent
NFiles float64To Read For Schema Discovery - The number of recent files which will be used to discover the schema for this stream.
- Schemaless bool
- When enabled, syncs will not validate or structure records against the stream's schema. Default: false
- Validation
Policy string - The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]
- format
Source
S3Configuration Stream Format - The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
- name String
- The name of the stream.
- days
To DoubleSync If History Is Full - When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
- globs List<String>
- The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look here.
- input
Schema String - The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
- recent
NFiles DoubleTo Read For Schema Discovery - The number of recent files which will be used to discover the schema for this stream.
- schemaless Boolean
- When enabled, syncs will not validate or structure records against the stream's schema. Default: false
- validation
Policy String - The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]
- format
Source
S3Configuration Stream Format - The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
- name string
- The name of the stream.
- days
To numberSync If History Is Full - When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
- globs string[]
- The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look here.
- input
Schema string - The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
- recent
NFiles numberTo Read For Schema Discovery - The number of recent files which will be used to discover the schema for this stream.
- schemaless boolean
- When enabled, syncs will not validate or structure records against the stream's schema. Default: false
- validation
Policy string - The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]
- format
Source
S3Configuration Stream Format - The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
- name str
- The name of the stream.
- days_
to_ floatsync_ if_ history_ is_ full - When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
- globs Sequence[str]
- The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look here.
- input_
schema str - The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
- recent_
n_ floatfiles_ to_ read_ for_ schema_ discovery - The number of recent files which will be used to discover the schema for this stream.
- schemaless bool
- When enabled, syncs will not validate or structure records against the stream's schema. Default: false
- validation_
policy str - The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]
- format Property Map
- The configuration options that are used to alter how to read incoming files that deviate from the standard formatting.
- name String
- The name of the stream.
- days
To NumberSync If History Is Full - When the state history of the file store is full, syncs will only read files that were last modified in the provided day range. Default: 3
- globs List<String>
- The pattern used to specify which files should be selected from the file system. For more information on glob pattern matching look here.
- input
Schema String - The schema that will be used to validate records extracted from the file. This will override the stream schema that is auto-detected from incoming files.
- recent
NFiles NumberTo Read For Schema Discovery - The number of recent files which will be used to discover the schema for this stream.
- schemaless Boolean
- When enabled, syncs will not validate or structure records against the stream's schema. Default: false
- validation
Policy String - The name of the validation policy that dictates sync behavior when a record does not adhere to the stream schema. Default: "Emit Record"; must be one of ["Emit Record", "Skip Record", "Wait for Discover"]
SourceS3ConfigurationStreamFormat, SourceS3ConfigurationStreamFormatArgs
- Avro
Format SourceS3Configuration Stream Format Avro Format - Csv
Format SourceS3Configuration Stream Format Csv Format - Excel
Format SourceS3Configuration Stream Format Excel Format - Jsonl
Format SourceS3Configuration Stream Format Jsonl Format - Parquet
Format SourceS3Configuration Stream Format Parquet Format - Unstructured
Document SourceFormat S3Configuration Stream Format Unstructured Document Format - Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.
- Avro
Format SourceS3Configuration Stream Format Avro Format - Csv
Format SourceS3Configuration Stream Format Csv Format - Excel
Format SourceS3Configuration Stream Format Excel Format - Jsonl
Format SourceS3Configuration Stream Format Jsonl Format - Parquet
Format SourceS3Configuration Stream Format Parquet Format - Unstructured
Document SourceFormat S3Configuration Stream Format Unstructured Document Format - Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.
- avro
Format SourceS3Configuration Stream Format Avro Format - csv
Format SourceS3Configuration Stream Format Csv Format - excel
Format SourceS3Configuration Stream Format Excel Format - jsonl
Format SourceS3Configuration Stream Format Jsonl Format - parquet
Format SourceS3Configuration Stream Format Parquet Format - unstructured
Document SourceFormat S3Configuration Stream Format Unstructured Document Format - Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.
- avro
Format SourceS3Configuration Stream Format Avro Format - csv
Format SourceS3Configuration Stream Format Csv Format - excel
Format SourceS3Configuration Stream Format Excel Format - jsonl
Format SourceS3Configuration Stream Format Jsonl Format - parquet
Format SourceS3Configuration Stream Format Parquet Format - unstructured
Document SourceFormat S3Configuration Stream Format Unstructured Document Format - Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.
- avro_
format SourceS3Configuration Stream Format Avro Format - csv_
format SourceS3Configuration Stream Format Csv Format - excel_
format SourceS3Configuration Stream Format Excel Format - jsonl_
format SourceS3Configuration Stream Format Jsonl Format - parquet_
format SourceS3Configuration Stream Format Parquet Format - unstructured_
document_ Sourceformat S3Configuration Stream Format Unstructured Document Format - Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.
- avro
Format Property Map - csv
Format Property Map - excel
Format Property Map - jsonl
Format Property Map - parquet
Format Property Map - unstructured
Document Property MapFormat - Extract text from document formats (.pdf, .docx, .md, .pptx) and emit as one record per file.
SourceS3ConfigurationStreamFormatAvroFormat, SourceS3ConfigurationStreamFormatAvroFormatArgs
- Double
As boolString - Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false
- Double
As boolString - Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false
- double
As BooleanString - Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false
- double
As booleanString - Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false
- double_
as_ boolstring - Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false
- double
As BooleanString - Whether to convert double fields to strings. This is recommended if you have decimal numbers with a high degree of precision because there can be a loss of precision when handling floating point numbers. Default: false
SourceS3ConfigurationStreamFormatCsvFormat, SourceS3ConfigurationStreamFormatCsvFormatArgs
- Delimiter string
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- Double
Quote bool - Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- Encoding string
- The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
- Escape
Char string - The character used for escaping special characters. To disallow escaping, leave this field blank.
- False
Values List<string> - A set of case-sensitive strings that should be interpreted as false values.
- Header
Definition SourceS3Configuration Stream Format Csv Format Header Definition - How headers will be defined. User Provided assumes the CSV does not have a header row and uses the headers provided, and Autogenerated assumes the CSV does not have a header row and the CDK will generate headers using f{i}, where i is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows. - Ignore
Errors boolOn Fields Mismatch - Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
- Null
Values List<string> - A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field.
- Quote
Char string - The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
- Skip
Rows doubleAfter Header - The number of rows to skip after the header row. Default: 0
- Skip
Rows doubleBefore Header - The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
- Strings
Can boolBe Null - Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
- True
Values List<string> - A set of case-sensitive strings that should be interpreted as true values.
- Delimiter string
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- Double
Quote bool - Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- Encoding string
- The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
- Escape
Char string - The character used for escaping special characters. To disallow escaping, leave this field blank.
- False
Values []string - A set of case-sensitive strings that should be interpreted as false values.
- Header
Definition SourceS3Configuration Stream Format Csv Format Header Definition - How headers will be defined. User Provided assumes the CSV does not have a header row and uses the headers provided, and Autogenerated assumes the CSV does not have a header row and the CDK will generate headers using f{i}, where i is the index starting from 0. Else, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows. - Ignore
Errors boolOn Fields Mismatch - Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
- Null
Values []string - A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field.
- Quote
Char string - The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
- Skip
Rows float64After Header - The number of rows to skip after the header row. Default: 0
- Skip
Rows float64Before Header - The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
- Strings
Can boolBe Null - Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
- True
Values []string - A set of case-sensitive strings that should be interpreted as true values.
- delimiter String
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- double
Quote Boolean - Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- encoding String
- The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
- escape
Char String - The character used for escaping special characters. To disallow escaping, leave this field blank.
- false
Values List<String> - A set of case-sensitive strings that should be interpreted as false values.
- header
Definition SourceS3Configuration Stream Format Csv Format Header Definition - How headers will be defined.
User Provided
assumes the CSV does not have a header row and uses the headers provided, and Autogenerated
assumes the CSV does not have a header row and the CDK will generate headers using f{i},
where i
is the index starting from 0. Otherwise, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows. - ignore
Errors BooleanOn Fields Mismatch - Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
- null
Values List<String> - A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field.
- quote
Char String - The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
- skip
Rows DoubleAfter Header - The number of rows to skip after the header row. Default: 0
- skip
Rows DoubleBefore Header - The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
- strings
Can BooleanBe Null - Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
- true
Values List<String> - A set of case-sensitive strings that should be interpreted as true values.
- delimiter string
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- double
Quote boolean - Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- encoding string
- The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
- escape
Char string - The character used for escaping special characters. To disallow escaping, leave this field blank.
- false
Values string[] - A set of case-sensitive strings that should be interpreted as false values.
- header
Definition SourceS3Configuration Stream Format Csv Format Header Definition - How headers will be defined.
User Provided
assumes the CSV does not have a header row and uses the headers provided, and Autogenerated
assumes the CSV does not have a header row and the CDK will generate headers using f{i},
where i
is the index starting from 0. Otherwise, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows. - ignore
Errors booleanOn Fields Mismatch - Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
- null
Values string[] - A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field.
- quote
Char string - The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
- skip
Rows numberAfter Header - The number of rows to skip after the header row. Default: 0
- skip
Rows numberBefore Header - The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
- strings
Can booleanBe Null - Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
- true
Values string[] - A set of case-sensitive strings that should be interpreted as true values.
- delimiter str
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- double_
quote bool - Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- encoding str
- The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
- escape_
char str - The character used for escaping special characters. To disallow escaping, leave this field blank.
- false_
values Sequence[str] - A set of case-sensitive strings that should be interpreted as false values.
- header_
definition SourceS3Configuration Stream Format Csv Format Header Definition - How headers will be defined.
User Provided
assumes the CSV does not have a header row and uses the headers provided, and Autogenerated
assumes the CSV does not have a header row and the CDK will generate headers using f{i},
where i
is the index starting from 0. Otherwise, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows. - ignore_
errors_ boolon_ fields_ mismatch - Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
- null_
values Sequence[str] - A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field.
- quote_
char str - The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
- skip_
rows_ floatafter_ header - The number of rows to skip after the header row. Default: 0
- skip_
rows_ floatbefore_ header - The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
- strings_
can_ boolbe_ null - Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
- true_
values Sequence[str] - A set of case-sensitive strings that should be interpreted as true values.
- delimiter String
- The character delimiting individual cells in the CSV data. This may only be a 1-character string. For tab-delimited data enter '\t'. Default: ","
- double
Quote Boolean - Whether two quotes in a quoted CSV value denote a single quote in the data. Default: true
- encoding String
- The character encoding of the CSV data. Leave blank to default to UTF8. See the list of Python encodings for allowable options. Default: "utf8"
- escape
Char String - The character used for escaping special characters. To disallow escaping, leave this field blank.
- false
Values List<String> - A set of case-sensitive strings that should be interpreted as false values.
- header
Definition Property Map - How headers will be defined.
User Provided
assumes the CSV does not have a header row and uses the headers provided, and Autogenerated
assumes the CSV does not have a header row and the CDK will generate headers using f{i},
where i
is the index starting from 0. Otherwise, the default behavior is to use the header from the CSV file. If a user wants to autogenerate or provide column names for a CSV having headers, they can skip rows. - ignore
Errors BooleanOn Fields Mismatch - Whether to ignore errors that occur when the number of fields in the CSV does not match the number of columns in the schema. Default: false
- null
Values List<String> - A set of case-sensitive strings that should be interpreted as null values. For example, if the value 'NA' should be interpreted as null, enter 'NA' in this field.
- quote
Char String - The character used for quoting CSV values. To disallow quoting, make this field blank. Default: "\""
- skip
Rows NumberAfter Header - The number of rows to skip after the header row. Default: 0
- skip
Rows NumberBefore Header - The number of rows to skip before the header row. For example, if the header row is on the 3rd row, enter 2 in this field. Default: 0
- strings
Can BooleanBe Null - Whether strings can be interpreted as null values. If true, strings that match the null_values set will be interpreted as null. If false, strings that match the null_values set will be interpreted as the string itself. Default: true
- true
Values List<String> - A set of case-sensitive strings that should be interpreted as true values.
SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinition, SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionArgs
SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvided, SourceS3ConfigurationStreamFormatCsvFormatHeaderDefinitionUserProvidedArgs
- Column
Names List<string> - The column names that will be used while emitting the CSV records
- Column
Names []string - The column names that will be used while emitting the CSV records
- column
Names List<String> - The column names that will be used while emitting the CSV records
- column
Names string[] - The column names that will be used while emitting the CSV records
- column_
names Sequence[str] - The column names that will be used while emitting the CSV records
- column
Names List<String> - The column names that will be used while emitting the CSV records
SourceS3ConfigurationStreamFormatParquetFormat, SourceS3ConfigurationStreamFormatParquetFormatArgs
- Decimal
As boolFloat - Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false
- Decimal
As boolFloat - Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false
- decimal
As BooleanFloat - Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false
- decimal
As booleanFloat - Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false
- decimal_
as_ boolfloat - Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false
- decimal
As BooleanFloat - Whether to convert decimal fields to floats. There is a loss of precision when converting decimals to floats, so this is not recommended. Default: false
SourceS3ConfigurationStreamFormatUnstructuredDocumentFormat, SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatArgs
- Processing
Source
S3Configuration Stream Format Unstructured Document Format Processing - Processing configuration
- Skip
Unprocessable boolFiles - If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
- Strategy string
- The strategy used to parse documents.
fast
extracts text directly from the document which doesn't work for all files. ocr_only
is more reliable, but slower. hi_res
is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]
- Processing
Source
S3Configuration Stream Format Unstructured Document Format Processing - Processing configuration
- Skip
Unprocessable boolFiles - If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
- Strategy string
- The strategy used to parse documents.
fast
extracts text directly from the document which doesn't work for all files. ocr_only
is more reliable, but slower. hi_res
is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]
- processing
Source
S3Configuration Stream Format Unstructured Document Format Processing - Processing configuration
- skip
Unprocessable BooleanFiles - If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
- strategy String
- The strategy used to parse documents.
fast
extracts text directly from the document which doesn't work for all files. ocr_only
is more reliable, but slower. hi_res
is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]
- processing
Source
S3Configuration Stream Format Unstructured Document Format Processing - Processing configuration
- skip
Unprocessable booleanFiles - If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
- strategy string
- The strategy used to parse documents.
fast
extracts text directly from the document which doesn't work for all files. ocr_only
is more reliable, but slower. hi_res
is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]
- processing
Source
S3Configuration Stream Format Unstructured Document Format Processing - Processing configuration
- skip_
unprocessable_ boolfiles - If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
- strategy str
- The strategy used to parse documents.
fast
extracts text directly from the document which doesn't work for all files. ocr_only
is more reliable, but slower. hi_res
is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]
- processing Property Map
- Processing configuration
- skip
Unprocessable BooleanFiles - If true, skip files that cannot be parsed and pass the error message along as the _ab_source_file_parse_error field. If false, fail the sync. Default: true
- strategy String
- The strategy used to parse documents.
fast
extracts text directly from the document which doesn't work for all files. ocr_only
is more reliable, but slower. hi_res
is the most reliable, but requires an API key and a hosted instance of unstructured and can't be used with local mode. See the unstructured.io documentation for more details: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf. Default: "auto"; must be one of ["auto", "fast", "ocr_only", "hi_res"]
SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessing, SourceS3ConfigurationStreamFormatUnstructuredDocumentFormatProcessingArgs
- Local
Source
S3Configuration Stream Format Unstructured Document Format Processing Local - Process files locally, supporting
fast
and ocr
modes. This is the default option.
- Local
Source
S3Configuration Stream Format Unstructured Document Format Processing Local - Process files locally, supporting
fast
and ocr
modes. This is the default option.
- local
Source
S3Configuration Stream Format Unstructured Document Format Processing Local - Process files locally, supporting
fast
and ocr
modes. This is the default option.
- local
Source
S3Configuration Stream Format Unstructured Document Format Processing Local - Process files locally, supporting
fast
and ocr
modes. This is the default option.
- local
Source
S3Configuration Stream Format Unstructured Document Format Processing Local - Process files locally, supporting
fast
and ocr
modes. This is the default option.
- local Property Map
- Process files locally, supporting
fast
and ocr
modes. This is the default option.
Import
$ pulumi import airbyte:index/sourceS3:SourceS3 my_airbyte_source_s3 ""
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- airbyte airbytehq/terraform-provider-airbyte
- License
- Notes
- This Pulumi package is based on the
airbyte
Terraform Provider.