aws.glue.Crawler
Manages a Glue Crawler. More information can be found in the AWS Glue Developer Guide.
Example Usage
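The examples below reference a pre-existing Glue catalog database (exampleAwsGlueCatalogDatabase) and an IAM role (exampleAwsIamRole) that the Glue service can assume; those resources are not defined on this page. A minimal Python sketch of one way to create them follows (resource names are placeholders, and the AWSGlueServiceRole managed policy is one common baseline choice, not a requirement):
import json
import pulumi_aws as aws

# Catalog database the crawlers write their results to.
example_aws_glue_catalog_database = aws.glue.CatalogDatabase("example", name="example")

# IAM role that the Glue service is allowed to assume.
example_aws_iam_role = aws.iam.Role("example",
    name="example-glue-crawler",
    assume_role_policy=json.dumps({
        "Version": "2012-10-17",
        "Statement": [{
            "Effect": "Allow",
            "Action": "sts:AssumeRole",
            "Principal": {"Service": "glue.amazonaws.com"},
        }],
    }))

# Attach the AWS-managed baseline policy for Glue crawlers.
aws.iam.RolePolicyAttachment("example",
    role=example_aws_iam_role.name,
    policy_arn="arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole")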
DynamoDB Target Example
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
    databaseName: exampleAwsGlueCatalogDatabase.name,
    name: "example",
    role: exampleAwsIamRole.arn,
    dynamodbTargets: [{
        path: "table-name",
    }],
});
import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
    database_name=example_aws_glue_catalog_database["name"],
    name="example",
    role=example_aws_iam_role["arn"],
    dynamodb_targets=[{
        "path": "table-name",
    }])
package main
import (
	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
			DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
			Name:         pulumi.String("example"),
			Role:         pulumi.Any(exampleAwsIamRole.Arn),
			DynamodbTargets: glue.CrawlerDynamodbTargetArray{
				&glue.CrawlerDynamodbTargetArgs{
					Path: pulumi.String("table-name"),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() => 
{
    var example = new Aws.Glue.Crawler("example", new()
    {
        DatabaseName = exampleAwsGlueCatalogDatabase.Name,
        Name = "example",
        Role = exampleAwsIamRole.Arn,
        DynamodbTargets = new[]
        {
            new Aws.Glue.Inputs.CrawlerDynamodbTargetArgs
            {
                Path = "table-name",
            },
        },
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerDynamodbTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }
    public static void stack(Context ctx) {
        var example = new Crawler("example", CrawlerArgs.builder()
            .databaseName(exampleAwsGlueCatalogDatabase.name())
            .name("example")
            .role(exampleAwsIamRole.arn())
            .dynamodbTargets(CrawlerDynamodbTargetArgs.builder()
                .path("table-name")
                .build())
            .build());
    }
}
resources:
  example:
    type: aws:glue:Crawler
    properties:
      databaseName: ${exampleAwsGlueCatalogDatabase.name}
      name: example
      role: ${exampleAwsIamRole.arn}
      dynamodbTargets:
        - path: table-name
JDBC Target Example
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
    databaseName: exampleAwsGlueCatalogDatabase.name,
    name: "example",
    role: exampleAwsIamRole.arn,
    jdbcTargets: [{
        connectionName: exampleAwsGlueConnection.name,
        path: "database-name/%",
    }],
});
import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
    database_name=example_aws_glue_catalog_database["name"],
    name="example",
    role=example_aws_iam_role["arn"],
    jdbc_targets=[{
        "connection_name": example_aws_glue_connection["name"],
        "path": "database-name/%",
    }])
package main
import (
	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
			DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
			Name:         pulumi.String("example"),
			Role:         pulumi.Any(exampleAwsIamRole.Arn),
			JdbcTargets: glue.CrawlerJdbcTargetArray{
				&glue.CrawlerJdbcTargetArgs{
					ConnectionName: pulumi.Any(exampleAwsGlueConnection.Name),
					Path:           pulumi.String("database-name/%"),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() => 
{
    var example = new Aws.Glue.Crawler("example", new()
    {
        DatabaseName = exampleAwsGlueCatalogDatabase.Name,
        Name = "example",
        Role = exampleAwsIamRole.Arn,
        JdbcTargets = new[]
        {
            new Aws.Glue.Inputs.CrawlerJdbcTargetArgs
            {
                ConnectionName = exampleAwsGlueConnection.Name,
                Path = "database-name/%",
            },
        },
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerJdbcTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }
    public static void stack(Context ctx) {
        var example = new Crawler("example", CrawlerArgs.builder()
            .databaseName(exampleAwsGlueCatalogDatabase.name())
            .name("example")
            .role(exampleAwsIamRole.arn())
            .jdbcTargets(CrawlerJdbcTargetArgs.builder()
                .connectionName(exampleAwsGlueConnection.name())
                .path("database-name/%")
                .build())
            .build());
    }
}
resources:
  example:
    type: aws:glue:Crawler
    properties:
      databaseName: ${exampleAwsGlueCatalogDatabase.name}
      name: example
      role: ${exampleAwsIamRole.arn}
      jdbcTargets:
        - connectionName: ${exampleAwsGlueConnection.name}
          path: database-name/%
S3 Target Example
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
    databaseName: exampleAwsGlueCatalogDatabase.name,
    name: "example",
    role: exampleAwsIamRole.arn,
    s3Targets: [{
        path: `s3://${exampleAwsS3Bucket.bucket}`,
    }],
});
import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
    database_name=example_aws_glue_catalog_database["name"],
    name="example",
    role=example_aws_iam_role["arn"],
    s3_targets=[{
        "path": f"s3://{example_aws_s3_bucket['bucket']}",
    }])
package main
import (
	"fmt"
	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
			DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
			Name:         pulumi.String("example"),
			Role:         pulumi.Any(exampleAwsIamRole.Arn),
			S3Targets: glue.CrawlerS3TargetArray{
				&glue.CrawlerS3TargetArgs{
					Path: pulumi.Sprintf("s3://%v", exampleAwsS3Bucket.Bucket),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() => 
{
    var example = new Aws.Glue.Crawler("example", new()
    {
        DatabaseName = exampleAwsGlueCatalogDatabase.Name,
        Name = "example",
        Role = exampleAwsIamRole.Arn,
        S3Targets = new[]
        {
            new Aws.Glue.Inputs.CrawlerS3TargetArgs
            {
                Path = $"s3://{exampleAwsS3Bucket.Bucket}",
            },
        },
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }
    public static void stack(Context ctx) {
        var example = new Crawler("example", CrawlerArgs.builder()
            .databaseName(exampleAwsGlueCatalogDatabase.name())
            .name("example")
            .role(exampleAwsIamRole.arn())
            .s3Targets(CrawlerS3TargetArgs.builder()
                .path(String.format("s3://%s", exampleAwsS3Bucket.bucket()))
                .build())
            .build());
    }
}
resources:
  example:
    type: aws:glue:Crawler
    properties:
      databaseName: ${exampleAwsGlueCatalogDatabase.name}
      name: example
      role: ${exampleAwsIamRole.arn}
      s3Targets:
        - path: s3://${exampleAwsS3Bucket.bucket}
Catalog Target Example
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
    databaseName: exampleAwsGlueCatalogDatabase.name,
    name: "example",
    role: exampleAwsIamRole.arn,
    catalogTargets: [{
        databaseName: exampleAwsGlueCatalogDatabase.name,
        tables: [exampleAwsGlueCatalogTable.name],
    }],
    schemaChangePolicy: {
        deleteBehavior: "LOG",
    },
    configuration: `{
  "Version":1.0,
  "Grouping": {
    "TableGroupingPolicy": "CombineCompatibleSchemas"
  }
}
`,
});
import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
    database_name=example_aws_glue_catalog_database["name"],
    name="example",
    role=example_aws_iam_role["arn"],
    catalog_targets=[{
        "database_name": example_aws_glue_catalog_database["name"],
        "tables": [example_aws_glue_catalog_table["name"]],
    }],
    schema_change_policy={
        "delete_behavior": "LOG",
    },
    configuration="""{
  "Version":1.0,
  "Grouping": {
    "TableGroupingPolicy": "CombineCompatibleSchemas"
  }
}
""")
package main
import (
	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
			DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
			Name:         pulumi.String("example"),
			Role:         pulumi.Any(exampleAwsIamRole.Arn),
			CatalogTargets: glue.CrawlerCatalogTargetArray{
				&glue.CrawlerCatalogTargetArgs{
					DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
					Tables: pulumi.StringArray{
						exampleAwsGlueCatalogTable.Name,
					},
				},
			},
			SchemaChangePolicy: &glue.CrawlerSchemaChangePolicyArgs{
				DeleteBehavior: pulumi.String("LOG"),
			},
			Configuration: pulumi.String(`{
  "Version":1.0,
  "Grouping": {
    "TableGroupingPolicy": "CombineCompatibleSchemas"
  }
}
`),
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() => 
{
    var example = new Aws.Glue.Crawler("example", new()
    {
        DatabaseName = exampleAwsGlueCatalogDatabase.Name,
        Name = "example",
        Role = exampleAwsIamRole.Arn,
        CatalogTargets = new[]
        {
            new Aws.Glue.Inputs.CrawlerCatalogTargetArgs
            {
                DatabaseName = exampleAwsGlueCatalogDatabase.Name,
                Tables = new[]
                {
                    exampleAwsGlueCatalogTable.Name,
                },
            },
        },
        SchemaChangePolicy = new Aws.Glue.Inputs.CrawlerSchemaChangePolicyArgs
        {
            DeleteBehavior = "LOG",
        },
        Configuration = @"{
  ""Version"":1.0,
  ""Grouping"": {
    ""TableGroupingPolicy"": ""CombineCompatibleSchemas""
  }
}
",
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerCatalogTargetArgs;
import com.pulumi.aws.glue.inputs.CrawlerSchemaChangePolicyArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }
    public static void stack(Context ctx) {
        var example = new Crawler("example", CrawlerArgs.builder()
            .databaseName(exampleAwsGlueCatalogDatabase.name())
            .name("example")
            .role(exampleAwsIamRole.arn())
            .catalogTargets(CrawlerCatalogTargetArgs.builder()
                .databaseName(exampleAwsGlueCatalogDatabase.name())
                .tables(exampleAwsGlueCatalogTable.name())
                .build())
            .schemaChangePolicy(CrawlerSchemaChangePolicyArgs.builder()
                .deleteBehavior("LOG")
                .build())
            .configuration("""
{
  "Version":1.0,
  "Grouping": {
    "TableGroupingPolicy": "CombineCompatibleSchemas"
  }
}
            """)
            .build());
    }
}
resources:
  example:
    type: aws:glue:Crawler
    properties:
      databaseName: ${exampleAwsGlueCatalogDatabase.name}
      name: example
      role: ${exampleAwsIamRole.arn}
      catalogTargets:
        - databaseName: ${exampleAwsGlueCatalogDatabase.name}
          tables:
            - ${exampleAwsGlueCatalogTable.name}
      schemaChangePolicy:
        deleteBehavior: LOG
      configuration: |
        {
          "Version":1.0,
          "Grouping": {
            "TableGroupingPolicy": "CombineCompatibleSchemas"
          }
        }        
MongoDB Target Example
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const example = new aws.glue.Crawler("example", {
    databaseName: exampleAwsGlueCatalogDatabase.name,
    name: "example",
    role: exampleAwsIamRole.arn,
    mongodbTargets: [{
        connectionName: exampleAwsGlueConnection.name,
        path: "database-name/%",
    }],
});
import pulumi
import pulumi_aws as aws
example = aws.glue.Crawler("example",
    database_name=example_aws_glue_catalog_database["name"],
    name="example",
    role=example_aws_iam_role["arn"],
    mongodb_targets=[{
        "connection_name": example_aws_glue_connection["name"],
        "path": "database-name/%",
    }])
package main
import (
	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := glue.NewCrawler(ctx, "example", &glue.CrawlerArgs{
			DatabaseName: pulumi.Any(exampleAwsGlueCatalogDatabase.Name),
			Name:         pulumi.String("example"),
			Role:         pulumi.Any(exampleAwsIamRole.Arn),
			MongodbTargets: glue.CrawlerMongodbTargetArray{
				&glue.CrawlerMongodbTargetArgs{
					ConnectionName: pulumi.Any(exampleAwsGlueConnection.Name),
					Path:           pulumi.String("database-name/%"),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() => 
{
    var example = new Aws.Glue.Crawler("example", new()
    {
        DatabaseName = exampleAwsGlueCatalogDatabase.Name,
        Name = "example",
        Role = exampleAwsIamRole.Arn,
        MongodbTargets = new[]
        {
            new Aws.Glue.Inputs.CrawlerMongodbTargetArgs
            {
                ConnectionName = exampleAwsGlueConnection.Name,
                Path = "database-name/%",
            },
        },
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerMongodbTargetArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }
    public static void stack(Context ctx) {
        var example = new Crawler("example", CrawlerArgs.builder()
            .databaseName(exampleAwsGlueCatalogDatabase.name())
            .name("example")
            .role(exampleAwsIamRole.arn())
            .mongodbTargets(CrawlerMongodbTargetArgs.builder()
                .connectionName(exampleAwsGlueConnection.name())
                .path("database-name/%")
                .build())
            .build());
    }
}
resources:
  example:
    type: aws:glue:Crawler
    properties:
      databaseName: ${exampleAwsGlueCatalogDatabase.name}
      name: example
      role: ${exampleAwsIamRole.arn}
      mongodbTargets:
        - connectionName: ${exampleAwsGlueConnection.name}
          path: database-name/%
Configuration Settings Example
import * as pulumi from "@pulumi/pulumi";
import * as aws from "@pulumi/aws";
const eventsCrawler = new aws.glue.Crawler("events_crawler", {
    databaseName: glueDatabase.name,
    schedule: "cron(0 1 * * ? *)",
    name: `events_crawler_${environmentName}`,
    role: glueRole.arn,
    tags: tags,
    configuration: JSON.stringify({
        Grouping: {
            TableGroupingPolicy: "CombineCompatibleSchemas",
        },
        CrawlerOutput: {
            Partitions: {
                AddOrUpdateBehavior: "InheritFromTable",
            },
        },
        Version: 1,
    }),
    s3Targets: [{
        path: `s3://${dataLakeBucket.bucket}`,
    }],
});
import pulumi
import json
import pulumi_aws as aws
events_crawler = aws.glue.Crawler("events_crawler",
    database_name=glue_database["name"],
    schedule="cron(0 1 * * ? *)",
    name=f"events_crawler_{environment_name}",
    role=glue_role["arn"],
    tags=tags,
    configuration=json.dumps({
        "Grouping": {
            "TableGroupingPolicy": "CombineCompatibleSchemas",
        },
        "CrawlerOutput": {
            "Partitions": {
                "AddOrUpdateBehavior": "InheritFromTable",
            },
        },
        "Version": 1,
    }),
    s3_targets=[{
        "path": f"s3://{data_lake_bucket['bucket']}",
    }])
package main
import (
	"encoding/json"
	"fmt"
	"github.com/pulumi/pulumi-aws/sdk/v6/go/aws/glue"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)
func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		tmpJSON0, err := json.Marshal(map[string]interface{}{
			"Grouping": map[string]interface{}{
				"TableGroupingPolicy": "CombineCompatibleSchemas",
			},
			"CrawlerOutput": map[string]interface{}{
				"Partitions": map[string]interface{}{
					"AddOrUpdateBehavior": "InheritFromTable",
				},
			},
			"Version": 1,
		})
		if err != nil {
			return err
		}
		json0 := string(tmpJSON0)
		_, err = glue.NewCrawler(ctx, "events_crawler", &glue.CrawlerArgs{
			DatabaseName:  pulumi.Any(glueDatabase.Name),
			Schedule:      pulumi.String("cron(0 1 * * ? *)"),
			Name:          pulumi.Sprintf("events_crawler_%v", environmentName),
			Role:          pulumi.Any(glueRole.Arn),
			Tags:          pulumi.Any(tags),
			Configuration: pulumi.String(json0),
			S3Targets: glue.CrawlerS3TargetArray{
				&glue.CrawlerS3TargetArgs{
					Path: pulumi.Sprintf("s3://%v", dataLakeBucket.Bucket),
				},
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
using System.Collections.Generic;
using System.Linq;
using System.Text.Json;
using Pulumi;
using Aws = Pulumi.Aws;
return await Deployment.RunAsync(() => 
{
    var eventsCrawler = new Aws.Glue.Crawler("events_crawler", new()
    {
        DatabaseName = glueDatabase.Name,
        Schedule = "cron(0 1 * * ? *)",
        Name = $"events_crawler_{environmentName}",
        Role = glueRole.Arn,
        Tags = tags,
        Configuration = JsonSerializer.Serialize(new Dictionary<string, object?>
        {
            ["Grouping"] = new Dictionary<string, object?>
            {
                ["TableGroupingPolicy"] = "CombineCompatibleSchemas",
            },
            ["CrawlerOutput"] = new Dictionary<string, object?>
            {
                ["Partitions"] = new Dictionary<string, object?>
                {
                    ["AddOrUpdateBehavior"] = "InheritFromTable",
                },
            },
            ["Version"] = 1,
        }),
        S3Targets = new[]
        {
            new Aws.Glue.Inputs.CrawlerS3TargetArgs
            {
                Path = $"s3://{dataLakeBucket.Bucket}",
            },
        },
    });
});
package generated_program;
import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.aws.glue.Crawler;
import com.pulumi.aws.glue.CrawlerArgs;
import com.pulumi.aws.glue.inputs.CrawlerS3TargetArgs;
import static com.pulumi.codegen.internal.Serialization.*;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }
    public static void stack(Context ctx) {
        var eventsCrawler = new Crawler("eventsCrawler", CrawlerArgs.builder()
            .databaseName(glueDatabase.name())
            .schedule("cron(0 1 * * ? *)")
            .name(String.format("events_crawler_%s", environmentName))
            .role(glueRole.arn())
            .tags(tags)
            .configuration(serializeJson(
                jsonObject(
                    jsonProperty("Grouping", jsonObject(
                        jsonProperty("TableGroupingPolicy", "CombineCompatibleSchemas")
                    )),
                    jsonProperty("CrawlerOutput", jsonObject(
                        jsonProperty("Partitions", jsonObject(
                            jsonProperty("AddOrUpdateBehavior", "InheritFromTable")
                        ))
                    )),
                    jsonProperty("Version", 1)
                )))
            .s3Targets(CrawlerS3TargetArgs.builder()
                .path(String.format("s3://%s", dataLakeBucket.bucket()))
                .build())
            .build());
    }
}
resources:
  eventsCrawler:
    type: aws:glue:Crawler
    name: events_crawler
    properties:
      databaseName: ${glueDatabase.name}
      schedule: cron(0 1 * * ? *)
      name: events_crawler_${environmentName}
      role: ${glueRole.arn}
      tags: ${tags}
      configuration:
        fn::toJSON:
          Grouping:
            TableGroupingPolicy: CombineCompatibleSchemas
          CrawlerOutput:
            Partitions:
              AddOrUpdateBehavior: InheritFromTable
          Version: 1
      s3Targets:
        - path: s3://${dataLakeBucket.bucket}
Create Crawler Resource
Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.
Constructor syntax
new Crawler(name: string, args: CrawlerArgs, opts?: CustomResourceOptions);
@overload
def Crawler(resource_name: str,
            args: CrawlerArgs,
            opts: Optional[ResourceOptions] = None)
@overload
def Crawler(resource_name: str,
            opts: Optional[ResourceOptions] = None,
            database_name: Optional[str] = None,
            role: Optional[str] = None,
            description: Optional[str] = None,
            lineage_configuration: Optional[CrawlerLineageConfigurationArgs] = None,
            delta_targets: Optional[Sequence[CrawlerDeltaTargetArgs]] = None,
            catalog_targets: Optional[Sequence[CrawlerCatalogTargetArgs]] = None,
            dynamodb_targets: Optional[Sequence[CrawlerDynamodbTargetArgs]] = None,
            hudi_targets: Optional[Sequence[CrawlerHudiTargetArgs]] = None,
            iceberg_targets: Optional[Sequence[CrawlerIcebergTargetArgs]] = None,
            jdbc_targets: Optional[Sequence[CrawlerJdbcTargetArgs]] = None,
            lake_formation_configuration: Optional[CrawlerLakeFormationConfigurationArgs] = None,
            configuration: Optional[str] = None,
            mongodb_targets: Optional[Sequence[CrawlerMongodbTargetArgs]] = None,
            name: Optional[str] = None,
            recrawl_policy: Optional[CrawlerRecrawlPolicyArgs] = None,
            classifiers: Optional[Sequence[str]] = None,
            s3_targets: Optional[Sequence[CrawlerS3TargetArgs]] = None,
            schedule: Optional[str] = None,
            schema_change_policy: Optional[CrawlerSchemaChangePolicyArgs] = None,
            security_configuration: Optional[str] = None,
            table_prefix: Optional[str] = None,
            tags: Optional[Mapping[str, str]] = None)
func NewCrawler(ctx *Context, name string, args CrawlerArgs, opts ...ResourceOption) (*Crawler, error)
public Crawler(string name, CrawlerArgs args, CustomResourceOptions? opts = null)
public Crawler(String name, CrawlerArgs args)
public Crawler(String name, CrawlerArgs args, CustomResourceOptions options)
type: aws:glue:Crawler
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.
Parameters
- name string
- The unique name of the resource.
- args CrawlerArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- resource_name str
- The unique name of the resource.
- args CrawlerArgs
- The arguments to resource properties.
- opts ResourceOptions
- Bag of options to control resource's behavior.
- ctx Context
- Context object for the current deployment.
- name string
- The unique name of the resource.
- args CrawlerArgs
- The arguments to resource properties.
- opts ResourceOption
- Bag of options to control resource's behavior.
- name string
- The unique name of the resource.
- args CrawlerArgs
- The arguments to resource properties.
- opts CustomResourceOptions
- Bag of options to control resource's behavior.
- name String
- The unique name of the resource.
- args CrawlerArgs
- The arguments to resource properties.
- options CustomResourceOptions
- Bag of options to control resource's behavior.
Constructor example
The following reference example uses placeholder values for all input properties.
var crawlerResource = new Aws.Glue.Crawler("crawlerResource", new()
{
    DatabaseName = "string",
    Role = "string",
    Description = "string",
    LineageConfiguration = new Aws.Glue.Inputs.CrawlerLineageConfigurationArgs
    {
        CrawlerLineageSettings = "string",
    },
    DeltaTargets = new[]
    {
        new Aws.Glue.Inputs.CrawlerDeltaTargetArgs
        {
            DeltaTables = new[]
            {
                "string",
            },
            WriteManifest = false,
            ConnectionName = "string",
            CreateNativeDeltaTable = false,
        },
    },
    CatalogTargets = new[]
    {
        new Aws.Glue.Inputs.CrawlerCatalogTargetArgs
        {
            DatabaseName = "string",
            Tables = new[]
            {
                "string",
            },
            ConnectionName = "string",
            DlqEventQueueArn = "string",
            EventQueueArn = "string",
        },
    },
    DynamodbTargets = new[]
    {
        new Aws.Glue.Inputs.CrawlerDynamodbTargetArgs
        {
            Path = "string",
            ScanAll = false,
            ScanRate = 0,
        },
    },
    HudiTargets = new[]
    {
        new Aws.Glue.Inputs.CrawlerHudiTargetArgs
        {
            MaximumTraversalDepth = 0,
            Paths = new[]
            {
                "string",
            },
            ConnectionName = "string",
            Exclusions = new[]
            {
                "string",
            },
        },
    },
    IcebergTargets = new[]
    {
        new Aws.Glue.Inputs.CrawlerIcebergTargetArgs
        {
            MaximumTraversalDepth = 0,
            Paths = new[]
            {
                "string",
            },
            ConnectionName = "string",
            Exclusions = new[]
            {
                "string",
            },
        },
    },
    JdbcTargets = new[]
    {
        new Aws.Glue.Inputs.CrawlerJdbcTargetArgs
        {
            ConnectionName = "string",
            Path = "string",
            EnableAdditionalMetadatas = new[]
            {
                "string",
            },
            Exclusions = new[]
            {
                "string",
            },
        },
    },
    LakeFormationConfiguration = new Aws.Glue.Inputs.CrawlerLakeFormationConfigurationArgs
    {
        AccountId = "string",
        UseLakeFormationCredentials = false,
    },
    Configuration = "string",
    MongodbTargets = new[]
    {
        new Aws.Glue.Inputs.CrawlerMongodbTargetArgs
        {
            ConnectionName = "string",
            Path = "string",
            ScanAll = false,
        },
    },
    Name = "string",
    RecrawlPolicy = new Aws.Glue.Inputs.CrawlerRecrawlPolicyArgs
    {
        RecrawlBehavior = "string",
    },
    Classifiers = new[]
    {
        "string",
    },
    S3Targets = new[]
    {
        new Aws.Glue.Inputs.CrawlerS3TargetArgs
        {
            Path = "string",
            ConnectionName = "string",
            DlqEventQueueArn = "string",
            EventQueueArn = "string",
            Exclusions = new[]
            {
                "string",
            },
            SampleSize = 0,
        },
    },
    Schedule = "string",
    SchemaChangePolicy = new Aws.Glue.Inputs.CrawlerSchemaChangePolicyArgs
    {
        DeleteBehavior = "string",
        UpdateBehavior = "string",
    },
    SecurityConfiguration = "string",
    TablePrefix = "string",
    Tags = 
    {
        { "string", "string" },
    },
});
example, err := glue.NewCrawler(ctx, "crawlerResource", &glue.CrawlerArgs{
	DatabaseName: pulumi.String("string"),
	Role:         pulumi.String("string"),
	Description:  pulumi.String("string"),
	LineageConfiguration: &glue.CrawlerLineageConfigurationArgs{
		CrawlerLineageSettings: pulumi.String("string"),
	},
	DeltaTargets: glue.CrawlerDeltaTargetArray{
		&glue.CrawlerDeltaTargetArgs{
			DeltaTables: pulumi.StringArray{
				pulumi.String("string"),
			},
			WriteManifest:          pulumi.Bool(false),
			ConnectionName:         pulumi.String("string"),
			CreateNativeDeltaTable: pulumi.Bool(false),
		},
	},
	CatalogTargets: glue.CrawlerCatalogTargetArray{
		&glue.CrawlerCatalogTargetArgs{
			DatabaseName: pulumi.String("string"),
			Tables: pulumi.StringArray{
				pulumi.String("string"),
			},
			ConnectionName:   pulumi.String("string"),
			DlqEventQueueArn: pulumi.String("string"),
			EventQueueArn:    pulumi.String("string"),
		},
	},
	DynamodbTargets: glue.CrawlerDynamodbTargetArray{
		&glue.CrawlerDynamodbTargetArgs{
			Path:     pulumi.String("string"),
			ScanAll:  pulumi.Bool(false),
			ScanRate: pulumi.Float64(0),
		},
	},
	HudiTargets: glue.CrawlerHudiTargetArray{
		&glue.CrawlerHudiTargetArgs{
			MaximumTraversalDepth: pulumi.Int(0),
			Paths: pulumi.StringArray{
				pulumi.String("string"),
			},
			ConnectionName: pulumi.String("string"),
			Exclusions: pulumi.StringArray{
				pulumi.String("string"),
			},
		},
	},
	IcebergTargets: glue.CrawlerIcebergTargetArray{
		&glue.CrawlerIcebergTargetArgs{
			MaximumTraversalDepth: pulumi.Int(0),
			Paths: pulumi.StringArray{
				pulumi.String("string"),
			},
			ConnectionName: pulumi.String("string"),
			Exclusions: pulumi.StringArray{
				pulumi.String("string"),
			},
		},
	},
	JdbcTargets: glue.CrawlerJdbcTargetArray{
		&glue.CrawlerJdbcTargetArgs{
			ConnectionName: pulumi.String("string"),
			Path:           pulumi.String("string"),
			EnableAdditionalMetadatas: pulumi.StringArray{
				pulumi.String("string"),
			},
			Exclusions: pulumi.StringArray{
				pulumi.String("string"),
			},
		},
	},
	LakeFormationConfiguration: &glue.CrawlerLakeFormationConfigurationArgs{
		AccountId:                   pulumi.String("string"),
		UseLakeFormationCredentials: pulumi.Bool(false),
	},
	Configuration: pulumi.String("string"),
	MongodbTargets: glue.CrawlerMongodbTargetArray{
		&glue.CrawlerMongodbTargetArgs{
			ConnectionName: pulumi.String("string"),
			Path:           pulumi.String("string"),
			ScanAll:        pulumi.Bool(false),
		},
	},
	Name: pulumi.String("string"),
	RecrawlPolicy: &glue.CrawlerRecrawlPolicyArgs{
		RecrawlBehavior: pulumi.String("string"),
	},
	Classifiers: pulumi.StringArray{
		pulumi.String("string"),
	},
	S3Targets: glue.CrawlerS3TargetArray{
		&glue.CrawlerS3TargetArgs{
			Path:             pulumi.String("string"),
			ConnectionName:   pulumi.String("string"),
			DlqEventQueueArn: pulumi.String("string"),
			EventQueueArn:    pulumi.String("string"),
			Exclusions: pulumi.StringArray{
				pulumi.String("string"),
			},
			SampleSize: pulumi.Int(0),
		},
	},
	Schedule: pulumi.String("string"),
	SchemaChangePolicy: &glue.CrawlerSchemaChangePolicyArgs{
		DeleteBehavior: pulumi.String("string"),
		UpdateBehavior: pulumi.String("string"),
	},
	SecurityConfiguration: pulumi.String("string"),
	TablePrefix:           pulumi.String("string"),
	Tags: pulumi.StringMap{
		"string": pulumi.String("string"),
	},
})
var crawlerResource = new Crawler("crawlerResource", CrawlerArgs.builder()
    .databaseName("string")
    .role("string")
    .description("string")
    .lineageConfiguration(CrawlerLineageConfigurationArgs.builder()
        .crawlerLineageSettings("string")
        .build())
    .deltaTargets(CrawlerDeltaTargetArgs.builder()
        .deltaTables("string")
        .writeManifest(false)
        .connectionName("string")
        .createNativeDeltaTable(false)
        .build())
    .catalogTargets(CrawlerCatalogTargetArgs.builder()
        .databaseName("string")
        .tables("string")
        .connectionName("string")
        .dlqEventQueueArn("string")
        .eventQueueArn("string")
        .build())
    .dynamodbTargets(CrawlerDynamodbTargetArgs.builder()
        .path("string")
        .scanAll(false)
        .scanRate(0)
        .build())
    .hudiTargets(CrawlerHudiTargetArgs.builder()
        .maximumTraversalDepth(0)
        .paths("string")
        .connectionName("string")
        .exclusions("string")
        .build())
    .icebergTargets(CrawlerIcebergTargetArgs.builder()
        .maximumTraversalDepth(0)
        .paths("string")
        .connectionName("string")
        .exclusions("string")
        .build())
    .jdbcTargets(CrawlerJdbcTargetArgs.builder()
        .connectionName("string")
        .path("string")
        .enableAdditionalMetadatas("string")
        .exclusions("string")
        .build())
    .lakeFormationConfiguration(CrawlerLakeFormationConfigurationArgs.builder()
        .accountId("string")
        .useLakeFormationCredentials(false)
        .build())
    .configuration("string")
    .mongodbTargets(CrawlerMongodbTargetArgs.builder()
        .connectionName("string")
        .path("string")
        .scanAll(false)
        .build())
    .name("string")
    .recrawlPolicy(CrawlerRecrawlPolicyArgs.builder()
        .recrawlBehavior("string")
        .build())
    .classifiers("string")
    .s3Targets(CrawlerS3TargetArgs.builder()
        .path("string")
        .connectionName("string")
        .dlqEventQueueArn("string")
        .eventQueueArn("string")
        .exclusions("string")
        .sampleSize(0)
        .build())
    .schedule("string")
    .schemaChangePolicy(CrawlerSchemaChangePolicyArgs.builder()
        .deleteBehavior("string")
        .updateBehavior("string")
        .build())
    .securityConfiguration("string")
    .tablePrefix("string")
    .tags(Map.of("string", "string"))
    .build());
crawler_resource = aws.glue.Crawler("crawlerResource",
    database_name="string",
    role="string",
    description="string",
    lineage_configuration={
        "crawler_lineage_settings": "string",
    },
    delta_targets=[{
        "delta_tables": ["string"],
        "write_manifest": False,
        "connection_name": "string",
        "create_native_delta_table": False,
    }],
    catalog_targets=[{
        "database_name": "string",
        "tables": ["string"],
        "connection_name": "string",
        "dlq_event_queue_arn": "string",
        "event_queue_arn": "string",
    }],
    dynamodb_targets=[{
        "path": "string",
        "scan_all": False,
        "scan_rate": 0,
    }],
    hudi_targets=[{
        "maximum_traversal_depth": 0,
        "paths": ["string"],
        "connection_name": "string",
        "exclusions": ["string"],
    }],
    iceberg_targets=[{
        "maximum_traversal_depth": 0,
        "paths": ["string"],
        "connection_name": "string",
        "exclusions": ["string"],
    }],
    jdbc_targets=[{
        "connection_name": "string",
        "path": "string",
        "enable_additional_metadatas": ["string"],
        "exclusions": ["string"],
    }],
    lake_formation_configuration={
        "account_id": "string",
        "use_lake_formation_credentials": False,
    },
    configuration="string",
    mongodb_targets=[{
        "connection_name": "string",
        "path": "string",
        "scan_all": False,
    }],
    name="string",
    recrawl_policy={
        "recrawl_behavior": "string",
    },
    classifiers=["string"],
    s3_targets=[{
        "path": "string",
        "connection_name": "string",
        "dlq_event_queue_arn": "string",
        "event_queue_arn": "string",
        "exclusions": ["string"],
        "sample_size": 0,
    }],
    schedule="string",
    schema_change_policy={
        "delete_behavior": "string",
        "update_behavior": "string",
    },
    security_configuration="string",
    table_prefix="string",
    tags={
        "string": "string",
    })
const crawlerResource = new aws.glue.Crawler("crawlerResource", {
    databaseName: "string",
    role: "string",
    description: "string",
    lineageConfiguration: {
        crawlerLineageSettings: "string",
    },
    deltaTargets: [{
        deltaTables: ["string"],
        writeManifest: false,
        connectionName: "string",
        createNativeDeltaTable: false,
    }],
    catalogTargets: [{
        databaseName: "string",
        tables: ["string"],
        connectionName: "string",
        dlqEventQueueArn: "string",
        eventQueueArn: "string",
    }],
    dynamodbTargets: [{
        path: "string",
        scanAll: false,
        scanRate: 0,
    }],
    hudiTargets: [{
        maximumTraversalDepth: 0,
        paths: ["string"],
        connectionName: "string",
        exclusions: ["string"],
    }],
    icebergTargets: [{
        maximumTraversalDepth: 0,
        paths: ["string"],
        connectionName: "string",
        exclusions: ["string"],
    }],
    jdbcTargets: [{
        connectionName: "string",
        path: "string",
        enableAdditionalMetadatas: ["string"],
        exclusions: ["string"],
    }],
    lakeFormationConfiguration: {
        accountId: "string",
        useLakeFormationCredentials: false,
    },
    configuration: "string",
    mongodbTargets: [{
        connectionName: "string",
        path: "string",
        scanAll: false,
    }],
    name: "string",
    recrawlPolicy: {
        recrawlBehavior: "string",
    },
    classifiers: ["string"],
    s3Targets: [{
        path: "string",
        connectionName: "string",
        dlqEventQueueArn: "string",
        eventQueueArn: "string",
        exclusions: ["string"],
        sampleSize: 0,
    }],
    schedule: "string",
    schemaChangePolicy: {
        deleteBehavior: "string",
        updateBehavior: "string",
    },
    securityConfiguration: "string",
    tablePrefix: "string",
    tags: {
        string: "string",
    },
});
type: aws:glue:Crawler
properties:
    catalogTargets:
        - connectionName: string
          databaseName: string
          dlqEventQueueArn: string
          eventQueueArn: string
          tables:
            - string
    classifiers:
        - string
    configuration: string
    databaseName: string
    deltaTargets:
        - connectionName: string
          createNativeDeltaTable: false
          deltaTables:
            - string
          writeManifest: false
    description: string
    dynamodbTargets:
        - path: string
          scanAll: false
          scanRate: 0
    hudiTargets:
        - connectionName: string
          exclusions:
            - string
          maximumTraversalDepth: 0
          paths:
            - string
    icebergTargets:
        - connectionName: string
          exclusions:
            - string
          maximumTraversalDepth: 0
          paths:
            - string
    jdbcTargets:
        - connectionName: string
          enableAdditionalMetadatas:
            - string
          exclusions:
            - string
          path: string
    lakeFormationConfiguration:
        accountId: string
        useLakeFormationCredentials: false
    lineageConfiguration:
        crawlerLineageSettings: string
    mongodbTargets:
        - connectionName: string
          path: string
          scanAll: false
    name: string
    recrawlPolicy:
        recrawlBehavior: string
    role: string
    s3Targets:
        - connectionName: string
          dlqEventQueueArn: string
          eventQueueArn: string
          exclusions:
            - string
          path: string
          sampleSize: 0
    schedule: string
    schemaChangePolicy:
        deleteBehavior: string
        updateBehavior: string
    securityConfiguration: string
    tablePrefix: string
    tags:
        string: string
Crawler Resource Properties
To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.
Inputs
In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.
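For example, the following two Python declarations are equivalent ways of setting the schema_change_policy input (the database and role values are placeholders):
import pulumi_aws as aws

# Passing the nested input as a typed args class...
aws.glue.Crawler("with-args-class",
    database_name="example",
    role="example-role",
    schema_change_policy=aws.glue.CrawlerSchemaChangePolicyArgs(delete_behavior="LOG"))

# ...or as a plain dictionary literal.
aws.glue.Crawler("with-dict",
    database_name="example",
    role="example-role",
    schema_change_policy={"delete_behavior": "LOG"})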
The Crawler resource accepts the following input properties:
- DatabaseName string
- Glue database where results are written.
- Role string
- The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- CatalogTargets List<CrawlerCatalogTarget>
- List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
- Classifiers List<string>
- List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- Configuration string
- JSON string of configuration information. For more details see Setting Crawler Configuration Options.
- DeltaTargets List<CrawlerDeltaTarget>
- List of nested Delta Lake target arguments. See Delta Target below.
- Description string
- Description of the crawler.
- DynamodbTargets List<CrawlerDynamodbTarget>
- List of nested DynamoDB target arguments. See Dynamodb Target below.
- HudiTargets List<CrawlerHudiTarget>
- List of nested Hudi target arguments. See Hudi Target below.
- IcebergTargets List<CrawlerIcebergTarget>
- List of nested Iceberg target arguments. See Iceberg Target below.
- JdbcTargets List<CrawlerJdbcTarget>
- List of nested JDBC target arguments. See JDBC Target below.
- LakeFormationConfiguration CrawlerLakeFormationConfiguration
- Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
- LineageConfiguration CrawlerLineageConfiguration
- Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
- MongodbTargets List<CrawlerMongodbTarget>
- List of nested MongoDB target arguments. See MongoDB Target below.
- Name string
- Name of the crawler.
- RecrawlPolicy CrawlerRecrawlPolicy
- A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
- S3Targets List<CrawlerS3Target>
- List of nested Amazon S3 target arguments. See S3 Target below.
- Schedule string
- A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- SchemaChangePolicy CrawlerSchemaChangePolicy
- Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
- SecurityConfiguration string
- Name of the Security Configuration to be used by the crawler.
- TablePrefix string
- The table prefix used for catalog tables that are created.
- Tags Dictionary<string, string>
- Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- DatabaseName string
- Glue database where results are written.
- Role string
- The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- CatalogTargets []CrawlerCatalogTargetArgs
- List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
- Classifiers []string
- List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- Configuration string
- JSON string of configuration information. For more details see Setting Crawler Configuration Options.
- DeltaTargets []CrawlerDeltaTargetArgs
- List of nested Delta Lake target arguments. See Delta Target below.
- Description string
- Description of the crawler.
- DynamodbTargets []CrawlerDynamodbTargetArgs
- List of nested DynamoDB target arguments. See Dynamodb Target below.
- HudiTargets []CrawlerHudiTargetArgs
- List of nested Hudi target arguments. See Hudi Target below.
- IcebergTargets []CrawlerIcebergTargetArgs
- List of nested Iceberg target arguments. See Iceberg Target below.
- JdbcTargets []CrawlerJdbcTargetArgs
- List of nested JDBC target arguments. See JDBC Target below.
- LakeFormationConfiguration CrawlerLakeFormationConfigurationArgs
- Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
- LineageConfiguration CrawlerLineageConfigurationArgs
- Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
- MongodbTargets []CrawlerMongodbTargetArgs
- List of nested MongoDB target arguments. See MongoDB Target below.
- Name string
- Name of the crawler.
- RecrawlPolicy CrawlerRecrawlPolicyArgs
- A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
- S3Targets []CrawlerS3TargetArgs
- List of nested Amazon S3 target arguments. See S3 Target below.
- Schedule string
- A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- SchemaChangePolicy CrawlerSchemaChangePolicyArgs
- Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
- SecurityConfiguration string
- Name of the Security Configuration to be used by the crawler.
- TablePrefix string
- The table prefix used for catalog tables that are created.
- Tags map[string]string
- Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- databaseName String
- Glue database where results are written.
- role String
- The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- catalogTargets List<CrawlerCatalogTarget>
- List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
- classifiers List<String>
- List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- configuration String
- JSON string of configuration information. For more details see Setting Crawler Configuration Options.
- deltaTargets List<CrawlerDeltaTarget>
- List of nested Delta Lake target arguments. See Delta Target below.
- description String
- Description of the crawler.
- dynamodbTargets List<CrawlerDynamodbTarget>
- List of nested DynamoDB target arguments. See Dynamodb Target below.
- hudiTargets List<CrawlerHudiTarget>
- List of nested Hudi target arguments. See Hudi Target below.
- icebergTargets List<CrawlerIcebergTarget>
- List of nested Iceberg target arguments. See Iceberg Target below.
- jdbcTargets List<CrawlerJdbcTarget>
- List of nested JDBC target arguments. See JDBC Target below.
- lakeFormationConfiguration CrawlerLakeFormationConfiguration
- Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
- lineageConfiguration CrawlerLineageConfiguration
- Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
- mongodbTargets List<CrawlerMongodbTarget>
- List of nested MongoDB target arguments. See MongoDB Target below.
- name String
- Name of the crawler.
- recrawlPolicy CrawlerRecrawlPolicy
- A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
- s3Targets List<CrawlerS3Target>
- List of nested Amazon S3 target arguments. See S3 Target below.
- schedule String
- A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- schemaChangePolicy CrawlerSchemaChangePolicy
- Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
- securityConfiguration String
- Name of the Security Configuration to be used by the crawler.
- tablePrefix String
- The table prefix used for catalog tables that are created.
- tags Map<String,String>
- Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- databaseName string
- Glue database where results are written.
- role string
- The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- catalogTargets CrawlerCatalogTarget[]
- List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
- classifiers string[]
- List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- configuration string
- JSON string of configuration information. For more details see Setting Crawler Configuration Options.
- deltaTargets CrawlerDeltaTarget[]
- List of nested Delta Lake target arguments. See Delta Target below.
- description string
- Description of the crawler.
- dynamodbTargets CrawlerDynamodbTarget[]
- List of nested DynamoDB target arguments. See Dynamodb Target below.
- hudiTargets CrawlerHudiTarget[]
- List of nested Hudi target arguments. See Hudi Target below.
- icebergTargets CrawlerIcebergTarget[]
- List of nested Iceberg target arguments. See Iceberg Target below.
- jdbcTargets CrawlerJdbcTarget[]
- List of nested JDBC target arguments. See JDBC Target below.
- lakeFormationConfiguration CrawlerLakeFormationConfiguration
- Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
- lineageConfiguration CrawlerLineageConfiguration
- Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
- mongodbTargets CrawlerMongodbTarget[]
- List of nested MongoDB target arguments. See MongoDB Target below.
- name string
- Name of the crawler.
- recrawlPolicy CrawlerRecrawlPolicy
- A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
- s3Targets CrawlerS3Target[]
- List of nested Amazon S3 target arguments. See S3 Target below.
- schedule string
- A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- schemaChangePolicy CrawlerSchemaChangePolicy
- Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
- securityConfiguration string
- Name of the Security Configuration to be used by the crawler.
- tablePrefix string
- The table prefix used for catalog tables that are created.
- tags {[key: string]: string}
- Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- database_name str
- Glue database where results are written.
- role str
- The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- catalog_targets Sequence[CrawlerCatalogTargetArgs]
- List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
- classifiers Sequence[str]
- List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- configuration str
- JSON string of configuration information. For more details see Setting Crawler Configuration Options.
- delta_targets Sequence[CrawlerDeltaTargetArgs]
- List of nested Delta Lake target arguments. See Delta Target below.
- description str
- Description of the crawler.
- dynamodb_targets Sequence[CrawlerDynamodbTargetArgs]
- List of nested DynamoDB target arguments. See Dynamodb Target below.
- hudi_targets Sequence[CrawlerHudiTargetArgs]
- List of nested Hudi target arguments. See Hudi Target below.
- iceberg_targets Sequence[CrawlerIcebergTargetArgs]
- List of nested Iceberg target arguments. See Iceberg Target below.
- jdbc_targets Sequence[CrawlerJdbcTargetArgs]
- List of nested JDBC target arguments. See JDBC Target below.
- lake_formation_configuration CrawlerLakeFormationConfigurationArgs
- Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
- lineage_configuration CrawlerLineageConfigurationArgs
- Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
- mongodb_targets Sequence[CrawlerMongodbTargetArgs]
- List of nested MongoDB target arguments. See MongoDB Target below.
- name str
- Name of the crawler.
- recrawl_policy CrawlerRecrawlPolicyArgs
- A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
- s3_targets Sequence[CrawlerS3TargetArgs]
- List of nested Amazon S3 target arguments. See S3 Target below.
- schedule str
- A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- schema_change_policy CrawlerSchemaChangePolicyArgs
- Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
- security_configuration str
- Name of the Security Configuration to be used by the crawler.
- table_prefix str
- The table prefix used for catalog tables that are created.
- tags Mapping[str, str]
- Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- databaseName String
- Glue database where results are written.
- role String
- The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- catalogTargets List<Property Map>
- List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
- classifiers List<String>
- List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- configuration String
- JSON string of configuration information. For more details see Setting Crawler Configuration Options.
- deltaTargets List<Property Map>
- List of nested Delta Lake target arguments. See Delta Target below.
- description String
- Description of the crawler.
- dynamodbTargets List<Property Map>
- List of nested DynamoDB target arguments. See Dynamodb Target below.
- hudiTargets List<Property Map>
- List of nested Hudi target arguments. See Hudi Target below.
- icebergTargets List<Property Map>
- List of nested Iceberg target arguments. See Iceberg Target below.
- jdbcTargets List<Property Map>
- List of nested JDBC target arguments. See JDBC Target below.
- lakeFormationConfiguration Property Map
- Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
- lineageConfiguration Property Map
- Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
- mongodbTargets List<Property Map>
- List of nested MongoDB target arguments. See MongoDB Target below.
- name String
- Name of the crawler.
- recrawlPolicy Property Map
- A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
- s3Targets List<Property Map>
- List of nested Amazon S3 target arguments. See S3 Target below.
- schedule String
- A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- schemaChangePolicy Property Map
- Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
- securityConfiguration String
- The name of the Security Configuration to be used by the crawler.
- tablePrefix String
- The table prefix used for catalog tables that are created.
- tags Map<String>
- Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
Outputs
All input properties are implicitly available as output properties. Additionally, the Crawler resource produces the following output properties:
- arn
- The ARN of the crawler.
- id
- The provider-assigned unique ID for this managed resource.
- tags_all
- A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
Look up Existing Crawler Resource
Get an existing Crawler resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.
public static get(name: string, id: Input<ID>, state?: CrawlerState, opts?: CustomResourceOptions): Crawler
@staticmethod
def get(resource_name: str,
        id: str,
        opts: Optional[ResourceOptions] = None,
        arn: Optional[str] = None,
        catalog_targets: Optional[Sequence[CrawlerCatalogTargetArgs]] = None,
        classifiers: Optional[Sequence[str]] = None,
        configuration: Optional[str] = None,
        database_name: Optional[str] = None,
        delta_targets: Optional[Sequence[CrawlerDeltaTargetArgs]] = None,
        description: Optional[str] = None,
        dynamodb_targets: Optional[Sequence[CrawlerDynamodbTargetArgs]] = None,
        hudi_targets: Optional[Sequence[CrawlerHudiTargetArgs]] = None,
        iceberg_targets: Optional[Sequence[CrawlerIcebergTargetArgs]] = None,
        jdbc_targets: Optional[Sequence[CrawlerJdbcTargetArgs]] = None,
        lake_formation_configuration: Optional[CrawlerLakeFormationConfigurationArgs] = None,
        lineage_configuration: Optional[CrawlerLineageConfigurationArgs] = None,
        mongodb_targets: Optional[Sequence[CrawlerMongodbTargetArgs]] = None,
        name: Optional[str] = None,
        recrawl_policy: Optional[CrawlerRecrawlPolicyArgs] = None,
        role: Optional[str] = None,
        s3_targets: Optional[Sequence[CrawlerS3TargetArgs]] = None,
        schedule: Optional[str] = None,
        schema_change_policy: Optional[CrawlerSchemaChangePolicyArgs] = None,
        security_configuration: Optional[str] = None,
        table_prefix: Optional[str] = None,
        tags: Optional[Mapping[str, str]] = None,
        tags_all: Optional[Mapping[str, str]] = None) -> Crawler
func GetCrawler(ctx *Context, name string, id IDInput, state *CrawlerState, opts ...ResourceOption) (*Crawler, error)
public static Crawler Get(string name, Input<string> id, CrawlerState? state, CustomResourceOptions? opts = null)
public static Crawler get(String name, Output<String> id, CrawlerState state, CustomResourceOptions options)
resources:
  _:
    type: aws:glue:Crawler
    get:
      id: ${id}
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- resource_name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
- name
- The unique name of the resulting resource.
- id
- The unique provider ID of the resource to lookup.
- state
- Any extra arguments used during the lookup.
- opts
- A bag of options that control this resource's behavior.
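As a brief illustration, here is a minimal TypeScript sketch of looking up an existing crawler. For aws.glue.Crawler the resource ID is the crawler name; the name "example" below is a placeholder.
import * as aws from "@pulumi/aws";
// Look up the state of an already-provisioned crawler by its name (placeholder value).
const existing = aws.glue.Crawler.get("existing", "example");
// All input properties are then available as outputs, e.g. the ARN.
export const crawlerArn = existing.arn;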
- Arn string
- The ARN of the crawler
- CatalogTargets List<CrawlerCatalogTarget>
- List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
- Classifiers List<string>
- List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- Configuration string
- JSON string of configuration information. For more details see Setting Crawler Configuration Options.
- DatabaseName string
- Glue database where results are written.
- DeltaTargets List<CrawlerDeltaTarget>
- List of nested Delta Lake target arguments. See Delta Target below.
- Description string
- Description of the crawler.
- DynamodbTargets List<CrawlerDynamodbTarget>
- List of nested DynamoDB target arguments. See Dynamodb Target below.
- HudiTargets List<CrawlerHudiTarget>
- List of nested Hudi target arguments. See Hudi Target below.
- IcebergTargets List<CrawlerIcebergTarget>
- List of nested Iceberg target arguments. See Iceberg Target below.
- JdbcTargets List<CrawlerJdbcTarget>
- List of nested JDBC target arguments. See JDBC Target below.
- LakeFormationConfiguration CrawlerLakeFormationConfiguration
- Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
- LineageConfiguration CrawlerLineageConfiguration
- Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
- MongodbTargets List<CrawlerMongodbTarget>
- List of nested MongoDB target arguments. See MongoDB Target below.
- Name string
- Name of the crawler.
- RecrawlPolicy CrawlerRecrawlPolicy
- A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
- Role string
- The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- S3Targets List<CrawlerS3Target>
- List of nested Amazon S3 target arguments. See S3 Target below.
- Schedule string
- A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- SchemaChangePolicy CrawlerSchemaChangePolicy
- Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
- SecurityConfiguration string
- The name of the Security Configuration to be used by the crawler.
- TablePrefix string
- The table prefix used for catalog tables that are created.
- Tags Dictionary<string, string>
- Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- TagsAll Dictionary<string, string>
- A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
- Arn string
- The ARN of the crawler
- CatalogTargets []CrawlerCatalogTargetArgs
- List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
- Classifiers []string
- List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- Configuration string
- JSON string of configuration information. For more details see Setting Crawler Configuration Options.
- DatabaseName string
- Glue database where results are written.
- DeltaTargets []CrawlerDeltaTargetArgs
- List of nested Delta Lake target arguments. See Delta Target below.
- Description string
- Description of the crawler.
- DynamodbTargets []CrawlerDynamodbTargetArgs
- List of nested DynamoDB target arguments. See Dynamodb Target below.
- HudiTargets []CrawlerHudiTargetArgs
- List of nested Hudi target arguments. See Hudi Target below.
- IcebergTargets []CrawlerIcebergTargetArgs
- List of nested Iceberg target arguments. See Iceberg Target below.
- JdbcTargets []CrawlerJdbcTargetArgs
- List of nested JDBC target arguments. See JDBC Target below.
- LakeFormationConfiguration CrawlerLakeFormationConfigurationArgs
- Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
- LineageConfiguration CrawlerLineageConfigurationArgs
- Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
- MongodbTargets []CrawlerMongodbTargetArgs
- List of nested MongoDB target arguments. See MongoDB Target below.
- Name string
- Name of the crawler.
- RecrawlPolicy CrawlerRecrawlPolicyArgs
- A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
- Role string
- The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- S3Targets []CrawlerS3TargetArgs
- List of nested Amazon S3 target arguments. See S3 Target below.
- Schedule string
- A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- SchemaChangePolicy CrawlerSchemaChangePolicyArgs
- Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
- SecurityConfiguration string
- The name of the Security Configuration to be used by the crawler.
- TablePrefix string
- The table prefix used for catalog tables that are created.
- Tags map[string]string
- Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- TagsAll map[string]string
- A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
- arn String
- The ARN of the crawler
- catalogTargets List<CrawlerCatalogTarget>
- List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
- classifiers List<String>
- List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- configuration String
- JSON string of configuration information. For more details see Setting Crawler Configuration Options.
- databaseName String
- Glue database where results are written.
- deltaTargets List<CrawlerDeltaTarget>
- List of nested Delta Lake target arguments. See Delta Target below.
- description String
- Description of the crawler.
- dynamodbTargets List<CrawlerDynamodbTarget>
- List of nested DynamoDB target arguments. See Dynamodb Target below.
- hudiTargets List<CrawlerHudiTarget>
- List of nested Hudi target arguments. See Hudi Target below.
- icebergTargets List<CrawlerIcebergTarget>
- List of nested Iceberg target arguments. See Iceberg Target below.
- jdbcTargets List<CrawlerJdbcTarget>
- List of nested JDBC target arguments. See JDBC Target below.
- lakeFormationConfiguration CrawlerLakeFormationConfiguration
- Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
- lineageConfiguration CrawlerLineageConfiguration
- Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
- mongodbTargets List<CrawlerMongodbTarget>
- List of nested MongoDB target arguments. See MongoDB Target below.
- name String
- Name of the crawler.
- recrawlPolicy CrawlerRecrawlPolicy
- A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
- role String
- The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- s3Targets List<CrawlerS3Target>
- List of nested Amazon S3 target arguments. See S3 Target below.
- schedule String
- A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- schemaChangePolicy CrawlerSchemaChangePolicy
- Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
- securityConfiguration String
- The name of the Security Configuration to be used by the crawler.
- tablePrefix String
- The table prefix used for catalog tables that are created.
- tags Map<String,String>
- Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- tagsAll Map<String,String>
- A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
- arn string
- The ARN of the crawler
- catalogTargets CrawlerCatalogTarget[]
- List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
- classifiers string[]
- List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- configuration string
- JSON string of configuration information. For more details see Setting Crawler Configuration Options.
- databaseName string
- Glue database where results are written.
- deltaTargets CrawlerDeltaTarget[]
- List of nested Delta Lake target arguments. See Delta Target below.
- description string
- Description of the crawler.
- dynamodbTargets CrawlerDynamodbTarget[]
- List of nested DynamoDB target arguments. See Dynamodb Target below.
- hudiTargets CrawlerHudiTarget[]
- List of nested Hudi target arguments. See Hudi Target below.
- icebergTargets CrawlerIcebergTarget[]
- List of nested Iceberg target arguments. See Iceberg Target below.
- jdbcTargets CrawlerJdbcTarget[]
- List of nested JDBC target arguments. See JDBC Target below.
- lakeFormationConfiguration CrawlerLakeFormationConfiguration
- Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
- lineageConfiguration CrawlerLineageConfiguration
- Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
- mongodbTargets CrawlerMongodbTarget[]
- List of nested MongoDB target arguments. See MongoDB Target below.
- name string
- Name of the crawler.
- recrawlPolicy CrawlerRecrawlPolicy
- A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
- role string
- The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- s3Targets CrawlerS3Target[]
- List of nested Amazon S3 target arguments. See S3 Target below.
- schedule string
- A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- schemaChangePolicy CrawlerSchemaChangePolicy
- Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
- securityConfiguration string
- The name of the Security Configuration to be used by the crawler.
- tablePrefix string
- The table prefix used for catalog tables that are created.
- tags {[key: string]: string}
- Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- tagsAll {[key: string]: string}
- A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
- arn str
- The ARN of the crawler
- catalog_targets Sequence[CrawlerCatalogTargetArgs]
- List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
- classifiers Sequence[str]
- List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- configuration str
- JSON string of configuration information. For more details see Setting Crawler Configuration Options.
- database_name str
- Glue database where results are written.
- delta_targets Sequence[CrawlerDeltaTargetArgs]
- List of nested Delta Lake target arguments. See Delta Target below.
- description str
- Description of the crawler.
- dynamodb_targets Sequence[CrawlerDynamodbTargetArgs]
- List of nested DynamoDB target arguments. See Dynamodb Target below.
- hudi_targets Sequence[CrawlerHudiTargetArgs]
- List of nested Hudi target arguments. See Hudi Target below.
- iceberg_targets Sequence[CrawlerIcebergTargetArgs]
- List of nested Iceberg target arguments. See Iceberg Target below.
- jdbc_targets Sequence[CrawlerJdbcTargetArgs]
- List of nested JDBC target arguments. See JDBC Target below.
- lake_formation_configuration CrawlerLakeFormationConfigurationArgs
- Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
- lineage_configuration CrawlerLineageConfigurationArgs
- Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
- mongodb_targets Sequence[CrawlerMongodbTargetArgs]
- List of nested MongoDB target arguments. See MongoDB Target below.
- name str
- Name of the crawler.
- recrawl_policy CrawlerRecrawlPolicyArgs
- A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
- role str
- The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- s3_targets Sequence[CrawlerS3TargetArgs]
- List of nested Amazon S3 target arguments. See S3 Target below.
- schedule str
- A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- schema_change_policy CrawlerSchemaChangePolicyArgs
- Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
- security_configuration str
- The name of the Security Configuration to be used by the crawler.
- table_prefix str
- The table prefix used for catalog tables that are created.
- tags Mapping[str, str]
- Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- tags_all Mapping[str, str]
- A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
- arn String
- The ARN of the crawler
- catalogTargets List<Property Map>
- List of nested AWS Glue Data Catalog target arguments. See Catalog Target below.
- classifiers List<String>
- List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
- configuration String
- JSON string of configuration information. For more details see Setting Crawler Configuration Options.
- databaseName String
- Glue database where results are written.
- deltaTargets List<Property Map>
- List of nested Delta Lake target arguments. See Delta Target below.
- description String
- Description of the crawler.
- dynamodbTargets List<Property Map>
- List of nested DynamoDB target arguments. See Dynamodb Target below.
- hudiTargets List<Property Map>
- List of nested Hudi target arguments. See Hudi Target below.
- icebergTargets List<Property Map>
- List of nested Iceberg target arguments. See Iceberg Target below.
- jdbcTargets List<Property Map>
- List of nested JDBC target arguments. See JDBC Target below.
- lakeFormationConfiguration Property Map
- Specifies Lake Formation configuration settings for the crawler. See Lake Formation Configuration below.
- lineageConfiguration Property Map
- Specifies data lineage configuration settings for the crawler. See Lineage Configuration below.
- mongodbTargets List<Property Map>
- List of nested MongoDB target arguments. See MongoDB Target below.
- name String
- Name of the crawler.
- recrawlPolicy Property Map
- A policy that specifies whether to crawl the entire dataset again, or to crawl only folders that were added since the last crawler run. See Recrawl Policy below.
- role String
- The IAM role friendly name (including path without leading slash), or ARN of an IAM role, used by the crawler to access other resources.
- s3Targets List<Property Map>
- List of nested Amazon S3 target arguments. See S3 Target below.
- schedule String
- A cron expression used to specify the schedule. For more information, see Time-Based Schedules for Jobs and Crawlers. For example, to run something every day at 12:15 UTC, you would specify: cron(15 12 * * ? *).
- schemaChangePolicy Property Map
- Policy for the crawler's update and deletion behavior. See Schema Change Policy below.
- securityConfiguration String
- The name of the Security Configuration to be used by the crawler.
- tablePrefix String
- The table prefix used for catalog tables that are created.
- tags Map<String>
- Key-value map of resource tags. If configured with a provider default_tags configuration block present, tags with matching keys will overwrite those defined at the provider-level.
- tagsAll Map<String>
- A map of tags assigned to the resource, including those inherited from the provider default_tags configuration block.
Supporting Types
CrawlerCatalogTarget, CrawlerCatalogTargetArgs      
- DatabaseName string
- The name of the Glue database to be synchronized.
- Tables List<string>
- A list of catalog tables to be synchronized.
- ConnectionName string
- The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog connection type paired with a NETWORK connection type.
- DlqEventQueueArn string
- A valid Amazon SQS ARN. Note: The deletion_behavior of the catalog target doesn't support DEPRECATE_IN_DATABASE. Note: The configuration for catalog target crawlers will have { ... "Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas"} } by default.
- EventQueueArn string
- A valid Amazon SQS ARN.
- DatabaseName string
- The name of the Glue database to be synchronized.
- Tables []string
- A list of catalog tables to be synchronized.
- ConnectionName string
- The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog connection type paired with a NETWORK connection type.
- DlqEventQueueArn string
- A valid Amazon SQS ARN. Note: The deletion_behavior of the catalog target doesn't support DEPRECATE_IN_DATABASE. Note: The configuration for catalog target crawlers will have { ... "Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas"} } by default.
- EventQueueArn string
- A valid Amazon SQS ARN.
- databaseName String
- The name of the Glue database to be synchronized.
- tables List<String>
- A list of catalog tables to be synchronized.
- connectionName String
- The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog connection type paired with a NETWORK connection type.
- dlqEventQueueArn String
- A valid Amazon SQS ARN. Note: The deletion_behavior of the catalog target doesn't support DEPRECATE_IN_DATABASE. Note: The configuration for catalog target crawlers will have { ... "Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas"} } by default.
- eventQueueArn String
- A valid Amazon SQS ARN.
- databaseName string
- The name of the Glue database to be synchronized.
- tables string[]
- A list of catalog tables to be synchronized.
- connectionName string
- The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog connection type paired with a NETWORK connection type.
- dlqEventQueueArn string
- A valid Amazon SQS ARN. Note: The deletion_behavior of the catalog target doesn't support DEPRECATE_IN_DATABASE. Note: The configuration for catalog target crawlers will have { ... "Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas"} } by default.
- eventQueueArn string
- A valid Amazon SQS ARN.
- database_name str
- The name of the Glue database to be synchronized.
- tables Sequence[str]
- A list of catalog tables to be synchronized.
- connection_name str
- The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog connection type paired with a NETWORK connection type.
- dlq_event_queue_arn str
- A valid Amazon SQS ARN. Note: The deletion_behavior of the catalog target doesn't support DEPRECATE_IN_DATABASE. Note: The configuration for catalog target crawlers will have { ... "Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas"} } by default.
- event_queue_arn str
- A valid Amazon SQS ARN.
- databaseName String
- The name of the Glue database to be synchronized.
- tables List<String>
- A list of catalog tables to be synchronized.
- connectionName String
- The name of the connection for an Amazon S3-backed Data Catalog table to be a target of the crawl when using a Catalog connection type paired with a NETWORK connection type.
- dlqEventQueueArn String
- A valid Amazon SQS ARN. Note: The deletion_behavior of the catalog target doesn't support DEPRECATE_IN_DATABASE. Note: The configuration for catalog target crawlers will have { ... "Grouping": { "TableGroupingPolicy": "CombineCompatibleSchemas"} } by default.
- eventQueueArn String
- A valid Amazon SQS ARN.
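To illustrate these fields together, here is a minimal TypeScript sketch of a crawler with a catalog target; the database, table, and role values are placeholders, and deleteBehavior is set to LOG because of the DEPRECATE_IN_DATABASE restriction noted above.
import * as aws from "@pulumi/aws";
const catalogCrawler = new aws.glue.Crawler("catalog-example", {
    databaseName: "example-database", // placeholder Glue database
    role: "arn:aws:iam::123456789012:role/example-glue-role", // placeholder IAM role ARN
    catalogTargets: [{
        databaseName: "example-database",
        tables: ["example_table"], // placeholder catalog table
    }],
    schemaChangePolicy: {
        deleteBehavior: "LOG", // catalog targets don't support DEPRECATE_IN_DATABASE
    },
});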
CrawlerDeltaTarget, CrawlerDeltaTargetArgs      
- DeltaTables List<string>
- A list of the Amazon S3 paths to the Delta tables.
- WriteManifest bool
- Specifies whether to write the manifest files to the Delta table path.
- ConnectionName string
- The name of the connection to use to connect to the Delta table target.
- CreateNativeDeltaTable bool
- Specifies whether the crawler will create native tables, to allow integration with query engines that support querying of the Delta transaction log directly.
- DeltaTables []string
- A list of the Amazon S3 paths to the Delta tables.
- WriteManifest bool
- Specifies whether to write the manifest files to the Delta table path.
- ConnectionName string
- The name of the connection to use to connect to the Delta table target.
- CreateNativeDeltaTable bool
- Specifies whether the crawler will create native tables, to allow integration with query engines that support querying of the Delta transaction log directly.
- deltaTables List<String>
- A list of the Amazon S3 paths to the Delta tables.
- writeManifest Boolean
- Specifies whether to write the manifest files to the Delta table path.
- connectionName String
- The name of the connection to use to connect to the Delta table target.
- createNativeDeltaTable Boolean
- Specifies whether the crawler will create native tables, to allow integration with query engines that support querying of the Delta transaction log directly.
- deltaTables string[]
- A list of the Amazon S3 paths to the Delta tables.
- writeManifest boolean
- Specifies whether to write the manifest files to the Delta table path.
- connectionName string
- The name of the connection to use to connect to the Delta table target.
- createNativeDeltaTable boolean
- Specifies whether the crawler will create native tables, to allow integration with query engines that support querying of the Delta transaction log directly.
- delta_tables Sequence[str]
- A list of the Amazon S3 paths to the Delta tables.
- write_manifest bool
- Specifies whether to write the manifest files to the Delta table path.
- connection_name str
- The name of the connection to use to connect to the Delta table target.
- create_native_delta_table bool
- Specifies whether the crawler will create native tables, to allow integration with query engines that support querying of the Delta transaction log directly.
- deltaTables List<String>
- A list of the Amazon S3 paths to the Delta tables.
- writeManifest Boolean
- Specifies whether to write the manifest files to the Delta table path.
- connectionName String
- The name of the connection to use to connect to the Delta table target.
- createNativeDeltaTable Boolean
- Specifies whether the crawler will create native tables, to allow integration with query engines that support querying of the Delta transaction log directly.
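For example, a minimal TypeScript sketch of a Delta Lake target that creates native Delta tables; the bucket path, database, and role are placeholders.
import * as aws from "@pulumi/aws";
const deltaCrawler = new aws.glue.Crawler("delta-example", {
    databaseName: "example-database", // placeholder
    role: "arn:aws:iam::123456789012:role/example-glue-role", // placeholder
    deltaTargets: [{
        deltaTables: ["s3://example-bucket/delta/events/"], // placeholder Delta table path
        writeManifest: false,
        createNativeDeltaTable: true, // expose tables to engines that read the Delta log directly
    }],
});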
CrawlerDynamodbTarget, CrawlerDynamodbTargetArgs      
- Path string
- The name of the DynamoDB table to crawl.
- ScanAll bool
- Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Defaults to true.
- ScanRate double
- The percentage of the configured read capacity units to use by the AWS Glue crawler. The valid values are null or a value between 0.1 and 1.5.
- Path string
- The name of the DynamoDB table to crawl.
- ScanAll bool
- Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Defaults to true.
- ScanRate float64
- The percentage of the configured read capacity units to use by the AWS Glue crawler. The valid values are null or a value between 0.1 and 1.5.
- path String
- The name of the DynamoDB table to crawl.
- scanAll Boolean
- Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Defaults to true.
- scanRate Double
- The percentage of the configured read capacity units to use by the AWS Glue crawler. The valid values are null or a value between 0.1 and 1.5.
- path string
- The name of the DynamoDB table to crawl.
- scanAll boolean
- Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Defaults to true.
- scanRate number
- The percentage of the configured read capacity units to use by the AWS Glue crawler. The valid values are null or a value between 0.1 and 1.5.
- path str
- The name of the DynamoDB table to crawl.
- scan_all bool
- Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Defaults to true.
- scan_rate float
- The percentage of the configured read capacity units to use by the AWS Glue crawler. The valid values are null or a value between 0.1 and 1.5.
- path String
- The name of the DynamoDB table to crawl.
- scanAll Boolean
- Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Defaults to true.
- scanRate Number
- The percentage of the configured read capacity units to use by the AWS Glue crawler. The valid values are null or a value between 0.1 and 1.5.
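For instance, a TypeScript sketch of a DynamoDB target that samples rows at half the configured read capacity; the table name, database, and role are placeholders.
import * as aws from "@pulumi/aws";
const dynamoCrawler = new aws.glue.Crawler("dynamo-example", {
    databaseName: "example-database", // placeholder
    role: "arn:aws:iam::123456789012:role/example-glue-role", // placeholder
    dynamodbTargets: [{
        path: "example-table", // placeholder DynamoDB table name
        scanAll: false,        // sample rows instead of scanning every record
        scanRate: 0.5,         // use 50% of the configured read capacity units
    }],
});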
CrawlerHudiTarget, CrawlerHudiTargetArgs      
- MaximumTraversalDepth int
- The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
- Paths List<string>
- One or more Amazon S3 paths that contain Hudi metadata folders as s3://bucket/prefix.
- ConnectionName string
- The name of the connection to use to connect to the Hudi target.
- Exclusions List<string>
- A list of glob patterns used to exclude from the crawl.
- MaximumTraversalDepth int
- The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
- Paths []string
- One or more Amazon S3 paths that contain Hudi metadata folders as s3://bucket/prefix.
- ConnectionName string
- The name of the connection to use to connect to the Hudi target.
- Exclusions []string
- A list of glob patterns used to exclude from the crawl.
- maximumTraversalDepth Integer
- The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
- paths List<String>
- One or more Amazon S3 paths that contain Hudi metadata folders as s3://bucket/prefix.
- connectionName String
- The name of the connection to use to connect to the Hudi target.
- exclusions List<String>
- A list of glob patterns used to exclude from the crawl.
- maximumTraversalDepth number
- The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
- paths string[]
- One or more Amazon S3 paths that contain Hudi metadata folders as s3://bucket/prefix.
- connectionName string
- The name of the connection to use to connect to the Hudi target.
- exclusions string[]
- A list of glob patterns used to exclude from the crawl.
- maximum_traversal_depth int
- The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
- paths Sequence[str]
- One or more Amazon S3 paths that contain Hudi metadata folders as s3://bucket/prefix.
- connection_name str
- The name of the connection to use to connect to the Hudi target.
- exclusions Sequence[str]
- A list of glob patterns used to exclude from the crawl.
- maximumTraversalDepth Number
- The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Hudi metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
- paths List<String>
- One or more Amazon S3 paths that contain Hudi metadata folders as s3://bucket/prefix.
- connectionName String
- The name of the connection to use to connect to the Hudi target.
- exclusions List<String>
- A list of glob patterns used to exclude from the crawl.
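A brief TypeScript sketch of a Hudi target with a bounded traversal; the bucket path, database, and role are assumed placeholder values.
import * as aws from "@pulumi/aws";
const hudiCrawler = new aws.glue.Crawler("hudi-example", {
    databaseName: "example-database", // placeholder
    role: "arn:aws:iam::123456789012:role/example-glue-role", // placeholder
    hudiTargets: [{
        paths: ["s3://example-bucket/hudi/"], // placeholder path containing Hudi metadata folders
        maximumTraversalDepth: 3,             // bound the S3 traversal (valid range 1 to 20)
        exclusions: ["**.tmp"],               // glob patterns to skip
    }],
});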
CrawlerIcebergTarget, CrawlerIcebergTargetArgs      
- MaximumTraversalDepth int
- The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
- Paths List<string>
- One or more Amazon S3 paths that contain Iceberg metadata folders as s3://bucket/prefix.
- ConnectionName string
- The name of the connection to use to connect to the Iceberg target.
- Exclusions List<string>
- A list of glob patterns used to exclude from the crawl.
- MaximumTraversalDepth int
- The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
- Paths []string
- One or more Amazon S3 paths that contain Iceberg metadata folders as s3://bucket/prefix.
- ConnectionName string
- The name of the connection to use to connect to the Iceberg target.
- Exclusions []string
- A list of glob patterns used to exclude from the crawl.
- maximumTraversalDepth Integer
- The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
- paths List<String>
- One or more Amazon S3 paths that contain Iceberg metadata folders as s3://bucket/prefix.
- connectionName String
- The name of the connection to use to connect to the Iceberg target.
- exclusions List<String>
- A list of glob patterns used to exclude from the crawl.
- maximumTraversalDepth number
- The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
- paths string[]
- One or more Amazon S3 paths that contain Iceberg metadata folders as s3://bucket/prefix.
- connectionName string
- The name of the connection to use to connect to the Iceberg target.
- exclusions string[]
- A list of glob patterns used to exclude from the crawl.
- maximum_traversal_depth int
- The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
- paths Sequence[str]
- One or more Amazon S3 paths that contain Iceberg metadata folders as s3://bucket/prefix.
- connection_name str
- The name of the connection to use to connect to the Iceberg target.
- exclusions Sequence[str]
- A list of glob patterns used to exclude from the crawl.
- maximumTraversalDepth Number
- The maximum depth of Amazon S3 paths that the crawler can traverse to discover the Iceberg metadata folder in your Amazon S3 path. Used to limit the crawler run time. Valid values are between 1 and 20.
- paths List<String>
- One or more Amazon S3 paths that contain Iceberg metadata folders as s3://bucket/prefix.
- connectionName String
- The name of the connection to use to connect to the Iceberg target.
- exclusions List<String>
- A list of glob patterns used to exclude from the crawl.
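The Iceberg target follows the same shape; a TypeScript sketch with placeholder path, database, and role values.
import * as aws from "@pulumi/aws";
const icebergCrawler = new aws.glue.Crawler("iceberg-example", {
    databaseName: "example-database", // placeholder
    role: "arn:aws:iam::123456789012:role/example-glue-role", // placeholder
    icebergTargets: [{
        paths: ["s3://example-bucket/iceberg/"], // placeholder path containing Iceberg metadata folders
        maximumTraversalDepth: 3,                // bound the S3 traversal (valid range 1 to 20)
    }],
});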
CrawlerJdbcTarget, CrawlerJdbcTargetArgs      
- ConnectionName string
- The name of the connection to use to connect to the JDBC target.
- Path string
- The path of the JDBC target.
- EnableAdditionalMetadatas List<string>
- Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table responses. RAWTYPES provides the native-level datatype. COMMENTS provides comments associated with a column or table in the database.
- Exclusions List<string>
- A list of glob patterns used to exclude from the crawl.
- ConnectionName string
- The name of the connection to use to connect to the JDBC target.
- Path string
- The path of the JDBC target.
- EnableAdditionalMetadatas []string
- Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table responses. RAWTYPES provides the native-level datatype. COMMENTS provides comments associated with a column or table in the database.
- Exclusions []string
- A list of glob patterns used to exclude from the crawl.
- connectionName String
- The name of the connection to use to connect to the JDBC target.
- path String
- The path of the JDBC target.
- enableAdditionalMetadatas List<String>
- Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table responses. RAWTYPES provides the native-level datatype. COMMENTS provides comments associated with a column or table in the database.
- exclusions List<String>
- A list of glob patterns used to exclude from the crawl.
- connectionName string
- The name of the connection to use to connect to the JDBC target.
- path string
- The path of the JDBC target.
- enableAdditionalMetadatas string[]
- Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table responses. RAWTYPES provides the native-level datatype. COMMENTS provides comments associated with a column or table in the database.
- exclusions string[]
- A list of glob patterns used to exclude from the crawl.
- connection_name str
- The name of the connection to use to connect to the JDBC target.
- path str
- The path of the JDBC target.
- enable_additional_metadatas Sequence[str]
- Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table responses. RAWTYPES provides the native-level datatype. COMMENTS provides comments associated with a column or table in the database.
- exclusions Sequence[str]
- A list of glob patterns used to exclude from the crawl.
- connectionName String
- The name of the connection to use to connect to the JDBC target.
- path String
- The path of the JDBC target.
- enableAdditionalMetadatas List<String>
- Specify a value of RAWTYPES or COMMENTS to enable additional metadata in table responses. RAWTYPES provides the native-level datatype. COMMENTS provides comments associated with a column or table in the database.
- exclusions List<String>
- A list of glob patterns used to exclude from the crawl.
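A TypeScript sketch of a JDBC target that also collects the additional metadata; the connection name, schema path, database, and role are placeholders.
import * as aws from "@pulumi/aws";
const jdbcCrawler = new aws.glue.Crawler("jdbc-example", {
    databaseName: "example-database", // placeholder
    role: "arn:aws:iam::123456789012:role/example-glue-role", // placeholder
    jdbcTargets: [{
        connectionName: "example-connection", // placeholder Glue connection
        path: "exampledb/%",                  // crawl every table in the placeholder schema
        enableAdditionalMetadatas: ["RAWTYPES", "COMMENTS"], // collect native types and comments
    }],
});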
CrawlerLakeFormationConfiguration, CrawlerLakeFormationConfigurationArgs        
- AccountId string
- Required for cross account crawls. For same account crawls as the target data, this can be omitted.
- UseLakeFormationCredentials bool
- Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials.
- AccountId string
- Required for cross account crawls. For same account crawls as the target data, this can be omitted.
- UseLakeFormationCredentials bool
- Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials.
- accountId String
- Required for cross account crawls. For same account crawls as the target data, this can be omitted.
- useLakeFormationCredentials Boolean
- Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials.
- accountId string
- Required for cross account crawls. For same account crawls as the target data, this can be omitted.
- useLakeFormationCredentials boolean
- Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials.
- account_id str
- Required for cross account crawls. For same account crawls as the target data, this can be omitted.
- use_lake_formation_credentials bool
- Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials.
- accountId String
- Required for cross account crawls. For same account crawls as the target data, this can be omitted.
- useLakeFormationCredentials Boolean
- Specifies whether to use Lake Formation credentials for the crawler instead of the IAM role credentials.
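As a sketch in TypeScript, a crawler that uses Lake Formation credentials; the account ID, bucket path, database, and role are placeholders, and accountId is only needed for cross-account crawls.
import * as aws from "@pulumi/aws";
const lfCrawler = new aws.glue.Crawler("lf-example", {
    databaseName: "example-database", // placeholder
    role: "arn:aws:iam::123456789012:role/example-glue-role", // placeholder
    s3Targets: [{ path: "s3://example-bucket/data/" }], // placeholder
    lakeFormationConfiguration: {
        useLakeFormationCredentials: true, // vend credentials via Lake Formation instead of the IAM role
        accountId: "123456789012",         // placeholder; omit for same-account crawls
    },
});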
CrawlerLineageConfiguration, CrawlerLineageConfigurationArgs      
- CrawlerLineageSettings string
- Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.
- CrawlerLineageSettings string
- Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.
- crawlerLineageSettings String
- Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.
- crawlerLineageSettings string
- Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.
- crawler_lineage_settings str
- Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.
- crawlerLineageSettings String
- Specifies whether data lineage is enabled for the crawler. Valid values are: ENABLE and DISABLE. Default value is DISABLE.
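Enabling lineage is a one-field setting; a minimal TypeScript sketch with placeholder database, role, and path values.
import * as aws from "@pulumi/aws";
const lineageCrawler = new aws.glue.Crawler("lineage-example", {
    databaseName: "example-database", // placeholder
    role: "arn:aws:iam::123456789012:role/example-glue-role", // placeholder
    s3Targets: [{ path: "s3://example-bucket/data/" }], // placeholder
    lineageConfiguration: {
        crawlerLineageSettings: "ENABLE", // default is DISABLE
    },
});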
CrawlerMongodbTarget, CrawlerMongodbTargetArgs      
- ConnectionName string
- The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.
- Path string
- The path of the Amazon DocumentDB or MongoDB target (database/collection).
- ScanAll bool
- Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.
- ConnectionName string
- The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.
- Path string
- The path of the Amazon DocumentDB or MongoDB target (database/collection).
- ScanAll bool
- Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.
- connectionName String
- The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.
- path String
- The path of the Amazon DocumentDB or MongoDB target (database/collection).
- scanAll Boolean
- Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.
- connectionName string
- The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.
- path string
- The path of the Amazon DocumentDB or MongoDB target (database/collection).
- scanAll boolean
- Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.
- connection_name str
- The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.
- path str
- The path of the Amazon DocumentDB or MongoDB target (database/collection).
- scan_all bool
- Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.
- connectionName String
- The name of the connection to use to connect to the Amazon DocumentDB or MongoDB target.
- path String
- The path of the Amazon DocumentDB or MongoDB target (database/collection).
- scanAll Boolean
- Indicates whether to scan all the records, or to sample rows from the table. Scanning all the records can take a long time when the table is not a high throughput table. Default value is true.
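A TypeScript sketch of a MongoDB (or Amazon DocumentDB) target; the connection name and database/collection path are placeholders.
import * as aws from "@pulumi/aws";
const mongoCrawler = new aws.glue.Crawler("mongo-example", {
    databaseName: "example-database", // placeholder
    role: "arn:aws:iam::123456789012:role/example-glue-role", // placeholder
    mongodbTargets: [{
        connectionName: "example-docdb-connection", // placeholder Glue connection
        path: "exampledb/examplecollection",        // database/collection
        scanAll: false,                             // sample rows instead of a full scan
    }],
});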
CrawlerRecrawlPolicy, CrawlerRecrawlPolicyArgs      
- RecrawlBehavior string
- Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.
- RecrawlBehavior string
- Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.
- recrawlBehavior String
- Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.
- recrawlBehavior string
- Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.
- recrawl_behavior str
- Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.
- recrawlBehavior String
- Specifies whether to crawl the entire dataset again, crawl only folders that were added since the last crawler run, or crawl what S3 notifies the crawler of via SQS. Valid values are: CRAWL_EVENT_MODE, CRAWL_EVERYTHING and CRAWL_NEW_FOLDERS_ONLY. Default value is CRAWL_EVERYTHING.
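A TypeScript sketch of an incremental crawl over an S3 target, with placeholder database, role, and path values; note that AWS generally expects LOG schema-change behaviors when CRAWL_NEW_FOLDERS_ONLY is used.
import * as aws from "@pulumi/aws";
const incrementalCrawler = new aws.glue.Crawler("incremental-example", {
    databaseName: "example-database", // placeholder
    role: "arn:aws:iam::123456789012:role/example-glue-role", // placeholder
    s3Targets: [{ path: "s3://example-bucket/data/" }], // placeholder
    recrawlPolicy: {
        recrawlBehavior: "CRAWL_NEW_FOLDERS_ONLY", // only crawl folders added since the last run
    },
    schemaChangePolicy: {
        updateBehavior: "LOG", // incremental crawls pair with LOG behaviors
        deleteBehavior: "LOG",
    },
});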
CrawlerS3Target, CrawlerS3TargetArgs    
- Path string
- The path to the Amazon S3 target.
- ConnectionName string
- The name of a connection which allows the crawler to access data in S3 within a VPC.
- DlqEventQueueArn string
- The ARN of the dead-letter SQS queue.
- EventQueueArn string
- The ARN of the SQS queue to receive S3 notifications from.
- Exclusions List<string>
- A list of glob patterns used to exclude from the crawl.
- SampleSize int
- Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.
- Path string
- The path to the Amazon S3 target.
- ConnectionName string
- The name of a connection which allows the crawler to access data in S3 within a VPC.
- DlqEventQueueArn string
- The ARN of the dead-letter SQS queue.
- EventQueueArn string
- The ARN of the SQS queue to receive S3 notifications from.
- Exclusions []string
- A list of glob patterns used to exclude from the crawl.
- SampleSize int
- Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.
- path String
- The path to the Amazon S3 target.
- connectionName String
- The name of a connection which allows the crawler to access data in S3 within a VPC.
- dlqEventQueueArn String
- The ARN of the dead-letter SQS queue.
- eventQueueArn String
- The ARN of the SQS queue to receive S3 notifications from.
- exclusions List<String>
- A list of glob patterns used to exclude from the crawl.
- sampleSize Integer
- Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.
- path string
- The path to the Amazon S3 target.
- connectionName string
- The name of a connection which allows the crawler to access data in S3 within a VPC.
- dlqEventQueueArn string
- The ARN of the dead-letter SQS queue.
- eventQueueArn string
- The ARN of the SQS queue to receive S3 notifications from.
- exclusions string[]
- A list of glob patterns used to exclude from the crawl.
- sampleSize number
- Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.
- path str
- The path to the Amazon S3 target.
- connection_name str
- The name of a connection which allows the crawler to access data in S3 within a VPC.
- dlq_event_queue_arn str
- The ARN of the dead-letter SQS queue.
- event_queue_arn str
- The ARN of the SQS queue to receive S3 notifications from.
- exclusions Sequence[str]
- A list of glob patterns used to exclude from the crawl.
- sample_size int
- Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.
- path String
- The path to the Amazon S3 target.
- connectionName String
- The name of a connection which allows the crawler to access data in S3 within a VPC.
- dlqEventQueueArn String
- The ARN of the dead-letter SQS queue.
- eventQueueArn String
- The ARN of the SQS queue to receive S3 notifications from.
- exclusions List<String>
- A list of glob patterns used to exclude from the crawl.
- sampleSize Number
- Sets the number of files in each leaf folder to be crawled when crawling sample files in a dataset. If not set, all the files are crawled. A valid value is an integer between 1 and 249.
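For example, a TypeScript sketch of an S3 target that excludes compressed files and samples each leaf folder; all paths, the database, and the role are placeholders.
import * as aws from "@pulumi/aws";
const s3Crawler = new aws.glue.Crawler("s3-example", {
    databaseName: "example-database", // placeholder
    role: "arn:aws:iam::123456789012:role/example-glue-role", // placeholder
    s3Targets: [{
        path: "s3://example-bucket/data/", // placeholder
        exclusions: ["**.gz"],             // glob patterns to skip
        sampleSize: 10,                    // crawl at most 10 files per leaf folder
    }],
});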
CrawlerSchemaChangePolicy, CrawlerSchemaChangePolicyArgs        
- DeleteBehavior string
- The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
- UpdateBehavior string
- The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
- DeleteBehavior string
- The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
- UpdateBehavior string
- The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
- deleteBehavior String
- The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
- updateBehavior String
- The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
- deleteBehavior string
- The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
- updateBehavior string
- The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
- delete_behavior str
- The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
- update_behavior str
- The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
- deleteBehavior String
- The deletion behavior when the crawler finds a deleted object. Valid values: LOG, DELETE_FROM_DATABASE, or DEPRECATE_IN_DATABASE. Defaults to DEPRECATE_IN_DATABASE.
- updateBehavior String
- The update behavior when the crawler finds a changed schema. Valid values: LOG or UPDATE_IN_DATABASE. Defaults to UPDATE_IN_DATABASE.
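A TypeScript sketch of a schema change policy that applies updates and removes deleted entries; the database, role, and path are placeholders.
import * as aws from "@pulumi/aws";
const policyCrawler = new aws.glue.Crawler("policy-example", {
    databaseName: "example-database", // placeholder
    role: "arn:aws:iam::123456789012:role/example-glue-role", // placeholder
    s3Targets: [{ path: "s3://example-bucket/data/" }], // placeholder
    schemaChangePolicy: {
        updateBehavior: "UPDATE_IN_DATABASE",   // apply schema changes to the Data Catalog
        deleteBehavior: "DELETE_FROM_DATABASE", // remove catalog entries for deleted objects
    },
});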
Import
Using pulumi import, import Glue Crawlers using name. For example:
$ pulumi import aws:glue/crawler:Crawler MyJob MyJob
To learn more about importing existing cloud resources, see Importing resources.
Package Details
- Repository
- AWS Classic pulumi/pulumi-aws
- License
- Apache-2.0
- Notes
- This Pulumi package is based on the aws Terraform Provider.