Docker Image

hive-server:
  image: ljiang/hive:2.3.2-postgresql-metastore-kafka-zookeeper-atlas-hive-hook
  env_file:
    - ./hadoop-hive.env
docker-compose -f docker-compose-local.yml up -d

Observe Kafka Message

docker-compose exec hive-server bash
  1. Start the Zookeeper service
  2. Start the Kafka service
cd ~/kafka_2.11-2.3.0/
bin/zookeeper-server-start.sh -daemon config/zookeeper.properties
bin/kafka-server-start.sh config/server.properties
bin/kafka-topics.sh --list --zookeeper localhost:2181
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic ATLAS_ENTITIES
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic ATLAS_HOOK
bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic ATLAS_HOOK --from-beginning

Experiment Hive

/opt/hive/bin/hive
hive> create database hive_hook_database;
hive> use hive_hook_database;
hive> CREATE TABLE post (code INT, text STRING);
hive> CREATE TABLE pokes (foo INT, bar STRING);
hive> CREATE TABLE post_pokes (code INT, text STRING, foo INT, bar STRING);
hive> insert into table post_pokes(foo,bar,code,text) select p.foo, p.bar, po.code, po.text from pokes p inner join post po ON p.foo = po.code;
{
"version": {
"version": "1.0.0",
"versionParts": [
1
]
},
"msgCompressionKind": "NONE",
"msgSplitIdx": 1,
"msgSplitCount": 1,
"msgSourceIP": "172.19.0.10",
"msgCreatedBy": "root",
"msgCreationTime": 1606854172717,
"message": {
"type": "ENTITY_CREATE_V2",
"user": "root",
"entities": {
"referredEntities": {
"-565645672010544": {
"typeName": "hive_table",
"attributes": {
"owner": "root",
"tableType": "MANAGED_TABLE",
"temporary": false,
"lastAccessTime": 1605646631000,
"createTime": 1605646631000,
"qualifiedName": "default.pokes@primary",
"name": "pokes",
"comment": null,
"parameters": {
"last_modified_time": "1605646874",
"totalSize": "5812",
"numRows": "0",
"rawDataSize": "0",
"numFiles": "1",
"transient_lastDdlTime": "1605646874",
"last_modified_by": "root"
},
"retention": 0
},
"guid": "-565645672010544",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"sd": {
"guid": "-565645672010545",
"typeName": "hive_storagedesc",
"uniqueAttributes": {
"qualifiedName": "default.pokes@primary_storage"
},
"relationshipType": "hive_table_storagedesc"
},
"columns": [
{
"guid": "-565645672010546",
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.pokes.foo@primary"
},
"relationshipType": "hive_table_columns"
},
{
"guid": "-565645672010547",
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.pokes.bar@primary"
},
"relationshipType": "hive_table_columns"
},
{
"guid": "-565645672010548",
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.pokes.release_principal@primary"
},
"relationshipType": "hive_table_columns"
}
],
"partitionKeys": [],
"db": {
"typeName": "hive_db",
"uniqueAttributes": {
"qualifiedName": "default@primary"
},
"relationshipType": "hive_table_db"
}
},
"proxy": false
},
"-565645672010545": {
"typeName": "hive_storagedesc",
"attributes": {
"qualifiedName": "default.pokes@primary_storage",
"storedAsSubDirectories": false,
"location": "hdfs://namenode:8020/user/hive/warehouse/pokes",
"compressed": false,
"inputFormat": "org.apache.hadoop.mapred.TextInputFormat",
"parameters": {},
"outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"serdeInfo": {
"typeName": "hive_serde",
"attributes": {
"serializationLib": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"name": null,
"parameters": {
"serialization.format": "1"
}
}
},
"numBuckets": -1
},
"guid": "-565645672010545",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"table": {
"guid": "-565645672010544",
"typeName": "hive_table",
"uniqueAttributes": {
"qualifiedName": "default.pokes@primary"
},
"relationshipType": "hive_table_storagedesc"
}
},
"proxy": false
},
"-565645672010546": {
"typeName": "hive_column",
"attributes": {
"owner": "root",
"qualifiedName": "default.pokes.foo@primary",
"name": "foo",
"comment": null,
"position": 0,
"type": "int"
},
"guid": "-565645672010546",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"table": {
"guid": "-565645672010544",
"typeName": "hive_table",
"uniqueAttributes": {
"qualifiedName": "default.pokes@primary"
},
"relationshipType": "hive_table_columns"
}
},
"proxy": false
},
"-565645672010547": {
"typeName": "hive_column",
"attributes": {
"owner": "root",
"qualifiedName": "default.pokes.bar@primary",
"name": "bar",
"comment": null,
"position": 1,
"type": "string"
},
"guid": "-565645672010547",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"table": {
"guid": "-565645672010544",
"typeName": "hive_table",
"uniqueAttributes": {
"qualifiedName": "default.pokes@primary"
},
"relationshipType": "hive_table_columns"
}
},
"proxy": false
},
"-565645672010548": {
"typeName": "hive_column",
"attributes": {
"owner": "root",
"qualifiedName": "default.pokes.release_principal@primary",
"name": "release_principal",
"comment": "Release date for this post",
"position": 2,
"type": "string"
},
"guid": "-565645672010548",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"table": {
"guid": "-565645672010544",
"typeName": "hive_table",
"uniqueAttributes": {
"qualifiedName": "default.pokes@primary"
},
"relationshipType": "hive_table_columns"
}
},
"proxy": false
}
},
"entities": [
{
"typeName": "hive_process",
"attributes": {
"recentQueries": [
"insert into table post_pokes(foo,bar,code,text) select p.foo, p.bar, po.code, po.text from pokes p inner join post po on p.foo = po.code"
],
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000",
"clusterName": "primary",
"name": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000",
"queryText": "",
"operationType": "QUERY",
"startTime": 1606854172663,
"queryPlan": "Not Supported",
"endTime": 1606854172663,
"userName": "",
"queryId": ""
},
"guid": "-565645672010555",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"outputs": [
{
"typeName": "hive_table",
"uniqueAttributes": {
"qualifiedName": "default.post_pokes@primary"
},
"relationshipType": "process_dataset_outputs"
}
],
"inputs": [
{
"typeName": "hive_table",
"uniqueAttributes": {
"qualifiedName": "default.post@primary"
},
"relationshipType": "dataset_process_inputs"
},
{
"guid": "-565645672010544",
"typeName": "hive_table",
"uniqueAttributes": {
"qualifiedName": "default.pokes@primary"
},
"relationshipType": "dataset_process_inputs"
}
]
},
"proxy": false
},
{
"typeName": "hive_process_execution",
"attributes": {
"hostName": "f92480d5e91c",
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:1606854146078:1606854172663",
"name": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:1606854146078:1606854172663",
"queryText": "insert into table post_pokes(foo,bar,code,text) select p.foo, p.bar, po.code, po.text from pokes p inner join post po on p.foo = po.code",
"startTime": 1606854146078,
"queryPlan": "Not Supported",
"endTime": 1606854172663,
"userName": "root",
"queryId": "root_20201201202226_03f78300-4f08-4676-aa16-3044b0c7dd8b"
},
"guid": "-565645672010556",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"process": {
"guid": "-565645672010555",
"typeName": "hive_process",
"relationshipType": "hive_process_process_executions"
}
},
"proxy": false
},
{
"typeName": "hive_column_lineage",
"attributes": {
"expression": null,
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:code",
"name": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:code",
"depenendencyType": "SIMPLE"
},
"guid": "-565645672010557",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"outputs": [
{
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.post_pokes.code@primary"
},
"relationshipType": "process_dataset_outputs"
}
],
"inputs": [
{
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.post.code@primary"
},
"relationshipType": "dataset_process_inputs"
}
],
"query": {
"guid": "-565645672010555",
"typeName": "hive_process",
"uniqueAttributes": {
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000"
},
"relationshipType": "hive_process_column_lineage"
}
},
"proxy": false
},
{
"typeName": "hive_column_lineage",
"attributes": {
"expression": null,
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:text",
"name": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:text",
"depenendencyType": "SIMPLE"
},
"guid": "-565645672010558",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"outputs": [
{
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.post_pokes.text@primary"
},
"relationshipType": "process_dataset_outputs"
}
],
"inputs": [
{
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.post.text@primary"
},
"relationshipType": "dataset_process_inputs"
}
],
"query": {
"guid": "-565645672010555",
"typeName": "hive_process",
"uniqueAttributes": {
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000"
},
"relationshipType": "hive_process_column_lineage"
}
},
"proxy": false
},
{
"typeName": "hive_column_lineage",
"attributes": {
"expression": null,
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:foo",
"name": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:foo",
"depenendencyType": "SIMPLE"
},
"guid": "-565645672010559",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"outputs": [
{
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.post_pokes.foo@primary"
},
"relationshipType": "process_dataset_outputs"
}
],
"inputs": [
{
"guid": "-565645672010546",
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.pokes.foo@primary"
},
"relationshipType": "dataset_process_inputs"
}
],
"query": {
"guid": "-565645672010555",
"typeName": "hive_process",
"uniqueAttributes": {
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000"
},
"relationshipType": "hive_process_column_lineage"
}
},
"proxy": false
},
{
"typeName": "hive_column_lineage",
"attributes": {
"expression": null,
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:bar",
"name": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:bar",
"depenendencyType": "SIMPLE"
},
"guid": "-565645672010560",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"outputs": [
{
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.post_pokes.bar@primary"
},
"relationshipType": "process_dataset_outputs"
}
],
"inputs": [
{
"guid": "-565645672010547",
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.pokes.bar@primary"
},
"relationshipType": "dataset_process_inputs"
}
],
"query": {
"guid": "-565645672010555",
"typeName": "hive_process",
"uniqueAttributes": {
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000"
},
"relationshipType": "hive_process_column_lineage"
}
},
"proxy": false
}
]
}
}
}

Love podcasts or audiobooks? Learn on the go with our new app.

Recommended from Medium

Gov. Beshear: COVID-19 and Vaccine Update

Kotlin vs Groovy: The Best for Android App Development

Why can’t you measure DevOps by popular metrics?

Set up Recurring Reservations in AssetSonar: Ensure Equipment Availability at Regular Events

Recurring Reservations

The Case for Timsort

OpenGL vs WebGL — What are they?

Big Data and Cloud — A perfect combination!

Quick Overview of Custom Scheduler to Manage Highly Available Applications

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Liangjun Jiang

Liangjun Jiang

More from Medium

Deploy Open Source Datahub — Part II

Fully Embracing K8s, Cisco Hangzhou Seeks to Support K8s Tasks Based on ApacheDolphinScheduler

Visualizing Twitter trends live: Redpanda, Materialize and dbt

How to Handle Nested Data in Apache Druid vs Rockset