Docker Image

hive-server:
  image: ljiang/hive:2.3.2-postgresql-metastore-kafka-zookeeper-atlas-hive-hook
  env_file:
    - ./hadoop-hive.env
docker-compose -f docker-compose-local.yml up -d

Observe Kafka Message

docker-compose exec hive-server bash
  1. Start the Zookeeper service
  2. Start the Kafka service
cd ~/kafka_2.11-2.3.0/
bin/zookeeper-server-start.sh -daemon config/zookeeper.properties
bin/kafka-server-start.sh config/server.properties
bin/kafka-topics.sh --list --zookeeper localhost:2181
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic ATLAS_ENTITIES
bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic ATLAS_HOOK
bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 --topic ATLAS_HOOK --from-beginning

Experiment Hive

/opt/hive/bin/hive
hive> create database hive_hook_database;
hive> use hive_hook_database;
hive> CREATE TABLE post (code INT, text STRING);
hive> CREATE TABLE pokes (foo INT, bar STRING);
hive> CREATE TABLE post_pokes (code INT, text STRING, foo INT, bar STRING);
hive> insert into table post_pokes(foo,bar,code,text) select p.foo, p.bar, po.code, po.text from pokes p inner join post po ON p.foo = po.code;
{
"version": {
"version": "1.0.0",
"versionParts": [
1
]
},
"msgCompressionKind": "NONE",
"msgSplitIdx": 1,
"msgSplitCount": 1,
"msgSourceIP": "172.19.0.10",
"msgCreatedBy": "root",
"msgCreationTime": 1606854172717,
"message": {
"type": "ENTITY_CREATE_V2",
"user": "root",
"entities": {
"referredEntities": {
"-565645672010544": {
"typeName": "hive_table",
"attributes": {
"owner": "root",
"tableType": "MANAGED_TABLE",
"temporary": false,
"lastAccessTime": 1605646631000,
"createTime": 1605646631000,
"qualifiedName": "default.pokes@primary",
"name": "pokes",
"comment": null,
"parameters": {
"last_modified_time": "1605646874",
"totalSize": "5812",
"numRows": "0",
"rawDataSize": "0",
"numFiles": "1",
"transient_lastDdlTime": "1605646874",
"last_modified_by": "root"
},
"retention": 0
},
"guid": "-565645672010544",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"sd": {
"guid": "-565645672010545",
"typeName": "hive_storagedesc",
"uniqueAttributes": {
"qualifiedName": "default.pokes@primary_storage"
},
"relationshipType": "hive_table_storagedesc"
},
"columns": [
{
"guid": "-565645672010546",
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.pokes.foo@primary"
},
"relationshipType": "hive_table_columns"
},
{
"guid": "-565645672010547",
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.pokes.bar@primary"
},
"relationshipType": "hive_table_columns"
},
{
"guid": "-565645672010548",
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.pokes.release_principal@primary"
},
"relationshipType": "hive_table_columns"
}
],
"partitionKeys": [],
"db": {
"typeName": "hive_db",
"uniqueAttributes": {
"qualifiedName": "default@primary"
},
"relationshipType": "hive_table_db"
}
},
"proxy": false
},
"-565645672010545": {
"typeName": "hive_storagedesc",
"attributes": {
"qualifiedName": "default.pokes@primary_storage",
"storedAsSubDirectories": false,
"location": "hdfs://namenode:8020/user/hive/warehouse/pokes",
"compressed": false,
"inputFormat": "org.apache.hadoop.mapred.TextInputFormat",
"parameters": {},
"outputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"serdeInfo": {
"typeName": "hive_serde",
"attributes": {
"serializationLib": "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"name": null,
"parameters": {
"serialization.format": "1"
}
}
},
"numBuckets": -1
},
"guid": "-565645672010545",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"table": {
"guid": "-565645672010544",
"typeName": "hive_table",
"uniqueAttributes": {
"qualifiedName": "default.pokes@primary"
},
"relationshipType": "hive_table_storagedesc"
}
},
"proxy": false
},
"-565645672010546": {
"typeName": "hive_column",
"attributes": {
"owner": "root",
"qualifiedName": "default.pokes.foo@primary",
"name": "foo",
"comment": null,
"position": 0,
"type": "int"
},
"guid": "-565645672010546",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"table": {
"guid": "-565645672010544",
"typeName": "hive_table",
"uniqueAttributes": {
"qualifiedName": "default.pokes@primary"
},
"relationshipType": "hive_table_columns"
}
},
"proxy": false
},
"-565645672010547": {
"typeName": "hive_column",
"attributes": {
"owner": "root",
"qualifiedName": "default.pokes.bar@primary",
"name": "bar",
"comment": null,
"position": 1,
"type": "string"
},
"guid": "-565645672010547",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"table": {
"guid": "-565645672010544",
"typeName": "hive_table",
"uniqueAttributes": {
"qualifiedName": "default.pokes@primary"
},
"relationshipType": "hive_table_columns"
}
},
"proxy": false
},
"-565645672010548": {
"typeName": "hive_column",
"attributes": {
"owner": "root",
"qualifiedName": "default.pokes.release_principal@primary",
"name": "release_principal",
"comment": "Release date for this post",
"position": 2,
"type": "string"
},
"guid": "-565645672010548",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"table": {
"guid": "-565645672010544",
"typeName": "hive_table",
"uniqueAttributes": {
"qualifiedName": "default.pokes@primary"
},
"relationshipType": "hive_table_columns"
}
},
"proxy": false
}
},
"entities": [
{
"typeName": "hive_process",
"attributes": {
"recentQueries": [
"insert into table post_pokes(foo,bar,code,text) select p.foo, p.bar, po.code, po.text from pokes p inner join post po on p.foo = po.code"
],
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000",
"clusterName": "primary",
"name": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000",
"queryText": "",
"operationType": "QUERY",
"startTime": 1606854172663,
"queryPlan": "Not Supported",
"endTime": 1606854172663,
"userName": "",
"queryId": ""
},
"guid": "-565645672010555",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"outputs": [
{
"typeName": "hive_table",
"uniqueAttributes": {
"qualifiedName": "default.post_pokes@primary"
},
"relationshipType": "process_dataset_outputs"
}
],
"inputs": [
{
"typeName": "hive_table",
"uniqueAttributes": {
"qualifiedName": "default.post@primary"
},
"relationshipType": "dataset_process_inputs"
},
{
"guid": "-565645672010544",
"typeName": "hive_table",
"uniqueAttributes": {
"qualifiedName": "default.pokes@primary"
},
"relationshipType": "dataset_process_inputs"
}
]
},
"proxy": false
},
{
"typeName": "hive_process_execution",
"attributes": {
"hostName": "f92480d5e91c",
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:1606854146078:1606854172663",
"name": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:1606854146078:1606854172663",
"queryText": "insert into table post_pokes(foo,bar,code,text) select p.foo, p.bar, po.code, po.text from pokes p inner join post po on p.foo = po.code",
"startTime": 1606854146078,
"queryPlan": "Not Supported",
"endTime": 1606854172663,
"userName": "root",
"queryId": "root_20201201202226_03f78300-4f08-4676-aa16-3044b0c7dd8b"
},
"guid": "-565645672010556",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"process": {
"guid": "-565645672010555",
"typeName": "hive_process",
"relationshipType": "hive_process_process_executions"
}
},
"proxy": false
},
{
"typeName": "hive_column_lineage",
"attributes": {
"expression": null,
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:code",
"name": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:code",
"depenendencyType": "SIMPLE"
},
"guid": "-565645672010557",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"outputs": [
{
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.post_pokes.code@primary"
},
"relationshipType": "process_dataset_outputs"
}
],
"inputs": [
{
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.post.code@primary"
},
"relationshipType": "dataset_process_inputs"
}
],
"query": {
"guid": "-565645672010555",
"typeName": "hive_process",
"uniqueAttributes": {
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000"
},
"relationshipType": "hive_process_column_lineage"
}
},
"proxy": false
},
{
"typeName": "hive_column_lineage",
"attributes": {
"expression": null,
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:text",
"name": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:text",
"depenendencyType": "SIMPLE"
},
"guid": "-565645672010558",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"outputs": [
{
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.post_pokes.text@primary"
},
"relationshipType": "process_dataset_outputs"
}
],
"inputs": [
{
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.post.text@primary"
},
"relationshipType": "dataset_process_inputs"
}
],
"query": {
"guid": "-565645672010555",
"typeName": "hive_process",
"uniqueAttributes": {
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000"
},
"relationshipType": "hive_process_column_lineage"
}
},
"proxy": false
},
{
"typeName": "hive_column_lineage",
"attributes": {
"expression": null,
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:foo",
"name": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:foo",
"depenendencyType": "SIMPLE"
},
"guid": "-565645672010559",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"outputs": [
{
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.post_pokes.foo@primary"
},
"relationshipType": "process_dataset_outputs"
}
],
"inputs": [
{
"guid": "-565645672010546",
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.pokes.foo@primary"
},
"relationshipType": "dataset_process_inputs"
}
],
"query": {
"guid": "-565645672010555",
"typeName": "hive_process",
"uniqueAttributes": {
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000"
},
"relationshipType": "hive_process_column_lineage"
}
},
"proxy": false
},
{
"typeName": "hive_column_lineage",
"attributes": {
"expression": null,
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:bar",
"name": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000:bar",
"depenendencyType": "SIMPLE"
},
"guid": "-565645672010560",
"isIncomplete": false,
"provenanceType": 0,
"version": 0,
"relationshipAttributes": {
"outputs": [
{
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.post_pokes.bar@primary"
},
"relationshipType": "process_dataset_outputs"
}
],
"inputs": [
{
"guid": "-565645672010547",
"typeName": "hive_column",
"uniqueAttributes": {
"qualifiedName": "default.pokes.bar@primary"
},
"relationshipType": "dataset_process_inputs"
}
],
"query": {
"guid": "-565645672010555",
"typeName": "hive_process",
"uniqueAttributes": {
"qualifiedName": "QUERY:default.pokes@primary:1605646631000:default.post@primary:1606853324000->:INSERT:default.post_pokes@primary:1606854087000"
},
"relationshipType": "hive_process_column_lineage"
}
},
"proxy": false
}
]
}
}
}

--

--

--

Love podcasts or audiobooks? Learn on the go with our new app.

Recommended from Medium

Game Environment as a Training Platform for Brain Computer Interfaces

ARE YOU AWARE THAT …

Simple Script For Checking Whether an URL Active Or Not Using Python

Becoming a Web Developer using Github as a tool- ICA Kwasu Maiden Meetup.

Understanding 3D medical image orientation for programmers

Synthetix grantsDAO: Aave Improvement Proposals

Flutter hands on: Building a News App

Creating a IAM User, Creating a User Group,Assigning a Permission to a User, and Setting Up MFA.

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Liangjun Jiang

Liangjun Jiang

More from Medium

How should we use data integration frameworks to break the dilemma of full lifecycle implementation…

Snowflake: IS_GRANTED_TO_INVOKER_ROLE function

Apache Airflow: Automate Email Alerts for Task Status

Deploy Open Source Datahub — Part II