3 years ago · 4398ec1cc7
--- a/.editorconfig
+++ b/.editorconfig
@@ -17,3 +17,6 @@ indent_style = tab
 
															 [*.md]
														
 
															 indent_size = 4
														
 
															+
														
 
															+[MANIFEST.MF]
														
 
															+end_of_line = lf
														
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 
															 target/*
														
 
															 /kafka*
														
 
															+out/*
														
--- a/.idea/artifacts/assign_jar.xml
+++ b/.idea/artifacts/assign_jar.xml
@@ -0,0 +1,15 @@
 
															+<component name="ArtifactManager">
														
 
															+  <artifact type="jar" build-on-make="true" name="assign:jar">
														
 
															+    <output-path>$PROJECT_DIR$/out/artifacts</output-path>
														
 
															+    <root id="archive" name="assign.jar">
														
 
															+      <element id="directory" name="META-INF">
														
 
															+        <element id="file-copy" path="$PROJECT_DIR$/src/main/java/assign/META-INF/MANIFEST.MF" />
														
 
															+      </element>
														
 
															+      <element id="module-output" name="GettingStartedWithKafka" />
														
 
															+      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/xerial/snappy/snappy-java/1.1.2.6/snappy-java-1.1.2.6.jar" path-in-jar="/" />
														
 
															+      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/slf4j/slf4j-api/1.7.21/slf4j-api-1.7.21.jar" path-in-jar="/" />
														
 
															+      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0.jar" path-in-jar="/" />
														
 
															+      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/kafka/kafka-clients/0.10.0.1/kafka-clients-0.10.0.1.jar" path-in-jar="/" />
														
 
															+    </root>
														
 
															+  </artifact>
														
 
															+</component>
														
--- a/.idea/artifacts/consumer_jar.xml
+++ b/.idea/artifacts/consumer_jar.xml
@@ -0,0 +1,15 @@
 
															+<component name="ArtifactManager">
														
 
															+  <artifact type="jar" build-on-make="true" name="consumer:jar">
														
 
															+    <output-path>$PROJECT_DIR$/out/artifacts</output-path>
														
 
															+    <root id="archive" name="consumer.jar">
														
 
															+      <element id="directory" name="META-INF">
														
 
															+        <element id="file-copy" path="$PROJECT_DIR$/src/main/java/consumer/META-INF/MANIFEST.MF" />
														
 
															+      </element>
														
 
															+      <element id="module-output" name="GettingStartedWithKafka" />
														
 
															+      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/xerial/snappy/snappy-java/1.1.2.6/snappy-java-1.1.2.6.jar" path-in-jar="/" />
														
 
															+      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/slf4j/slf4j-api/1.7.21/slf4j-api-1.7.21.jar" path-in-jar="/" />
														
 
															+      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0.jar" path-in-jar="/" />
														
 
															+      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/kafka/kafka-clients/0.10.0.1/kafka-clients-0.10.0.1.jar" path-in-jar="/" />
														
 
															+    </root>
														
 
															+  </artifact>
														
 
															+</component>
														
--- a/.idea/artifacts/producer_jar.xml
+++ b/.idea/artifacts/producer_jar.xml
@@ -0,0 +1,15 @@
 
															+<component name="ArtifactManager">
														
 
															+  <artifact type="jar" build-on-make="true" name="producer:jar">
														
 
															+    <output-path>$PROJECT_DIR$/out/artifacts</output-path>
														
 
															+    <root id="archive" name="producer.jar">
														
 
															+      <element id="directory" name="META-INF">
														
 
															+        <element id="file-copy" path="$PROJECT_DIR$/src/main/java/producer/META-INF/MANIFEST.MF" />
														
 
															+      </element>
														
 
															+      <element id="module-output" name="GettingStartedWithKafka" />
														
 
															+      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/xerial/snappy/snappy-java/1.1.2.6/snappy-java-1.1.2.6.jar" path-in-jar="/" />
														
 
															+      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/slf4j/slf4j-api/1.7.21/slf4j-api-1.7.21.jar" path-in-jar="/" />
														
 
															+      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/net/jpountz/lz4/lz4/1.3.0/lz4-1.3.0.jar" path-in-jar="/" />
														
 
															+      <element id="extracted-dir" path="$MAVEN_REPOSITORY$/org/apache/kafka/kafka-clients/0.10.0.1/kafka-clients-0.10.0.1.jar" path-in-jar="/" />
														
 
															+    </root>
														
 
															+  </artifact>
														
 
															+</component>
														
--- a/.idea/runConfigurations/KafkaAssignApp.xml
+++ b/.idea/runConfigurations/KafkaAssignApp.xml
@@ -1,7 +1,7 @@
 
															 <component name="ProjectRunConfigurationManager">
														
 
															   <configuration default="false" name="KafkaAssignApp" type="Application" factoryName="Application">
														
 
															     <option name="MAIN_CLASS_NAME" value="KafkaAssignApp" />
														
 
															-    <module name="kafkasamples" />
														
 
															+    <module name="GettingStartedWithKafka" />
														
 
															     <method v="2">
														
 
															       <option name="Make" enabled="true" />
														
 
															     </method>
														
--- a/.idea/runConfigurations/KafkaConsumerApp_1.xml
+++ b/.idea/runConfigurations/KafkaConsumerApp_1.xml
@@ -1,7 +1,7 @@
 
															 <component name="ProjectRunConfigurationManager">
														
 
															-  <configuration default="false" name="KafkaConsumerApp" type="Application" factoryName="Application" nameIsGenerated="true">
														
 
															+  <configuration default="false" name="KafkaConsumerApp 1" type="Application" factoryName="Application">
														
 
															     <option name="MAIN_CLASS_NAME" value="KafkaConsumerApp" />
														
 
															-    <module name="kafkasamples" />
														
 
															+    <module name="GettingStartedWithKafka" />
														
 
															     <method v="2">
														
 
															       <option name="Make" enabled="true" />
														
 
															     </method>
														
--- a/.idea/runConfigurations/KafkaConsumerApp_2.xml
+++ b/.idea/runConfigurations/KafkaConsumerApp_2.xml
@@ -0,0 +1,9 @@
 
															+<component name="ProjectRunConfigurationManager">
														
 
															+  <configuration default="false" name="KafkaConsumerApp 2" type="Application" factoryName="Application">
														
 
															+    <option name="MAIN_CLASS_NAME" value="KafkaConsumerApp" />
														
 
															+    <module name="GettingStartedWithKafka" />
														
 
															+    <method v="2">
														
 
															+      <option name="Make" enabled="true" />
														
 
															+    </method>
														
 
															+  </configuration>
														
 
															+</component>
														
--- a/.idea/runConfigurations/KafkaProducerApp.xml
+++ b/.idea/runConfigurations/KafkaProducerApp.xml
@@ -1,8 +1,8 @@
 
															 <component name="ProjectRunConfigurationManager">
														
 
															   <configuration default="false" name="KafkaProducerApp" type="Application" factoryName="Application">
														
 
															     <option name="ALTERNATIVE_JRE_PATH" value="1.8" />
														
 
															-    <option name="MAIN_CLASS_NAME" value="KafkaProducerApp" />
														
 
															-    <module name="kafkasamples" />
														
 
															+    <option name="MAIN_CLASS_NAME" value="producer.KafkaProducerApp" />
														
 
															+    <module name="GettingStartedWithKafka" />
														
 
															     <RunnerSettings RunnerId="Run" />
														
 
															     <ConfigurationWrapper RunnerId="Run" />
														
 
															     <method v="2">
														
--- a/messages.md
+++ b/messages.md
@@ -6,15 +6,15 @@
 
															 ## Creating a producer
														
 
															-- Properties: 
														
 
															+- Properties:
														
 
															     - `bootstrap.servers`
														
 
															     - `key.serializer`
														
 
															     - `value.serializer`
														
 
															     - and others
														
 
															     - => `KafkaProducer`
														
 
															     - [Producer config doc](http://kafka.apache.org/documentation.html#producerconfigs)
														
 
															-- See [KafkaProducerApp](../src/main/java/KafkaProducerApp.java)
														
 
															-  
														
 
															+- See [producer.KafkaProducerApp](../src/main/java/producer/KafkaProducerApp.java)
														
 
															+
														
 
															 ## Creating messages
														
 
															 - K calls messages "ProducerRecord"
														
@@ -27,7 +27,7 @@
 
															         - Key
														
 
															 - KP instances can only send PRs that match the key and value
														
 
															   serializer types they are configured with.
														
 
															-  
														
 
															+
														
 
															 ## Sending messages
														
 
															 When the producer sends:
														
@@ -47,7 +47,7 @@ When the producer sends:
 
															       a Murmur hash in DefaultPartitioner. See `DefaultPartitioner.partition`.
														
 
															     - custom (defined in `PARTITIONER_CLASS_CONFIG == "partitioner.class"` property)
														
 
															 - it pushes the message to an in-memory queue, the `RecordAccumulator`
														
 
															-    - micro-batching: as scale, efficiency is everything. 
														
 
															+    - micro-batching: as scale, efficiency is everything.
														
 
															     - Use on producer, broker, and consumer.
														
 
															     - Also used in the OS (page cache, Linux sendfile() syscall).
														
 
															     - Amortizes the constant cost of sends
														
@@ -82,11 +82,11 @@ When the producer sends:
 
															     - At-least once
														
 
															     - At-most once
														
 
															     - Exactly once
														
 
															-    
														
 
															+
														
 
															 ## Advanced topics
														
 
															 - Cusom serializers
														
 
															 - Custom Partitioners
														
 
															 - Asynchronous send
														
 
															 - Compression
														
 
															-- Advanced settings
														
 
															+- Advanced settings
														
--- a/messages.md
+++ b/messages.md
@@ -94,7 +94,6 @@ public class ConsumerRecords<K, V> implements Iterable<ConsumerRecord<K, V>> {
 
															 Just because something is _read_ does not mean it is _committed_:
														
 
															-
														
 
															 - There are different categories of offsets, representing the stage they're in:
														
 
															     - a consumer needs to know what it has vs has not read
														
 
															     - what it confirms it has read (and processed) is the _last committed offset_
														
@@ -119,7 +118,7 @@ By default, consumers start reading from a new partition at the `latest` committ
 
															 - Optional property:
														
 
															     - `auto.offset.reset` can be `earliest`, `latest` (default), or `none` which
														
 
															-      throws an exception and lets code decide.
														
 
															+      throws an exception and lets code decide. See "rebalancing" below.
														
 
															 Offset choice is different depending on whether the topology has a single consumer,
														
 
															 or a ConsumerGroup.
														
@@ -161,3 +160,67 @@ There are two methods to commit:
 
															 - Atomicity:
														
 
															     - ability to treat consumption and processing as a single atomic operation
														
 
															     - obtaining _exactly-once_ semantics instead of _at-least-once_
														
 
															+
														
 
															+## Scaling out consumers
														
 
															+
														
 
															+- Scaling a single-thread, single-consumer app to the bandwidth, number of topics
														
 
															+  and partitions of a full K cluster is not realistic
														
 
															+- The solution is to scale out consuming to more consumers, but they can't just
														
 
															+  consume anything without synchronizing in some way
														
 
															+- This is the reason for Consumer groups: a collection of independent Consumers
														
 
															+  working as a team, i.e. declaring the same `group.id`.
														
 
															+    - This allows them to share the message consumption and processing load, with more parallelism.
														
 
															+    - It allows more redundancy: failure or limitations of a given consumer are
														
 
															+      automatically handled and balanced by K.
														
 
															+    - It offers more performance, with the ability to support a large backlog
														
 
															+- A Consumer group is created when individual consumers
														
 
															+    - with a common `group.id`...
														
 
															+    - invoke the `subscribe()` method...
														
 
															+    - and pass a common topics list.
														
 
															+- One of the brokers gets elected as the `GroupCoordinator` for that topic.
														
 
															+    - Its job is to monitor and maintain group membership.
														
 
															+    - It collaborates with the `ClusterCoordinator` and ZooKeeper to monitor and
														
 
															+      assign partitions within a topic to individual consumers in the group.
														
 
															+- As soon as a Consumer group is formed, each consumer is sending heartbeats,
														
 
															+    - configured by properties:
														
 
															+        - `heartbeat.interval.ms` == `HEARTBEAT_INTERVAL_MS_CONFIG` (default 3000 msec):
														
 
															+          the interval between heartbeat sends
														
 
															+        - `session.timeout.ms` == `SESSION_TIMEOUT_MS_CONFIG` (default 30000 msec)
														
 
															+    - the CG coordinator relies on these heartbeats to evaluate whether the consumer
														
 
															+      is alive and able to participate in the group
														
 
															+    - if the coordinator does not receive heartbeat during a "total time" (?)
														
 
															+      larger than `session.timeout.ms`, it will consider the consumer failed and take
														
 
															+      corrective action, following its priority: ensuring that the purpose of the
														
 
															+      group (sharing the load of consuming those topics) is being met.
														
 
															+    - these corrections are _consumer rebalance_ operations, which is complex
														
 
															+        - remaining consumers now need to absorb the workload no longer handled
														
 
															+          by the failed consumer
														
 
															+        - they need to find up to where the failed consumer had worked (commit offset)
														
 
															+          for all partitions, and catch up without creating duplicates.
														
 
															+        - the ability to perform these rebalances is critical to cluster health.
														
 
															+        - example 1: the failed consumer handled messages but could not commit
														
 
															+          them; in that case the new consumers are likely to re-process them,
														
 
															+          possibly introducing duplicates
														
 
															+        - example 2: a new consumer joins the group
														
 
															+        - example 3: a new partition is added to the topic
														
 
															+- Applications are configured with a consumer group to handle their topics
														
 
															+
														
 
															+## Rebalancing
														
 
															+
														
 
															+The offset at which a new consumer in a group starts consuming is defined by
														
 
															+the `auto.offset.reset` == `AUTO_OFFSET_RESET_CONFIG` property.
														
 
															+
														
 
															+If the rebalance was triggered at a point when a previous consumer had already
														
 
															+read but not yet committed some offset, the new consumer is likely to read it again.
														
 
															+
														
 
															+The primary purpose of the Group Coordinator is to evenly balance available consumers
														
 
															+to partitions.
														
 
															+
														
 
															+- If possible, it will assign a 1:1 consumer/partition ratio.
														
 
															+- If there are more consumers than partitions, it will let the extra consumers idle,
														
 
															+  leading to over-provisioning
														
 
															+  - Apparently (to be confirmed), even if there are more partitions than consumers,
														
 
															+    it will not share a partition across multiple consumers
														
 
															+- If a new partition becomes available, or a consumer fails, or is added,
														
 
															+  the Group Coordinator initiates the rebalancing protocol, engaging each Consumer coordinator
														
 
															+
														
--- a/docs/images/ConsumerGroups.png
+++ b/docs/images/ConsumerGroups.png
--- a/src/main/java/assign/META-INF/MANIFEST.MF
+++ b/src/main/java/assign/META-INF/MANIFEST.MF
@@ -0,0 +1,2 @@
 
															+Manifest-Version: 1.0
														
 
															+Main-Class: KafkaAssignApp
														
--- a/src/main/java/consumer/META-INF/MANIFEST.MF
+++ b/src/main/java/consumer/META-INF/MANIFEST.MF
@@ -0,0 +1,2 @@
 
															+Manifest-Version: 1.0
														
 
															+Main-Class: KafkaConsumerApp
														
--- a/src/main/java/producer/KafkaProducerApp.java
+++ b/src/main/java/producer/KafkaProducerApp.java
@@ -1,3 +1,5 @@
 
															+package producer;
														
 
															+
														
 
															 import org.apache.kafka.clients.producer.KafkaProducer;
														
 
															 import org.apache.kafka.clients.producer.ProducerConfig;
														
 
															 import org.apache.kafka.clients.producer.ProducerRecord;
														
--- a/src/main/java/producer/META-INF/MANIFEST.MF
+++ b/src/main/java/producer/META-INF/MANIFEST.MF
@@ -0,0 +1,2 @@
 
															+Manifest-Version: 1.0
														
 
															+Main-Class: producer.KafkaProducerApp
	`@@ -0,0 +1,2 @@`
			`+Manifest-Version: 1.0`
			`+Main-Class: KafkaAssignApp`
	`@@ -0,0 +1,2 @@`
			`+Manifest-Version: 1.0`
			`+Main-Class: KafkaConsumerApp`
	`@@ -0,0 +1,2 @@`
			`+Manifest-Version: 1.0`
			`+Main-Class: producer.KafkaProducerApp`