From 5ee222108d91f4d0d5d1fc83390e3bb7ba3276a9 Mon Sep 17 00:00:00 2001 From: Derek Worthen Date: Fri, 23 Jan 2026 06:01:32 -0800 Subject: [PATCH 1/5] make model_config.azure_deployment_name optional. --- .../graphrag_llm/config/model_config.py | 8 +++----- tests/unit/config/test_model_config.py | 13 +------------ 2 files changed, 4 insertions(+), 17 deletions(-) diff --git a/packages/graphrag-llm/graphrag_llm/config/model_config.py b/packages/graphrag-llm/graphrag_llm/config/model_config.py index 1b70f6150..a0d3874e4 100644 --- a/packages/graphrag-llm/graphrag_llm/config/model_config.py +++ b/packages/graphrag-llm/graphrag_llm/config/model_config.py @@ -84,13 +84,11 @@ class ModelConfig(BaseModel): def _validate_lite_llm_config(self) -> None: """Validate LiteLLM specific configuration.""" - if self.model_provider == "azure" and ( - not self.azure_deployment_name or not self.api_base - ): - msg = "azure_deployment_name and api_base must be specified with the 'azure' model provider." + if self.model_provider == "azure" and not self.api_base: + msg = "api_base must be specified with the 'azure' model provider." raise ValueError(msg) - if self.model_provider != "azure" and self.azure_deployment_name: + if self.model_provider != "azure" and self.azure_deployment_name is not None: msg = "azure_deployment_name should not be specified for non-Azure model providers." raise ValueError(msg) diff --git a/tests/unit/config/test_model_config.py b/tests/unit/config/test_model_config.py index 67de71bf7..f2e7aec8e 100644 --- a/tests/unit/config/test_model_config.py +++ b/tests/unit/config/test_model_config.py @@ -48,7 +48,7 @@ def test_litellm_provider_validation() -> None: with pytest.raises( ValueError, - match="azure_deployment_name and api_base must be specified with the 'azure' model provider\\.", + match="api_base must be specified with the 'azure' model provider\\.", ): _ = ModelConfig( type=LLMProviderType.LiteLLM, @@ -56,17 +56,6 @@ def test_litellm_provider_validation() -> None: model="gpt-4o", ) - with pytest.raises( - ValueError, - match="azure_deployment_name and api_base must be specified with the 'azure' model provider\\.", - ): - _ = ModelConfig( - type=LLMProviderType.LiteLLM, - model_provider="azure", - model="gpt-4o", - azure_deployment_name="my-deployment", - ) - with pytest.raises( ValueError, match="api_key should not be set when using Azure Managed Identity\\.", From 07da21c34fe067b673b19133f4628d5ba5aff3c8 Mon Sep 17 00:00:00 2001 From: Derek Worthen Date: Fri, 23 Jan 2026 06:03:13 -0800 Subject: [PATCH 2/5] cleanup init config. --- packages/graphrag/graphrag/config/init_content.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/packages/graphrag/graphrag/config/init_content.py b/packages/graphrag/graphrag/config/init_content.py index fd0f5aa70..9973d1920 100644 --- a/packages/graphrag/graphrag/config/init_content.py +++ b/packages/graphrag/graphrag/config/init_content.py @@ -24,9 +24,6 @@ api_key: ${{GRAPHRAG_API_KEY}} # set this in the generated .env file, or remove if managed identity retry: type: exponential_backoff - base_delay: 2.0 - max_retries: 7 - jitter: true embedding_models: {defs.DEFAULT_EMBEDDING_MODEL_ID}: @@ -36,9 +33,6 @@ api_key: ${{GRAPHRAG_API_KEY}} retry: type: exponential_backoff - base_delay: 2.0 - max_retries: 7 - jitter: true ### Document processing settings ### From dc1e89ea6e96f079f2c6bd6081d2528776c3d6b7 Mon Sep 17 00:00:00 2001 From: Derek Worthen Date: Fri, 23 Jan 2026 06:27:05 -0800 Subject: [PATCH 3/5] test hashing. 
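A minimal usage sketch of the behavior these tests pin down (import path and semantics are taken from the new test file below, assuming graphrag_common is installed; the data values are illustrative only):

```python
from graphrag_common.hasher import hash_data

# Equivalent structures hash identically, regardless of key ordering.
a = hash_data({"k": 1, "nested": {"xs": [1, 2, 3]}})
b = hash_data({"nested": {"xs": [1, 2, 3]}, "k": 1})
assert a == b

# Different content produces a different hash.
assert hash_data({"k": 1}) != hash_data({"k": 2})
```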
--- tests/unit/hasher/__init__.py | 2 + tests/unit/hasher/test_hasher.py | 104 +++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 tests/unit/hasher/__init__.py create mode 100644 tests/unit/hasher/test_hasher.py diff --git a/tests/unit/hasher/__init__.py b/tests/unit/hasher/__init__.py new file mode 100644 index 000000000..0a3e38adf --- /dev/null +++ b/tests/unit/hasher/__init__.py @@ -0,0 +1,2 @@ +# Copyright (c) 2024 Microsoft Corporation. +# Licensed under the MIT License diff --git a/tests/unit/hasher/test_hasher.py b/tests/unit/hasher/test_hasher.py new file mode 100644 index 000000000..4b13d0dd1 --- /dev/null +++ b/tests/unit/hasher/test_hasher.py @@ -0,0 +1,104 @@ +# Copyright (c) 2024 Microsoft Corporation. +# Licensed under the MIT License + +"""Test hasher""" + +from graphrag_common.hasher import hash_data + + +def test_hash_data() -> None: + """Test hash data function.""" + # Test different types of data + + class TestClass: # noqa: B903 + """Test hasher class.""" + + def __init__(self, value: str) -> None: + self.value = value + + def _test_func(): + pass + + # All should work and not raise exceptions + _ = hash_data("test string") + _ = hash_data(12345) + _ = hash_data(12.345) + _ = hash_data([1, 2, 3, 4, 5]) + _ = hash_data({"key": "value", "number": 42}) + _ = hash_data((1, "two", 3.0)) + _ = hash_data({1, 2, 3, 4, 5}) + _ = hash_data(None) + _ = hash_data(True) + _ = hash_data(b"bytes data") + _ = hash_data({"nested": {"list": [1, 2, 3], "dict": {"a": "b"}}}) + _ = hash_data(range(10)) + _ = hash_data(frozenset([1, 2, 3])) + _ = hash_data(complex(1, 2)) + _ = hash_data(bytearray(b"byte array data")) + _ = hash_data(memoryview(b"memory view data")) + _ = hash_data(Exception("test exception")) + _ = hash_data(TestClass) + _ = hash_data(TestClass("instance value")) + _ = hash_data(lambda x: x * 2) + _ = hash_data(_test_func) + + # Test that equivalent data structures produce the same hash + data1 = { + "bool": True, + "int": 42, + "float": 3.14, + "str": "hello, world", + "list": [1, 2, 3], + "dict": {"key": "value"}, + "nested": { + "list_of_dicts": [{"a": 1}, {"b": 2}], + "dict_of_lists": {"numbers": [1, 2, 3]}, + }, + "tuple": (1, 2, 3), + "set": {1, 2, 3}, + "class": TestClass, + "function": _test_func, + "instance": TestClass("instance value"), + } + # Same data but different order + data2 = { + "bool": True, + "list": [1, 2, 3], + "float": 3.14, + "str": "hello, world", + "int": 42, + "nested": { + "dict_of_lists": {"numbers": [1, 2, 3]}, + "list_of_dicts": [{"a": 1}, {"b": 2}], + }, + "dict": {"key": "value"}, + "tuple": (1, 2, 3), + "class": TestClass, + "set": {1, 3, 2}, + "instance": TestClass("instance value"), + "function": _test_func, + } + + hash1 = hash_data(data1) + hash2 = hash_data(data2) + + assert hash1 == hash2, "Hashes should be the same for equivalent data structures" + + data3 = {"key1": "value1", "key2": 124, "key3": [1, 2, 3]} # Different value + hash3 = hash_data(data3) + + assert hash1 != hash3, "Hashes should be different for different data structures" + + # Test classes + instance1 = TestClass("value1") + instance2 = TestClass("value1") + instance3 = TestClass("value2") + hash_instance1 = hash_data(instance1) + hash_instance2 = hash_data(instance2) + hash_instance3 = hash_data(instance3) + assert hash_instance1 == hash_instance2, ( + "Hashes should be the same for equivalent class instances" + ) + assert hash_instance1 != hash_instance3, ( + "Hashes should be different for different class instances" + ) From 
036f8fdd4c410d8f80b023ac08f6866d4cc004c9 Mon Sep 17 00:00:00 2001 From: Derek Worthen Date: Fri, 23 Jan 2026 06:33:16 -0800 Subject: [PATCH 4/5] add semversioner entry. --- .semversioner/next-release/major-20250909002702300683.json | 4 ---- .semversioner/next-release/major-20250909010205372690.json | 4 ---- .semversioner/next-release/major-20250909205146252760.json | 4 ---- .semversioner/next-release/major-20251009203808375389.json | 4 ---- .semversioner/next-release/major-20260123143225940955.json | 4 ++++ 5 files changed, 4 insertions(+), 16 deletions(-) delete mode 100644 .semversioner/next-release/major-20250909002702300683.json delete mode 100644 .semversioner/next-release/major-20250909010205372690.json delete mode 100644 .semversioner/next-release/major-20250909205146252760.json delete mode 100644 .semversioner/next-release/major-20251009203808375389.json create mode 100644 .semversioner/next-release/major-20260123143225940955.json diff --git a/.semversioner/next-release/major-20250909002702300683.json b/.semversioner/next-release/major-20250909002702300683.json deleted file mode 100644 index 266194d23..000000000 --- a/.semversioner/next-release/major-20250909002702300683.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "major", - "description": "Re-implement graspologic methods to remove dependency. Remove visualization steps." -} diff --git a/.semversioner/next-release/major-20250909010205372690.json b/.semversioner/next-release/major-20250909010205372690.json deleted file mode 100644 index 537045e94..000000000 --- a/.semversioner/next-release/major-20250909010205372690.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "major", - "description": "Remove document filtering option." -} diff --git a/.semversioner/next-release/major-20250909205146252760.json b/.semversioner/next-release/major-20250909205146252760.json deleted file mode 100644 index b4e99c535..000000000 --- a/.semversioner/next-release/major-20250909205146252760.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "major", - "description": "Remove text unit group-by ability." -} diff --git a/.semversioner/next-release/major-20251009203808375389.json b/.semversioner/next-release/major-20251009203808375389.json deleted file mode 100644 index 4bf235ecb..000000000 --- a/.semversioner/next-release/major-20251009203808375389.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "major", - "description": "Simplify internal args with stronger types and firmer boundaries." -} diff --git a/.semversioner/next-release/major-20260123143225940955.json b/.semversioner/next-release/major-20260123143225940955.json new file mode 100644 index 000000000..76089e1e5 --- /dev/null +++ b/.semversioner/next-release/major-20260123143225940955.json @@ -0,0 +1,4 @@ +{ + "type": "major", + "description": "Monorepo restructure\n\n New Packages:\n - graphrag-cache\n - graphrag-chunking\n - graphrag-common\n - graphrag-input\n - graphrag-llm\n - graphrag-storage\n - graphrag-vectors\n\n Changes:\n - New config: run graphrag init --force to reinitialize config with new layout and options." +} From 8fd01b9da9db2c891a11e4030e6df9da5fcf0f7e Mon Sep 17 00:00:00 2001 From: Derek Worthen Date: Fri, 23 Jan 2026 07:24:44 -0800 Subject: [PATCH 5/5] update docs. 
--- docs/config/models.md | 82 ++++++++++++---------------- docs/config/yaml.md | 124 ++++++++++++++++++------------------------ 2 files changed, 87 insertions(+), 119 deletions(-) diff --git a/docs/config/models.md b/docs/config/models.md index a65bde3f3..0339b7f23 100644 --- a/docs/config/models.md +++ b/docs/config/models.md @@ -6,33 +6,26 @@ This page contains information on selecting a model to use and options to supply GraphRAG was built and tested using OpenAI models, so this is the default model set we support. This is not intended to be a limiter or statement of quality or fitness for your use case, only that it's the set we are most familiar with for prompting, tuning, and debugging. -GraphRAG uses [LiteLLM](https://docs.litellm.ai/) for calling language models. LiteLLM provides support for 100+ models though it is important to note that when choosing a model it must support returning [structured outputs](https://openai.com/index/introducing-structured-outputs-in-the-api/) adhering to a [JSON schema](https://docs.litellm.ai/docs/completion/json_mode). +GraphRAG uses [LiteLLM](https://docs.litellm.ai/) for calling language models. LiteLLM provides support for 100+ models though it is important to note that when choosing a model it must support returning [structured outputs](https://openai.com/index/introducing-structured-outputs-in-the-api/) adhering to a [JSON schema](https://docs.litellm.ai/docs/completion/json_mode). Example using LiteLLM as the language model manager for GraphRAG: ```yaml -models: - default_chat_model: - type: chat - auth_type: api_key - api_key: ${GEMINI_API_KEY} +completion_models: + default_completion_model: model_provider: gemini model: gemini-2.5-flash-lite - default_embedding_model: - type: embedding - auth_type: api_key + auth_method: api_key api_key: ${GEMINI_API_KEY} + +embedding_models: + default_embedding_model: model_provider: gemini model: gemini-embedding-001 + auth_method: api_key + api_key: ${GEMINI_API_KEY} ``` -To use LiteLLM one must - -- Set `type` to either `chat` or `embedding`. -- Provide a `model_provider`, e.g., `openai`, `azure`, `gemini`, etc. -- Set the `model` to a one supported by the `model_provider`'s API. -- Provide a `deployment_name` if using `azure` as the `model_provider` if your deployment name differs from the model name. - See [Detailed Configuration](yaml.md) for more details on configuration. [View LiteLLM basic usage](https://docs.litellm.ai/docs/#basic-usage) for details on how models are called (The `model_provider` is the portion prior to `/` while the `model` is the portion following the `/`). ## Model Selection Considerations @@ -44,40 +37,31 @@ Versions of GraphRAG before 2.2.0 made extensive use of `max_tokens` and `logit_ - Previously, GraphRAG used `max_tokens` to limit responses in a few locations. This is done so that we can have predictable content sizes when building downstream context windows for summarization. We have now switched from using `max_tokens` to use a prompted approach, which is working well in our tests. We suggest using `max_tokens` in your language model config only for budgetary reasons if you want to limit consumption, and not for expected response length control. We now also support the o-series equivalent `max_completion_tokens`, but if you use this keep in mind that there may be some unknown fixed reasoning consumption amount in addition to the response tokens, so it is not a good technique for response control. 
 - Previously, GraphRAG used a combination of `max_tokens` and `logit_bias` to strictly control a binary yes/no question during gleanings. This is not possible with reasoning models, so again we have switched to a prompted approach. Our tests with gpt-4o, gpt-4o-mini, and o1 show that this works consistently, but could have issues if you have an older or smaller model.
 - The o-series models are much slower and more expensive. It may be useful to use an asymmetric approach to model use in your config: you can define as many models as you like in the `models` block of your settings.yaml and reference them by key for every workflow that requires a language model. You could use gpt-4o for indexing and o1 for query, for example. Experiment to find the right balance of cost, speed, and quality for your use case.
-- The o-series models contain a form of native native chain-of-thought reasoning that is absent in the non-o-series models. GraphRAG's prompts sometimes contain CoT because it was an effective technique with the gpt-4* series. It may be counterproductive with the o-series, so you may want to tune or even re-write large portions of the prompt templates (particularly for graph and claim extraction).
+- The o-series models contain a form of native chain-of-thought reasoning that is absent in the non-o-series models. GraphRAG's prompts sometimes contain CoT because it was an effective technique with the gpt-4\* series. It may be counterproductive with the o-series, so you may want to tune or even re-write large portions of the prompt templates (particularly for graph and claim extraction).
 
 Example config with asymmetric model use:
 
 ```yaml
-models:
-  extraction_chat_model:
-    api_key: ${GRAPHRAG_API_KEY}
-    type: chat
+completion_models:
+  extraction_completion_model:
     model_provider: openai
-    auth_type: api_key
     model: gpt-4o
-    model_supports_json: true
-  query_chat_model:
+    auth_method: api_key
     api_key: ${GRAPHRAG_API_KEY}
-    type: chat
+  query_completion_model:
     model_provider: openai
-    auth_type: api_key
     model: o1
-    model_supports_json: true
-
+    auth_method: api_key
+    api_key: ${GRAPHRAG_API_KEY}
 ...
-
 extract_graph:
-  model_id: extraction_chat_model
+  completion_model_id: extraction_completion_model
   prompt: "prompts/extract_graph.txt"
-  entity_types: [organization,person,geo,event]
+  entity_types: [organization, person, geo, event]
   max_gleanings: 1
-
 ...
-
-
 global_search:
-  chat_model_id: query_chat_model
+  completion_model_id: query_completion_model
   map_prompt: "prompts/global_search_map_system_prompt.txt"
   reduce_prompt: "prompts/global_search_reduce_system_prompt.txt"
   knowledge_prompt: "prompts/global_search_knowledge_system_prompt.txt"
@@ -95,35 +79,37 @@ Many users have used platforms such as [ollama](https://ollama.com/) and [LiteLL
 
 ### Model Protocol
 
-We support model injection through the use of a standard chat and embedding Protocol and accompanying factories that you can use to register your model implementation. This is not supported with the CLI, so you'll need to use GraphRAG as a library.
+We support model injection through the use of a standard completion and embedding Protocol and accompanying factories that you can use to register your model implementation. This is not supported with the CLI, so you'll need to use GraphRAG as a library.
-- Our Protocol is [defined here](https://github.com/microsoft/graphrag/blob/main/graphrag/language_model/protocol/base.py) -- We have a simple mock implementation in our tests that you can [reference here](https://github.com/microsoft/graphrag/blob/main/tests/mock_provider.py) +- Our Protocol is [defined here](https://github.com/microsoft/graphrag/blob/main/packages/graphrag-llm/graphrag_llm/completion/completion.py) +- We have a simple mock implementation in our tests that you can [reference here](https://github.com/microsoft/graphrag/blob/main/packages/graphrag-llm/graphrag_llm/completion/mock_llm_completion.py) -Once you have a model implementation, you need to register it with our ChatModelFactory or EmbeddingModelFactory: +Once you have a model implementation, you need to register it with our completion model factory or embedding model factory: ```python -class MyCustomChatModel: +from graphrag_llm.completion import LLMCompletion, register_completion + +class MyCustomCompletionModel(LLMCompletion): ... # implementation # elsewhere... -ChatModelFactory.register("my-custom-chat-model", MyCustomChatModel) +register_completion("my-custom-completion-model", MyCustomCompletionModel) ``` Then in your config you can reference the type name you used: ```yaml -models: - default_chat_model: - type: my-custom-chat-model - +completion_models: + default_completion_model: + type: my-custom-completion-model + ... extract_graph: - model_id: default_chat_model + completion_model_id: default_completion_model prompt: "prompts/extract_graph.txt" - entity_types: [organization,person,geo,event] + entity_types: [organization, person, geo, event] max_gleanings: 1 ``` -Note that your custom model will be passed the same params for init and method calls that we use throughout GraphRAG. There is not currently any ability to define custom parameters, so you may need to use closure scope or a factory pattern within your implementation to get custom config values. \ No newline at end of file +Note that your custom model will be passed the same params for init and method calls that we use throughout GraphRAG. There is not currently any ability to define custom parameters, so you may need to use closure scope or a factory pattern within your implementation to get custom config values. diff --git a/docs/config/yaml.md b/docs/config/yaml.md index 0b04c5ae0..38ceac652 100644 --- a/docs/config/yaml.md +++ b/docs/config/yaml.md @@ -11,7 +11,7 @@ For example: GRAPHRAG_API_KEY=some_api_key # settings.yml -default_chat_model: +default_chat_model: api_key: ${GRAPHRAG_API_KEY} ``` @@ -21,71 +21,54 @@ default_chat_model: ### models -This is a dict of model configurations. The dict key is used to reference this configuration elsewhere when a model instance is desired. In this way, you can specify as many different models as you need, and reference them independently in the workflow steps. +This is a set of dicts, one for completion model configuration and one for embedding model configuration. The dict keys are used to reference the model configuration elsewhere when a model instance is desired. In this way, you can specify as many different models as you need, and reference them independently in the workflow steps. 
For example: + ```yml -models: - default_chat_model: - type: chat +completion_models: + default_completion_model: model_provider: openai - auth_type: api_key - api_key: ${GRAPHRAG_API_KEY} model: gpt-4.1 - api_base: https://.openai.azure.com - api_version: 2024-05-01-preview - model_supports_json: true - concurrent_requests: 25 - retry_strategy: exponential_backoff - max_retries: 10 - tokens_per_minute: null - requests_per_minute: null + auth_method: api_key + api_key: ${GRAPHRAG_API_KEY} + +embedding_models: default_embedding_model: model_provider: openai - auth_type: api_key - api_key: ${GRAPHRAG_API_KEY} model: text-embedding-3-large - api_base: https://.openai.azure.com - api_version: 2024-05-01-preview - concurrent_requests: 25 - retry_strategy: exponential_backoff - max_retries: 10 - tokens_per_minute: null - requests_per_minute: null + auth_method: api_key + api_key: ${GRAPHRAG_API_KEY} ``` #### Fields -- `api_key` **str** - The OpenAI API key to use. -- `auth_type` **api_key|azure_managed_identity** - Indicate how you want to authenticate requests. -- `type` **chat**|**embedding**|mock_chat|mock_embeddings** - The type of LLM to use. -- `model_provider` **str|None** - The model provider to use, e.g., openai, azure, anthropic, etc. [LiteLLM](https://docs.litellm.ai/) is used under the hood which has support for calling 100+ models. [View LiteLLm basic usage](https://docs.litellm.ai/docs/#basic-usage) for details on how models are called (The `model_provider` is the portion prior to `/` while the `model` is the portion following the `/`). [View Language Model Selection](models.md) for more details and examples on using LiteLLM. +- `type` **litellm|mock** - The type of LLM provider to use. GraphRAG uses [LiteLLM](https://docs.litellm.ai/) for calling language models. +- `model_provider` **str** - The model provider to use, e.g., openai, azure, anthropic, etc. [LiteLLM](https://docs.litellm.ai/) is used under the hood which has support for calling 100+ models. [View LiteLLm basic usage](https://docs.litellm.ai/docs/#basic-usage) for details on how models are called (The `model_provider` is the portion prior to `/` while the `model` is the portion following the `/`). [View Language Model Selection](models.md) for more details and examples on using LiteLLM. - `model` **str** - The model name. -- `encoding_model` **str** - The text encoding model to use. Default is to use the encoding model aligned with the language model (i.e., it is retrieved from tiktoken if unset). -- `api_base` **str** - The API base url to use. -- `api_version` **str** - The API version. -- `deployment_name` **str** - The deployment name to use if your model is hosted on Azure. Note that if your deployment name on Azure matches the model name, this is unnecessary. -- `organization` **str** - The client organization. -- `proxy` **str** - The proxy URL to use. -- `audience` **str** - (Azure OpenAI only) The URI of the target Azure resource/service for which a managed identity token is requested. Used if `api_key` is not defined. Default=`https://cognitiveservices.azure.com/.default` -- `model_supports_json` **bool** - Whether the model supports JSON-mode output. -- `request_timeout` **float** - The per-request timeout. -- `tokens_per_minute` **int** - Set a leaky-bucket throttle on tokens-per-minute. -- `requests_per_minute` **int** - Set a leaky-bucket throttle on requests-per-minute. -- `retry_strategy` **str** - Retry strategy to use, "exponential_backoff" is the default. 
Other allowable values include "native", "random_wait", and "incremental_wait". -- `max_retries` **int** - The maximum number of retries to use. -- `max_retry_wait` **float** - The maximum backoff time. -- `concurrent_requests` **int** The number of open requests to allow at once. -- `async_mode` **asyncio|threaded** The async mode to use. Either `asyncio` or `threaded`. -- `responses` **list[str]** - If this model type is mock, this is a list of response strings to return. -- `n` **int** - The number of completions to generate. -- `max_tokens` **int** - The maximum number of output tokens. Not valid for o-series models. -- `temperature` **float** - The temperature to use. Not valid for o-series models. -- `top_p` **float** - The top-p value to use. Not valid for o-series models. -- `frequency_penalty` **float** - Frequency penalty for token generation. Not valid for o-series models. -- `presence_penalty` **float** - Frequency penalty for token generation. Not valid for o-series models. -- `max_completion_tokens` **int** - Max number of tokens to consume for chat completion. Must be large enough to include an unknown amount for "reasoning" by the model. o-series models only. -- `reasoning_effort` **low|medium|high** - Amount of "thought" for the model to expend reasoning about a response. o-series models only. +- `call_args`: **dict[str, Any]** - Default arguments to send with every model request. Example, `{"n": 5, "max_completion_tokens": 1000, "temperature": 1.5, "organization": "..." }` +- `api_key` **str|None** - The OpenAI API key to use. +- `api_base` **str|None** - The API base url to use. +- `api_version` **str|None** - The API version. +- `auth_method` **api_key|azure_managed_identity** - Indicate how you want to authenticate requests. +- `azure_deployment_name` **str|None** - The deployment name to use if your model is hosted on Azure. Note that if your deployment name on Azure matches the model name, this is unnecessary. +- retry **RetryConfig|None** - Retry settings. default=`None`, no retries. + - type **exponential_backoff|immediate** - Type of retry approach. default=`exponential_backoff` + - max_retries **int|None** - Max retries to take. default=`7`. + - base_delay **float|None** - Base delay when using `exponential_backoff`. default=`2.0`. + - jitter **bool|None** - Add jitter to retry delays when using `exponential_backoff`. default=`True` + - max_delay **float|None** - Maximum retry delay. default=`None`, no max. +- rate_limit **RateLimitConfig|None** - Rate limit settings. default=`None`, no rate limiting. + - type **sliding_window** - Type of rate limit approach. default=`sliding_window` + - period_in_seconds **int|None** - Window size for `sliding_window` rate limiting. default=`60`, limit requests per minute. + - requests_per_period **int|None** - Maximum number of requests per period. default=`None` + - tokens_per_period **int|None** - Maximum number of tokens per period. default=`None` +- metrics **MetricsConfig|None** - Metric settings. default=`MetricsConfig()`. View [metrics notebook](https://github.com/microsoft/graphrag/blob/main/packages/graphrag-llm/notebooks/04_metrics.ipynb) for more details on metrics. + - type **default** - The type of `MetricsProcessor` service to use for processing request metrics. default=`default` + - store **memory** - The type of `MetricsStore` service. default=`memory`. + - writer **log|file** - The type of `MetricsWriter` to use. Will write out metrics at the end of the process. 
default`log`, log metrics out using python standard logging at the end of the process. + - log_level **int|None** - The log level when using `log` writer. default=`20`, log `INFO` messages for metrics. + - base_dir **str|None** - The directory to write metrics to when using `file` writer. default=`Path.cwd()`. ## Input Files and Chunking @@ -97,7 +80,7 @@ Our pipeline can ingest .csv, .txt, or .json data from an input location. See th - `storage` **StorageConfig** - `type` **file|memory|blob|cosmosdb** - The storage type to use. Default=`file` - - `encoding`**str** - The encoding to use for file storage. + - `encoding`**str** - The encoding to use for file storage. - `base_dir` **str** - The base directory to write output artifacts to, relative to the root. - `connection_string` **str** - (blob/cosmosdb only) The Azure Storage connection string. - `container_name` **str** - (blob/cosmosdb only) The Azure Storage container name. @@ -131,7 +114,7 @@ This section controls the storage mechanism used by the pipeline used for export #### Fields - `type` **file|memory|blob|cosmosdb** - The storage type to use. Default=`file` -- `encoding`**str** - The encoding to use for file storage. +- `encoding`**str** - The encoding to use for file storage. - `base_dir` **str** - The base directory to write output artifacts to, relative to the root. - `connection_string` **str** - (blob/cosmosdb only) The Azure Storage connection string. - `container_name` **str** - (blob/cosmosdb only) The Azure Storage container name. @@ -147,7 +130,7 @@ The section defines a secondary storage location for running incremental indexin #### Fields - `type` **file|memory|blob|cosmosdb** - The storage type to use. Default=`file` -- `encoding`**str** - The encoding to use for file storage. +- `encoding`**str** - The encoding to use for file storage. - `base_dir` **str** - The base directory to write output artifacts to, relative to the root. - `connection_string` **str** - (blob/cosmosdb only) The Azure Storage connection string. - `container_name` **str** - (blob/cosmosdb only) The Azure Storage container name. @@ -165,7 +148,7 @@ This section controls the cache mechanism used by the pipeline. This is used to - `type` **json|memory|none** - The storage type to use. Default=`json` - `storage` **StorageConfig** - `type` **file|memory|blob|cosmosdb** - The storage type to use. Default=`file` - - `encoding`**str** - The encoding to use for file storage. + - `encoding`**str** - The encoding to use for file storage. - `base_dir` **str** - The base directory to write output artifacts to, relative to the root. - `connection_string` **str** - (blob/cosmosdb only) The Azure Storage connection string. - `container_name` **str** - (blob/cosmosdb only) The Azure Storage container name. @@ -198,8 +181,8 @@ Where to put all vectors for the system. Configured for lancedb by default. This - `connection_string` **str** - (cosmosdb only) The Azure Storage connection string. - `database_name` **str** - (cosmosdb only) Name of the database. -- `index_schema` **dict[str, dict[str, str]]** (optional) - Enables customization for each of your embeddings. - - ``: +- `index_schema` **dict[str, dict[str, str]]** (optional) - Enables customization for each of your embeddings. + - ``: - `index_name` **str**: (optional) - Name for the specific embedding index table. - `id_field` **str**: (optional) - Field name to be used as id. Default=`id` - `vector_field` **str**: (optional) - Field name to be used as vector. 
Default=`vector` @@ -225,7 +208,6 @@ vector_store: vector_size: 3072 entity_description: id_field: "id_custom" - ``` ## Workflow Configurations @@ -248,7 +230,7 @@ Supported embeddings names are: #### Fields -- `model_id` **str** - Name of the model definition to use for text embedding. +- `embedding_model_id` **str** - Name of the model definition to use for text embedding. - `model_instance_name` **str** - Name of the model singleton instance. Default is "text_embedding". This primarily affects the cache storage partitioning. - `batch_size` **int** - The maximum batch size to use. - `batch_max_tokens` **int** - The maximum batch # of tokens. @@ -260,7 +242,7 @@ Tune the language model-based graph extraction process. #### Fields -- `model_id` **str** - Name of the model definition to use for API calls. +- `completion_model_id` **str** - Name of the model definition to use for API calls. - `model_instance_name` **str** - Name of the model singleton instance. Default is "extract_graph". This primarily affects the cache storage partitioning. - `prompt` **str** - The prompt file to use. - `entity_types` **list[str]** - The entity types to identify. @@ -270,7 +252,7 @@ Tune the language model-based graph extraction process. #### Fields -- `model_id` **str** - Name of the model definition to use for API calls. +- `completion_model_id` **str** - Name of the model definition to use for API calls. - `model_instance_name` **str** - Name of the model singleton instance. Default is "summarize_descriptions". This primarily affects the cache storage partitioning. - `prompt` **str** - The prompt file to use. - `max_length` **int** - The maximum number of output tokens per summarization. @@ -326,7 +308,7 @@ These are the settings used for Leiden hierarchical clustering of the graph to c #### Fields - `enabled` **bool** - Whether to enable claim extraction. Off by default, because claim prompts really need user tuning. -- `model_id` **str** - Name of the model definition to use for API calls. +- `completion_model_id` **str** - Name of the model definition to use for API calls. - `model_instance_name` **str** - Name of the model singleton instance. Default is "extract_claims". This primarily affects the cache storage partitioning. - `prompt` **str** - The prompt file to use. - `description` **str** - Describes the types of claims we want to extract. @@ -336,7 +318,7 @@ These are the settings used for Leiden hierarchical clustering of the graph to c #### Fields -- `model_id` **str** - Name of the model definition to use for API calls. +- `completion_model_id` **str** - Name of the model definition to use for API calls. - `model_instance_name` **str** - Name of the model singleton instance. Default is "community_reporting". This primarily affects the cache storage partitioning. - `graph_prompt` **str | None** - The community report extraction prompt to use for graph-based summarization. - `text_prompt` **str | None** - The community report extraction prompt to use for text-based summarization. @@ -358,9 +340,9 @@ These are the settings used for Leiden hierarchical clustering of the graph to c #### Fields - `prompt` **str** - The prompt file to use. -- `chat_model_id` **str** - Name of the model definition to use for Chat Completion calls. +- `completion_model_id` **str** - Name of the model definition to use for Chat Completion calls. - `embedding_model_id` **str** - Name of the model definition to use for Embedding calls. -- `text_unit_prop` **float** - The text unit proportion. 
+- `text_unit_prop` **float** - The text unit proportion. - `community_prop` **float** - The community proportion. - `conversation_history_max_turns` **int** - The conversation history maximum turns. - `top_k_entities` **int** - The top k mapped entities. @@ -373,7 +355,7 @@ These are the settings used for Leiden hierarchical clustering of the graph to c - `map_prompt` **str** - The global search mapper prompt to use. - `reduce_prompt` **str** - The global search reducer to use. -- `chat_model_id` **str** - Name of the model definition to use for Chat Completion calls. +- `completion_model_id` **str** - Name of the model definition to use for Chat Completion calls. - `knowledge_prompt` **str** - The knowledge prompt file to use. - `data_max_tokens` **int** - The maximum tokens to use constructing the final response from the reduces responses. - `map_max_length` **int** - The maximum length to request for map responses, in words. @@ -390,7 +372,7 @@ These are the settings used for Leiden hierarchical clustering of the graph to c - `prompt` **str** - The prompt file to use. - `reduce_prompt` **str** - The reducer prompt file to use. -- `chat_model_id` **str** - Name of the model definition to use for Chat Completion calls. +- `completion_model_id` **str** - Name of the model definition to use for Chat Completion calls. - `embedding_model_id` **str** - Name of the model definition to use for Embedding calls. - `data_max_tokens` **int** - The data llm maximum tokens. - `reduce_max_tokens` **int** - The maximum tokens for the reduce phase. Only use if a non-o-series model. @@ -417,7 +399,7 @@ These are the settings used for Leiden hierarchical clustering of the graph to c #### Fields - `prompt` **str** - The prompt file to use. -- `chat_model_id` **str** - Name of the model definition to use for Chat Completion calls. +- `completion_model_id` **str** - Name of the model definition to use for Chat Completion calls. - `embedding_model_id` **str** - Name of the model definition to use for Embedding calls. - `k` **int** - Number of text units to retrieve from the vector store for context building. -- `max_context_tokens` **int** - The maximum context size to create, in tokens. \ No newline at end of file +- `max_context_tokens` **int** - The maximum context size to create, in tokens.