`llm-tgi-server | The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
llm-tgi-server | Token is valid (permission: read).
llm-tgi-server | Your token has been saved to /home/user/.cache/huggingface/token
llm-tgi-server | Login successful
llm-tgi-server | INFO: <IP>:37050 - "POST /v1/chat/completions HTTP/1.1" 500 Internal Server Error
llm-tgi-server | ERROR: Exception in ASGI application
llm-tgi-server | Traceback (most recent call last):
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py", line 304, in hf_raise_for_status
llm-tgi-server | response.raise_for_status()
llm-tgi-server | File "/usr/local/lib/python3.11/site-packages/requests/models.py", line 1021, in raise_for_status
llm-tgi-server | raise HTTPError(http_error_msg, response=self)
llm-tgi-server | requests.exceptions.HTTPError: 503 Server Error: Service Unavailable for url: http://<IP>:9009/
llm-tgi-server |
llm-tgi-server | The above exception was the direct cause of the following exception:
llm-tgi-server |
llm-tgi-server | Traceback (most recent call last):
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/huggingface_hub/inference/_client.py", line 273, in post
llm-tgi-server | hf_raise_for_status(response)
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/huggingface_hub/utils/_errors.py", line 371, in hf_raise_for_status
llm-tgi-server | raise HfHubHTTPError(str(e), response=response) from e
llm-tgi-server | huggingface_hub.utils._errors.HfHubHTTPError: 503 Server Error: Service Unavailable for url: http://<IP>:9009/
llm-tgi-server |
llm-tgi-server | The above exception was the direct cause of the following exception:
llm-tgi-server |
llm-tgi-server | Traceback (most recent call last):
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/uvicorn/protocols/http/httptools_impl.py", line 411, in run_asgi
llm-tgi-server | result = await app( # type: ignore[func-returns-value]
llm-tgi-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/uvicorn/middleware/proxy_headers.py", line 69, in __call__
llm-tgi-server | return await self.app(scope, receive, send)
llm-tgi-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/fastapi/applications.py", line 1054, in __call__
llm-tgi-server | await super().__call__(scope, receive, send)
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/starlette/applications.py", line 123, in __call__
llm-tgi-server | await self.middleware_stack(scope, receive, send)
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/starlette/middleware/errors.py", line 186, in __call__
llm-tgi-server | raise exc
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/starlette/middleware/errors.py", line 164, in __call__
llm-tgi-server | await self.app(scope, receive, _send)
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 65, in __call__
llm-tgi-server | await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
llm-tgi-server | raise exc
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
llm-tgi-server | await app(scope, receive, sender)
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/starlette/routing.py", line 756, in __call__
llm-tgi-server | await self.middleware_stack(scope, receive, send)
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/starlette/routing.py", line 776, in app
llm-tgi-server | await route.handle(scope, receive, send)
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/starlette/routing.py", line 297, in handle
llm-tgi-server | await self.app(scope, receive, send)
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/starlette/routing.py", line 77, in app
llm-tgi-server | await wrap_app_handling_exceptions(app, request)(scope, receive, send)
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 64, in wrapped_app
llm-tgi-server | raise exc
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
llm-tgi-server | await app(scope, receive, sender)
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/starlette/routing.py", line 72, in app
llm-tgi-server | response = await func(request)
llm-tgi-server | ^^^^^^^^^^^^^^^^^^^
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/fastapi/routing.py", line 278, in app
llm-tgi-server | raw_response = await run_endpoint_function(
llm-tgi-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/fastapi/routing.py", line 193, in run_endpoint_function
llm-tgi-server | return await run_in_threadpool(dependant.call, **values)
llm-tgi-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/starlette/concurrency.py", line 42, in run_in_threadpool
llm-tgi-server | return await anyio.to_thread.run_sync(func, *args)
llm-tgi-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
llm-tgi-server | File "/usr/local/lib/python3.11/site-packages/anyio/to_thread.py", line 56, in run_sync
llm-tgi-server | return await get_async_backend().run_sync_in_worker_thread(
llm-tgi-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
llm-tgi-server | File "/usr/local/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 2134, in run_sync_in_worker_thread
llm-tgi-server | return await future
llm-tgi-server | ^^^^^^^^^^^^
llm-tgi-server | File "/usr/local/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 851, in run
llm-tgi-server | result = context.run(func, *args)
llm-tgi-server | ^^^^^^^^^^^^^^^^^^^^^^^^
llm-tgi-server | File "/home/user/comps/llms/langchain/llm_tgi.py", line 73, in llm_generate
llm-tgi-server | response = llm.invoke(input.query)
llm-tgi-server | ^^^^^^^^^^^^^^^^^^^^^^^
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/langchain_core/language_models/llms.py", line 276, in invoke
llm-tgi-server | self.generate_prompt(
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/langchain_core/language_models/llms.py", line 633, in generate_prompt
llm-tgi-server | return self.generate(prompt_strings, stop=stop, callbacks=callbacks, **kwargs)
llm-tgi-server | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/langchain_core/language_models/llms.py", line 803, in generate
llm-tgi-server | output = self._generate_helper(
llm-tgi-server | ^^^^^^^^^^^^^^^^^^^^^^
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/langchain_core/language_models/llms.py", line 670, in _generate_helper
llm-tgi-server | raise e
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/langchain_core/language_models/llms.py", line 657, in _generate_helper
llm-tgi-server | self._generate(
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/langchain_core/language_models/llms.py", line 1317, in _generate
llm-tgi-server | self._call(prompt, stop=stop, run_manager=run_manager, **kwargs)
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/langchain_community/llms/huggingface_endpoint.py", line 256, in _call
llm-tgi-server | response = self.client.post(
llm-tgi-server | ^^^^^^^^^^^^^^^^^
llm-tgi-server | File "/home/user/.local/lib/python3.11/site-packages/huggingface_hub/inference/_client.py", line 283, in post
llm-tgi-server | raise InferenceTimeoutError(
llm-tgi-server | huggingface_hub.errors.InferenceTimeoutError: Model not loaded on the server: http://<IP>:9009. Please retry with a higher timeout (current: 120).`