Oleksandr Nitavskyi
05/25/2023, 12:15 PMPluginClassLoader
, see details in 🧵
Removal monitorInterval
introduced in FLINK-20510 helps to mitigate the issue.
I wonder if it is a known issue and should Flink provide any workaround:
1. Disable monitorInterval
by default in log configuration, so users who do not tune Flink doesn’t hit memory leak
2. Add some mechanism on unload, which properly stops such Log4jThread.
cc @Nicolas Fraison<init>:76, Log4jThreadFactory (org.apache.logging.log4j.core.util)
createDaemonThreadFactory:40, Log4jThreadFactory (org.apache.logging.log4j.core.util)
<init>:42, AbstractWatcher (org.apache.logging.log4j.core.util)
<init>:37, ConfigurationFileWatcher (org.apache.logging.log4j.core.config)
initializeWatchers:260, AbstractConfiguration (org.apache.logging.log4j.core.config)
setMonitorInterval:153, BuiltConfiguration (org.apache.logging.log4j.core.config.builder.impl)
build:217, DefaultConfigurationBuilder (org.apache.logging.log4j.core.config.builder.impl)
build:69, DefaultConfigurationBuilder (org.apache.logging.log4j.core.config.builder.impl)
build:190, PropertiesConfigurationBuilder (org.apache.logging.log4j.core.config.properties)
getConfiguration:56, PropertiesConfigurationFactory (org.apache.logging.log4j.core.config.properties)
getConfiguration:35, PropertiesConfigurationFactory (org.apache.logging.log4j.core.config.properties)
getConfiguration:523, ConfigurationFactory$Factory (org.apache.logging.log4j.core.config)
getConfiguration:498, ConfigurationFactory$Factory (org.apache.logging.log4j.core.config)
getConfiguration:422, ConfigurationFactory$Factory (org.apache.logging.log4j.core.config)
getConfiguration:323, ConfigurationFactory (org.apache.logging.log4j.core.config)
reconfigure:695, LoggerContext (org.apache.logging.log4j.core)
reconfigure:716, LoggerContext (org.apache.logging.log4j.core)
start:270, LoggerContext (org.apache.logging.log4j.core)
getContext:155, Log4jContextFactory (org.apache.logging.log4j.core.impl)
getContext:47, Log4jContextFactory (org.apache.logging.log4j.core.impl)
getContext:309, LogManager (org.apache.logging.log4j)
getContext:59, Logger$PrivateManager (org.apache.log4j)
getLogger:37, Logger (org.apache.log4j)
getLogger:262, Log4JLogger (org.apache.commons.logging.impl)
<init>:108, Log4JLogger (org.apache.commons.logging.impl)
newInstance0:-1, NativeConstructorAccessorImpl (jdk.internal.reflect)
newInstance:62, NativeConstructorAccessorImpl (jdk.internal.reflect)
newInstance:45, DelegatingConstructorAccessorImpl (jdk.internal.reflect)
newInstance:490, Constructor (java.lang.reflect)
createLogFromClass:1025, LogFactoryImpl (org.apache.commons.logging.impl)
discoverLogImplementation:844, LogFactoryImpl (org.apache.commons.logging.impl)
newInstance:541, LogFactoryImpl (org.apache.commons.logging.impl)
getInstance:292, LogFactoryImpl (org.apache.commons.logging.impl)
getInstance:269, LogFactoryImpl (org.apache.commons.logging.impl)
getLog:657, LogFactory (org.apache.commons.logging)
<init>:59, DefaultHttpResponseParser (org.apache.http.impl.conn)
create:76, DefaultHttpResponseParserFactory (org.apache.http.impl.conn)
<init>:99, DefaultBHttpClientConnection (org.apache.http.impl)
<init>:74, DefaultManagedHttpClientConnection (org.apache.http.impl.conn)
<init>:66, LoggingManagedHttpClientConnection (org.apache.http.impl.conn)
create:127, ManagedHttpClientConnectionFactory (org.apache.http.impl.conn)
create:57, ManagedHttpClientConnectionFactory (org.apache.http.impl.conn)
create:639, PoolingHttpClientConnectionManager$InternalConnectionFactory (org.apache.http.impl.conn)
create:610, PoolingHttpClientConnectionManager$InternalConnectionFactory (org.apache.http.impl.conn)
getPoolEntryBlocking:379, AbstractConnPool (org.apache.http.pool)
access$300:70, AbstractConnPool (org.apache.http.pool)
get:253, AbstractConnPool$2 (org.apache.http.pool)
get:198, AbstractConnPool$2 (org.apache.http.pool)
leaseConnection:306, PoolingHttpClientConnectionManager (org.apache.http.impl.conn)
get:282, PoolingHttpClientConnectionManager$1 (org.apache.http.impl.conn)
invoke0:-1, NativeMethodAccessorImpl (jdk.internal.reflect)
invoke:62, NativeMethodAccessorImpl (jdk.internal.reflect)
invoke:43, DelegatingMethodAccessorImpl (jdk.internal.reflect)
invoke:566, Method (java.lang.reflect)
invoke:70, ClientConnectionRequestFactory$Handler (com.amazonaws.http.conn)
get:-1, $Proxy68 (com.amazonaws.http.conn)
execute:190, MainClientExec (org.apache.http.impl.execchain)
execute:186, ProtocolExec (org.apache.http.impl.execchain)
doExecute:185, InternalHttpClient (org.apache.http.impl.client)
execute:83, CloseableHttpClient (org.apache.http.impl.client)
execute:56, CloseableHttpClient (org.apache.http.impl.client)
execute:72, SdkHttpClient (com.amazonaws.http.apache.client.impl)
executeOneRequest:1331, AmazonHttpClient$RequestExecutor (com.amazonaws.http)
executeHelper:1145, AmazonHttpClient$RequestExecutor (com.amazonaws.http)
doExecute:802, AmazonHttpClient$RequestExecutor (com.amazonaws.http)
executeWithTimer:770, AmazonHttpClient$RequestExecutor (com.amazonaws.http)
execute:744, AmazonHttpClient$RequestExecutor (com.amazonaws.http)
access$500:704, AmazonHttpClient$RequestExecutor (com.amazonaws.http)
execute:686, AmazonHttpClient$RequestExecutionBuilderImpl (com.amazonaws.http)
execute:550, AmazonHttpClient (com.amazonaws.http)
execute:530, AmazonHttpClient (com.amazonaws.http)
invoke:5259, AmazonS3Client (com.amazonaws.services.s3)
invoke:5206, AmazonS3Client (com.amazonaws.services.s3)
getObject:1512, AmazonS3Client (com.amazonaws.services.s3)
lambda$openStream$2:1102, PrestoS3FileSystem$PrestoS3InputStream (com.facebook.presto.hive.s3)
call:-1, 1553865704 (com.facebook.presto.hive.s3.PrestoS3FileSystem$PrestoS3InputStream$$Lambda$897)
run:139, RetryDriver (com.facebook.presto.hive)
openStream:1099, PrestoS3FileSystem$PrestoS3InputStream (com.facebook.presto.hive.s3)
openStream:1084, PrestoS3FileSystem$PrestoS3InputStream (com.facebook.presto.hive.s3)
seekStream:1077, PrestoS3FileSystem$PrestoS3InputStream (com.facebook.presto.hive.s3)
lambda$read$1:1021, PrestoS3FileSystem$PrestoS3InputStream (com.facebook.presto.hive.s3)
call:-1, 432126422 (com.facebook.presto.hive.s3.PrestoS3FileSystem$PrestoS3InputStream$$Lambda$895)
run:139, RetryDriver (com.facebook.presto.hive)
read:1020, PrestoS3FileSystem$PrestoS3InputStream (com.facebook.presto.hive.s3)
fill:252, BufferedInputStream (<http://java.io|java.io>)
read:271, BufferedInputStream (<http://java.io|java.io>)
read:83, FilterInputStream (<http://java.io|java.io>)
read:86, HadoopDataInputStream (org.apache.flink.fs.s3presto.common)
read:50, FSDataInputStreamWrapper (org.apache.flink.core.fs)
readInt:392, DataInputStream (<http://java.io|java.io>)
read:46, VersionedIOReadableWritable (<http://org.apache.flink.core.io|org.apache.flink.core.io>)
read:139, KeyedBackendSerializationProxy (org.apache.flink.runtime.state)
restore:134, HeapRestoreOperation (org.apache.flink.runtime.state.heap)
restore:62, HeapRestoreOperation (org.apache.flink.runtime.state.heap)
restoreState:169, HeapKeyedStateBackendBuilder (org.apache.flink.runtime.state.heap)
build:106, HeapKeyedStateBackendBuilder (org.apache.flink.runtime.state.heap)
createKeyedStateBackend:143, HashMapStateBackend (org.apache.flink.runtime.state.hashmap)
createKeyedStateBackend:74, HashMapStateBackend (org.apache.flink.runtime.state.hashmap)
createKeyedStateBackend:149, StateBackend (org.apache.flink.runtime.state)
lambda$keyedStatedBackend$1:336, StreamTaskStateInitializerImpl (org.apache.flink.streaming.api.operators)
apply:-1, 1862394055 (org.apache.flink.streaming.api.operators.StreamTaskStateInitializerImpl$$Lambda$1503)
attemptCreateAndRestore:168, BackendRestorerProcedure (org.apache.flink.streaming.api.operators)
createAndRestore:135, BackendRestorerProcedure (org.apache.flink.streaming.api.operators)
keyedStatedBackend:353, StreamTaskStateInitializerImpl (org.apache.flink.streaming.api.operators)
streamOperatorStateContext:165, StreamTaskStateInitializerImpl (org.apache.flink.streaming.api.operators)
initializeState:265, AbstractStreamOperator (org.apache.flink.streaming.api.operators)
initializeStateAndOpenOperators:106, RegularOperatorChain (org.apache.flink.streaming.runtime.tasks)
restoreGates:726, StreamTask (org.apache.flink.streaming.runtime.tasks)
call:-1, 777353063 (org.apache.flink.streaming.runtime.tasks.StreamTask$$Lambda$1462)
call:55, StreamTaskActionExecutor$1 (org.apache.flink.streaming.runtime.tasks)
restoreInternal:702, StreamTask (org.apache.flink.streaming.runtime.tasks)
restore:669, StreamTask (org.apache.flink.streaming.runtime.tasks)
run:-1, 936722563 (org.apache.flink.runtime.taskmanager.Task$$Lambda$1020)
runWithSystemExitMonitoring:935, Task (org.apache.flink.runtime.taskmanager)
restoreAndInvoke:904, Task (org.apache.flink.runtime.taskmanager)
doRun:728, Task (org.apache.flink.runtime.taskmanager)
run:550, Task (org.apache.flink.runtime.taskmanager)
run:829, Thread (java.lang)
Martijn Visser
05/25/2023, 8:20 PMOleksandr Nitavskyi
05/26/2023, 7:49 AMmonitoringInterval
config, I believe it could be valuable to make such value by default: https://github.com/apache/flink/pull/22664