Hi I have a question about making a new entity Bro...
# getting-started
m
Hi, I have a question about making a new entity browsable.
I created a new entity
Copy code
// metadata-models/src/main/pegasus/com/linkedin/schema/ThriftEnumKey.pdl
namespace com.linkedin.schema

/**
 * Key to retrieve Thrift Enum Type.
 */
@Aspect = {
  "name": "thriftEnumKey"
}
record ThriftEnumKey {

  /**
   * Thrift Enum Type name e.g. PageViewEvent, identity.Profile, ams.account_management_tracking
   *
   * Indexed for search under the field name "id" (see the Searchable annotation below).
   */
  @Searchable = {
    "fieldName": "id",
    "fieldType": "TEXT_PARTIAL",
    "enableAutocomplete": true,
    "boostScore": 10.0
  }
  name: string

}
and
Copy code
// datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/thrift/ThriftEnumType.java
package com.linkedin.datahub.graphql.types.thrift;

import static com.linkedin.metadata.Constants.*;

import com.linkedin.common.urn.Urn;
import com.linkedin.common.urn.UrnUtils;
import com.linkedin.data.template.StringArray;
import com.linkedin.datahub.graphql.QueryContext;
import com.linkedin.datahub.graphql.generated.AutoCompleteResults;
import com.linkedin.datahub.graphql.generated.BrowsePath;
import com.linkedin.datahub.graphql.generated.BrowseResults;
import com.linkedin.datahub.graphql.generated.EntityType;
import com.linkedin.datahub.graphql.generated.FacetFilterInput;
import com.linkedin.datahub.graphql.generated.SearchResults;
import com.linkedin.datahub.graphql.generated.ThriftEnum;
import com.linkedin.datahub.graphql.resolvers.ResolverUtils;
import com.linkedin.datahub.graphql.types.BrowsableEntityType;
import com.linkedin.datahub.graphql.types.SearchableEntityType;
import com.linkedin.datahub.graphql.types.mappers.AutoCompleteResultsMapper;
import com.linkedin.datahub.graphql.types.mappers.BrowsePathsMapper;
import com.linkedin.datahub.graphql.types.mappers.BrowseResultMapper;
import com.linkedin.datahub.graphql.types.mappers.UrnSearchResultsMapper;
import com.linkedin.datahub.graphql.types.pinterest.ThriftEnumUtils;
import com.linkedin.datahub.graphql.types.thrift.mapper.ThriftEnumMapper;
import com.linkedin.entity.EntityResponse;
import com.linkedin.entity.client.EntityClient;
import com.linkedin.metadata.Constants;
import com.linkedin.metadata.browse.BrowseResult;
import com.linkedin.metadata.query.AutoCompleteResult;
import com.linkedin.metadata.search.SearchResult;

import graphql.com.google.common.collect.ImmutableSet;
import graphql.execution.DataFetcherResult;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import static com.linkedin.datahub.graphql.Constants.*;

/**
 * GraphQL entity type for {@link ThriftEnum}.
 *
 * <p>Implements batch loading, search, autocomplete and browse by delegating every call to the
 * injected {@link EntityClient} and mapping the responses to their GraphQL representations.
 */
public class ThriftEnumType implements SearchableEntityType<ThriftEnum>, BrowsableEntityType<ThriftEnum> {

  /** Aspects requested from the entity client when batch loading. */
  private static final Set<String> ASPECTS_TO_RESOLVE = ImmutableSet.of(
    THRIFT_ENUM_KEY_ASPECT_NAME,
    THRIFT_ENUM_PROPERTIES_ASPECT_NAME
  );

  /** Facet fields handed to {@link ResolverUtils#buildFacetFilters}; currently empty. */
  private static final Set<String> FACET_FIELDS = ImmutableSet.of();

  /** Entity name used for the search, autocomplete and browse calls. */
  private static final String ENTITY_NAME = "thriftEnum";

  private final EntityClient _entityClient;

  /**
   * @param entityClient client that all entity, search and browse calls are delegated to
   */
  public ThriftEnumType(final EntityClient entityClient) {
    _entityClient = entityClient;
  }

  /** Returns {@link EntityType#THRIFT_ENUM}. */
  @Override
  public EntityType type() {
    return EntityType.THRIFT_ENUM;
  }

  /** Returns the GraphQL object class handled by this type. */
  @Override
  public Class<ThriftEnum> objectClass() {
    return ThriftEnum.class;
  }

  /**
   * Loads the entities for the given URNs in one call to the entity client.
   *
   * @param urnStrs URN strings to load
   * @param context query context supplying the authentication
   * @return one element per input URN, in input order; an element is {@code null} when the
   *     entity client returned no response for that URN
   * @throws RuntimeException wrapping any failure raised while fetching or mapping
   */
  @Override
  public List<DataFetcherResult<ThriftEnum>> batchLoad(
    List<String> urnStrs,
    QueryContext context
  ) {
    final List<Urn> urns = urnStrs
      .stream()
      .map(UrnUtils::getUrn)
      .collect(Collectors.toList());
    try {
      final Map<Urn, EntityResponse> responsesByUrn = _entityClient.batchGetV2(
        Constants.THRIFT_ENUM_ENTITY_NAME,
        new HashSet<>(urns),
        ASPECTS_TO_RESOLVE,
        context.getAuthentication()
      );

      // Walk the requested URNs (not the map) so the output keeps the caller's order and length.
      final List<DataFetcherResult<ThriftEnum>> results = new ArrayList<>(urns.size());
      for (Urn urn : urns) {
        final EntityResponse response = responsesByUrn.get(urn);
        results.add(
          response == null
            ? null
            : DataFetcherResult
              .<ThriftEnum>newResult()
              .data(ThriftEnumMapper.map(response))
              .build()
        );
      }
      return results;
    } catch (Exception e) {
      throw new RuntimeException(
        "Failed to batch load PinterestThriftEnumItems",
        e
      );
    }
  }

  /**
   * Runs a search over this entity type.
   *
   * @param query search query string
   * @param filters optional facet filters
   * @param start start value forwarded to the entity client
   * @param count count value forwarded to the entity client
   * @param context query context supplying the authentication
   * @return the mapped search results
   * @throws Exception if the entity client call fails
   */
  @Override
  public SearchResults search(
    @Nonnull String query,
    @Nullable List<FacetFilterInput> filters,
    int start,
    int count,
    @Nonnull final QueryContext context
  )
    throws Exception {
    final Map<String, String> facetFilters = ResolverUtils.buildFacetFilters(
      filters,
      FACET_FIELDS
    );
    final SearchResult searchResult = _entityClient.search(
      ENTITY_NAME,
      query,
      facetFilters,
      start,
      count,
      context.getAuthentication()
    );
    return UrnSearchResultsMapper.map(searchResult);
  }

  /**
   * Runs an autocomplete query over this entity type.
   *
   * @param query partial query string
   * @param field optional field name; not forwarded to the entity client by this implementation
   * @param filters optional facet filters
   * @param limit limit value forwarded to the entity client
   * @param context query context supplying the authentication
   * @return the mapped autocomplete results
   * @throws Exception if the entity client call fails
   */
  @Override
  public AutoCompleteResults autoComplete(
    @Nonnull String query,
    @Nullable String field,
    @Nullable List<FacetFilterInput> filters,
    int limit,
    @Nonnull final QueryContext context
  )
    throws Exception {
    final Map<String, String> facetFilters = ResolverUtils.buildFacetFilters(
      filters,
      FACET_FIELDS
    );
    final AutoCompleteResult result = _entityClient.autoComplete(
      ENTITY_NAME,
      query,
      facetFilters,
      limit,
      context.getAuthentication()
    );
    return AutoCompleteResultsMapper.map(result);
  }

  /**
   * Browses this entity type under the given path.
   *
   * <p>The path elements are joined with {@code BROWSE_PATH_DELIMITER} and prefixed with one
   * delimiter; an empty list is sent to the entity client as the empty string.
   *
   * @param path browse path elements
   * @param filters optional facet filters
   * @param start start value forwarded to the entity client
   * @param count count value forwarded to the entity client
   * @param context query context supplying the authentication
   * @return the mapped browse results
   * @throws Exception if the entity client call fails
   */
  @Override
  public BrowseResults browse(@Nonnull List<String> path,
                              @Nullable List<FacetFilterInput> filters,
                              int start,
                              int count,
                              @Nonnull final QueryContext context) throws Exception {
    final Map<String, String> facetFilters = ResolverUtils.buildFacetFilters(filters, FACET_FIELDS);
    final String pathStr = path.isEmpty()
      ? ""
      : BROWSE_PATH_DELIMITER + String.join(BROWSE_PATH_DELIMITER, path);
    // Use the shared constant rather than repeating the literal, as search/autoComplete do.
    final BrowseResult result = _entityClient.browse(
      ENTITY_NAME,
      pathStr,
      facetFilters,
      start,
      count,
      context.getAuthentication()
    );
    return BrowseResultMapper.map(result);
  }

  /**
   * Fetches the browse paths of a single entity.
   *
   * @param urn URN string of the entity
   * @param context query context supplying the authentication
   * @return the mapped browse paths
   * @throws Exception if the entity client call fails
   */
  @Override
  public List<BrowsePath> browsePaths(@Nonnull String urn, @Nonnull final QueryContext context) throws Exception {
    final StringArray result = _entityClient.getBrowsePaths(
      ThriftEnumUtils.getThriftEnumUrn(urn),
      context.getAuthentication()
    );
    return BrowsePathsMapper.map(result);
  }
}
In entity-registry.yml
Copy code
# Registry entry for the thriftEnum entity: its key aspect plus the aspects it supports.
- name: thriftEnum
  keyAspect: thriftEnumKey
  aspects:
    - thriftEnumProperties
    - browsePaths
After ingestion, I checked mysql
Copy code
mysql> select * from metadata_aspect_v2 where urn = 'urn:li:thriftEnum:com.company.TweetType' and aspect='browsePaths' \G
*************************** 1. row ***************************
           urn: urn:li:thriftEnum:com.company.TweetType
        aspect: browsePaths
       version: 0
      metadata: {"paths":["/prod/thrift/com/company/tweettype"]}
systemmetadata: {"registryVersion":"0.0.0.0-dev","runId":"thrift-2022_06_03-20_12_07","registryName":"unknownRegistry","lastObserved":1654287127450}
     createdon: 2022-06-03 20:12:07.477000
     createdby: urn:li:corpuser:UNKNOWN
    createdfor: NULL
In GraphQL
Copy code
# Fetch a single thriftEnum entity by URN, selecting its urn, type and the key of each item.
query C {
  thriftEnum(urn: "urn:li:thriftEnum:com.company.TweetType"){
    urn
    type
    items {
      key
    }
  }
}
See
Copy code
{
  "data": {
    "thriftEnum": {
      "urn": "urn:li:thriftEnum:com.company.TweetType",
      "type": "THRIFT_ENUM",
      "items": [
        {
          "key": "TWEET"
        },
        {
          "key": "RETWEET"
        },
        {
          "key": "DM"
        },
        {
          "key": "REPLY"
        }
      ]
    }
  }
}
if I query browse
Copy code
# Browse THRIFT_ENUM entities under a path (start 0, count 10, no filters).
# ThriftEnumType.browse joins the `path` elements with the browse-path delimiter and
# prefixes one delimiter, so the elements here are written without a leading "/".
query B {
  browse(input: {type: THRIFT_ENUM, path: ["prod/thrift/com"], count: 10, start:0, filters: null}){
    entities {
      urn
      type      
    }
    groups {
      name
    }
    count
    total
  }
}
See
Copy code
{
  "data": {
    "browse": {
      "entities": [],
      "groups": [
        {
          "name": "company"
        }
      ],
      "count": 10,
      "total": 1
    }
  }
}
So far, this is all as expected. But if I change the path to
"prod/thrift/com/company"
I see error in gms
Copy code
datahub-gms               | 20:26:03.920 [ForkJoinPool.commonPool-worker-36] ERROR c.l.d.g.e.DataHubDataFetcherExceptionHandler:21 - Failed to execute DataFetcher
datahub-gms               | java.lang.NullPointerException: null
datahub-gms               |     at com.linkedin.datahub.graphql.resolvers.load.EntityTypeBatchResolver.lambda$get$0(EntityTypeBatchResolver.java:45)
datahub-gms               |     at java.util.stream.ReferencePipeline$2$1.accept(ReferencePipeline.java:174)
datahub-gms               |     at java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1384)
datahub-gms               |     at java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:482)
datahub-gms               |     at java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:472)
datahub-gms               |     at java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:708)
datahub-gms               |     at java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
datahub-gms               |     at java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:566)
datahub-gms               |     at com.linkedin.datahub.graphql.resolvers.load.EntityTypeBatchResolver.get(EntityTypeBatchResolver.java:46)
datahub-gms               |     at com.linkedin.datahub.graphql.resolvers.load.EntityTypeBatchResolver.get(EntityTypeBatchResolver.java:23)
datahub-gms               |     at com.linkedin.datahub.graphql.resolvers.AuthenticatedResolver.get(AuthenticatedResolver.java:25)
datahub-gms               |     at graphql.execution.ExecutionStrategy.fetchField(ExecutionStrategy.java:270)
datahub-gms               |     at graphql.execution.ExecutionStrategy.resolveFieldWithInfo(ExecutionStrategy.java:203)
datahub-gms               |     at graphql.execution.AsyncExecutionStrategy.execute(AsyncExecutionStrategy.java:60)
datahub-gms               |     at graphql.execution.ExecutionStrategy.completeValueForObject(ExecutionStrategy.java:646)
datahub-gms               |     at graphql.execution.ExecutionStrategy.completeValue(ExecutionStrategy.java:438)
datahub-gms               |     at graphql.execution.ExecutionStrategy.completeField(ExecutionStrategy.java:390)
datahub-gms               |     at graphql.execution.ExecutionStrategy.lambda$resolveFieldWithInfo$1(ExecutionStrategy.java:205)
datahub-gms               |     at java.util.concurrent.CompletableFuture.uniApply(CompletableFuture.java:616)
datahub-gms               |     at java.util.concurrent.CompletableFuture$UniApply.tryFire(CompletableFuture.java:591)
datahub-gms               |     at java.util.concurrent.CompletableFuture.postComplete(CompletableFuture.java:488)
datahub-gms               |     at java.util.concurrent.CompletableFuture$AsyncSupply.run(CompletableFuture.java:1609)
datahub-gms               |     at java.util.concurrent.CompletableFuture$AsyncSupply.exec(CompletableFuture.java:1596)
datahub-gms               |     at java.util.concurrent.ForkJoinTask.doExec(ForkJoinTask.java:289)
datahub-gms               |     at java.util.concurrent.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1056)
datahub-gms               |     at java.util.concurrent.ForkJoinPool.runWorker(ForkJoinPool.java:1692)
datahub-gms               |     at java.util.concurrent.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:175)
datahub-gms               | 20:26:03.921 [ForkJoinPool.commonPool-worker-43] ERROR c.datahub.graphql.GraphQLController:93 - Errors while executing graphQL query: "query B {\n  browse(input: {type: THRIFT_ENUM, path: [\"prod/thrift/com/company\"], count: 10, start:0, filters: null}){\n    entities {\n      urn\n      type      \n    }\n    groups {\n      name\n    }\n    count\n    total\n  }\n}", result: {errors=[{message=An unknown error occurred., locations=[{line=3, column=5}], path=[browse, entities], extensions={code=500, type=SERVER_ERROR, classification=DataFetchingException}}], data={browse=null}}, errors: [DataHubGraphQLError{path=[browse, entities], code=SERVER_ERROR, locations=[SourceLocation{line=3, column=5}]}]
Do you know what the reason is? cc @helpful-optician-78938 @early-lamp-41924
cc @green-football-43791
e
It’s a bit hard to follow so much code on slack
Do you mind creating a git pr that we could refer to?
Also a quick thing
all paths should start with /
m
Thanks @early-lamp-41924. If the path is "/prod/thrift/com/company", it gives me an empty result.
Do you have 30 min to sync? If not I will create a branch
e
what happens
if you query
“/”
or “/prod”
and go down the dirs?
m
Copy code
/ => empty
/prod => empty
prod => thrift
prod/thrift => com
prod/thrift/com => company
prod/thrift/com/company => NPE
I double checked the aspect in mysql
Copy code
mysql> select * from metadata_aspect_v2 where urn = 'urn:li:thriftEnum:com.company.TweetType' and aspect='browsePaths' \G
*************************** 1. row ***************************
           urn: urn:li:thriftEnum:com.company.TweetType
        aspect: browsePaths
       version: 0
      metadata: {"paths":["/prod/thrift/com/company/tweettype"]}
systemmetadata: {"registryVersion":"0.0.0.0-dev","runId":"thrift-2022_06_03-20_12_07","registryName":"unknownRegistry","lastObserved":1654287127450}
     createdon: 2022-06-03 20:12:07.477000
     createdby: urn:li:corpuser:UNKNOWN
    createdfor: NULL
e
ah so
got confused bc I never used graphql browse before
seems like the path in the graphql request should be
[“prod”, “thrift”, “com”, “company”, “tweettype”]
checkout the browse function
take a look at how the paths field is used
assuming you are using graphql
m
Yes. I am using graphql
never used graphql browse before
Is there other way to browse?
e
oh I usually use browse for testing
but that’s just me
just try the above array in path
hmn tho just realized it may lead to same result
but let’s see
m
just try the above array in path
How to do it?
e
huh?
in the graphql request
m
I see
Yes. The same thing
Let me create a branch and repro the issue.
Hi @early-lamp-41924 This PR https://github.com/datahub-project/datahub/pull/5090 can repro the issue. Thanks
Hi @big-carpet-38439, here is the context for this PR: I found that browsing is not working for my new entity, so I created this PR to help reproduce the issue.
@early-lamp-41924 Do you have any suggestions? Thanks
Thanks @big-carpet-38439 and @early-lamp-41924 It works after I add
Copy code
// For URNs whose entity type is "thriftEnum", build a partial ThriftEnum carrying only
// the URN string and the THRIFT_ENUM type.
if (input.getEntityType().equals("thriftEnum")) {
      partialEntity = new ThriftEnum();
      ((ThriftEnum) partialEntity).setUrn(input.toString());
      ((ThriftEnum) partialEntity).setType(EntityType.THRIFT_ENUM);
    }
in
datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/types/common/mappers/UrnToEntityMapper.java
b
amazing