/* * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the "Elastic License * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side * Public License v 1"; you may not use this file except in compliance with, at * your election, the "Elastic License 2.0", the "GNU Affero General Public * License v3.0 only", or the "Server Side Public License, v 1". */ package org.elasticsearch.index.mapper; import org.apache.lucene.document.StoredField; import org.apache.lucene.index.LeafReader; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.util.ByteUtils; import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.Tuple; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.search.lookup.SourceFilter; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentType; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.Function; import java.util.stream.Stream; /** * Mapper for the {@code _ignored_source} field. 
* * A field mapper that records fields that have been ignored or otherwise need storing their source, along with their values. * It's intended for use in indexes with synthetic source to reconstruct the latter, taking into account fields that got ignored or * transformed during indexing. Entries get stored in lexicographical order by field name. * * This overlaps with {@link IgnoredFieldMapper} that tracks just the ignored field names. It's worth evaluating * if we can replace it for all use cases to avoid duplication, assuming that the storage tradeoff is favorable. */ public class IgnoredSourceFieldMapper extends MetadataFieldMapper { private final IndexSettings indexSettings; // This factor is used to combine two offsets within the same integer: // - the offset of the end of the parent field within the field name (N / PARENT_OFFSET_IN_NAME_OFFSET) // - the offset of the field value within the encoding string containing the offset (first 4 bytes), the field name and value // (N % PARENT_OFFSET_IN_NAME_OFFSET) private static final int PARENT_OFFSET_IN_NAME_OFFSET = 1 << 16; public static final String NAME = "_ignored_source"; public static final TypeParser PARSER = new FixedTypeParser(context -> new IgnoredSourceFieldMapper(context.getIndexSettings())); static final NodeFeature DONT_EXPAND_DOTS_IN_IGNORED_SOURCE = new NodeFeature("mapper.ignored_source.dont_expand_dots"); static final NodeFeature IGNORED_SOURCE_AS_TOP_LEVEL_METADATA_ARRAY_FIELD = new NodeFeature( "mapper.ignored_source_as_top_level_metadata_array_field" ); static final NodeFeature ALWAYS_STORE_OBJECT_ARRAYS_IN_NESTED_OBJECTS = new NodeFeature( "mapper.ignored_source.always_store_object_arrays_in_nested" ); public static final FeatureFlag COALESCE_IGNORED_SOURCE_ENTRIES = new FeatureFlag("ignored_source_fields_per_entry"); /* Setting to disable encoding and writing values for this field. This is needed to unblock index functionality in case there is a bug on this code path. 
*/ public static final Setting SKIP_IGNORED_SOURCE_WRITE_SETTING = Setting.boolSetting( "index.mapping.synthetic_source.skip_ignored_source_write", false, Setting.Property.Dynamic, Setting.Property.IndexScope ); /* Setting to disable reading and decoding values stored in this field. This is needed to unblock search functionality in case there is a bug on this code path. */ public static final Setting SKIP_IGNORED_SOURCE_READ_SETTING = Setting.boolSetting( "index.mapping.synthetic_source.skip_ignored_source_read", false, Setting.Property.Dynamic, Setting.Property.IndexScope ); /* * Container for the ignored field data: * - the full name * - the offset in the full name indicating the end of the substring matching * the full name of the parent field * - the value, encoded as a byte array */ public record NameValue(String name, int parentOffset, BytesRef value, LuceneDocument doc) { /** * Factory method, for use with fields under the parent object. It doesn't apply to objects at root level. * @param context the parser context, containing a non-null parent * @param name the fully-qualified field name, including the path from root * @param value the value to store */ public static NameValue fromContext(DocumentParserContext context, String name, BytesRef value) { int parentOffset = context.parent() instanceof RootObjectMapper ? 0 : context.parent().fullPath().length() + 1; return new NameValue(name, parentOffset, value, context.doc()); } String getParentFieldName() { // _doc corresponds to the root object return (parentOffset == 0) ? MapperService.SINGLE_MAPPING_NAME : name.substring(0, parentOffset - 1); } String getFieldName() { return parentOffset() == 0 ? 
name() : name().substring(parentOffset()); } NameValue cloneWithValue(BytesRef value) { assert value() == null; return new NameValue(name, parentOffset, value, doc); } boolean hasValue() { return XContentDataHelper.isDataPresent(value); } } static final class IgnoredValuesFieldMapperType extends StringFieldType { private static final IgnoredValuesFieldMapperType INSTANCE = new IgnoredValuesFieldMapperType(); private IgnoredValuesFieldMapperType() { super(NAME, IndexType.NONE, true, TextSearchInfo.NONE, Collections.emptyMap()); } @Override public String typeName() { return NAME; } @Override public ValueFetcher valueFetcher(SearchExecutionContext context, String format) { return new StoredValueFetcher(context.lookup(), NAME); } } private IgnoredSourceFieldMapper(IndexSettings indexSettings) { super(IgnoredValuesFieldMapperType.INSTANCE); this.indexSettings = indexSettings; } @Override protected String contentType() { return NAME; } @Override public void postParse(DocumentParserContext context) { // Ignored values are only expected in synthetic mode. if (context.mappingLookup().isSourceSynthetic() == false) { assert context.getIgnoredFieldValues().isEmpty(); return; } ignoredSourceFormat(context.indexSettings().getIndexVersionCreated()).writeIgnoredFields(context.getIgnoredFieldValues()); } // In rare cases decoding values stored in this field can fail leading to entire source // not being available. // We would like to have an option to lose some values in synthetic source // but have search not fail. 
/**
     * Adds {@code _ignored_source} to the stored fields to load for synthetic source, unless
     * {@code indexSettings.getSkipIgnoredSourceRead()} reports that reading it is switched off.
     *
     * @param fieldsToLoadForSyntheticSource set of field names to load; {@link #NAME} gets added to it in place
     * @param indexSettings the index settings that are asked whether reads should be skipped
     * @return the very same set instance that was passed in
     */
    public static Set ensureLoaded(Set fieldsToLoadForSyntheticSource, IndexSettings indexSettings) {
        if (indexSettings.getSkipIgnoredSourceRead() == false) {
            fieldsToLoadForSyntheticSource.add(NAME);
        }
        return fieldsToLoadForSyntheticSource;
    }

    /**
     * Encoding that holds exactly one {@link NameValue} per entry. An entry is laid out as:
     * - a 4-byte little-endian int equal to {@code name.length() + PARENT_OFFSET_IN_NAME_OFFSET * parentOffset}
     * - the field name as UTF-8 bytes
     * - the value bytes
     */
    public static class LegacyIgnoredSourceEncoding {
        /**
         * Encodes a single entry.
         *
         * @param values the entry to encode; its name, parent offset and value are written, its doc is not
         * @return a new {@link BytesRef} wrapping a freshly allocated array with the encoded entry
         */
        public static BytesRef encode(NameValue values) {
            assert values.parentOffset < PARENT_OFFSET_IN_NAME_OFFSET;
            assert values.parentOffset * (long) PARENT_OFFSET_IN_NAME_OFFSET < Integer.MAX_VALUE;

            byte[] nameBytes = values.name.getBytes(StandardCharsets.UTF_8);
            byte[] bytes = new byte[4 + nameBytes.length + values.value.length];
            // The header stores the name length in chars (not UTF-8 bytes); decode() relies on that. The length
            // shares the int with the parent offset, so it only round-trips while it stays below
            // PARENT_OFFSET_IN_NAME_OFFSET.
            ByteUtils.writeIntLE(values.name.length() + PARENT_OFFSET_IN_NAME_OFFSET * values.parentOffset, bytes, 0);
            System.arraycopy(nameBytes, 0, bytes, 4, nameBytes.length);
            System.arraycopy(values.value.bytes, values.value.offset, bytes, 4 + nameBytes.length, values.value.length);
            return new BytesRef(bytes);
        }

        /**
         * Decodes an entry produced by {@link #encode(NameValue)}.
         *
         * @param field the encoded entry; must be a {@link BytesRef}
         * @return the decoded entry, with a {@code null} doc and a value that points into the input's backing array
         */
        public static NameValue decode(Object field) {
            BytesRef encoded = (BytesRef) field;
            byte[] bytes = encoded.bytes;
            // Only the range [offset, offset + length) of the backing array belongs to this entry; reading from
            // index 0 up to bytes.length would pick up unrelated bytes whenever the BytesRef is a slice.
            int start = encoded.offset;
            int length = encoded.length;

            int encodedSize = ByteUtils.readIntLE(bytes, start);
            int nameSize = encodedSize % PARENT_OFFSET_IN_NAME_OFFSET;
            int parentOffset = encodedSize / PARENT_OFFSET_IN_NAME_OFFSET;

            // nameSize counts chars rather than bytes, so decode everything behind the header, cut the name out of
            // it, and re-encode the name to find out how many bytes it occupies.
            String decoded = new String(bytes, start + 4, length - 4, StandardCharsets.UTF_8);
            String name = decoded.substring(0, nameSize);
            int nameByteCount = name.getBytes(StandardCharsets.UTF_8).length;

            BytesRef value = new BytesRef(bytes, start + 4 + nameByteCount, length - nameByteCount - 4);
            return new NameValue(name, parentOffset, value, null);
        }

        /** Converts the mapped entry with {@code mappedToNameValue} and encodes the result. */
        public static BytesRef encodeFromMap(MappedNameValue mappedNameValue) throws IOException {
            return encode(mappedToNameValue(mappedNameValue));
        }

        /** Decodes the entry and converts the result with {@code nameValueToMapped}. */
        public static MappedNameValue decodeAsMap(BytesRef value) throws IOException {
            return nameValueToMapped(decode(value));
        }
    }

    /**
     * Encoding that stores all entries sharing the same field name in one value: the number of entries, the field
     * name, and then the parent offset and value bytes of each entry.
     */
    public static class CoalescedIgnoredSourceEncoding {
        /**
         * Encodes a non-empty list of entries that all have the same field name.
         *
         * @param values the entries to encode
         * @return the encoded bytes
         * @throws ElasticsearchException if writing to the in-memory stream fails
         */
        public static BytesRef encode(List values) {
            assert values.isEmpty() == false;
            try {
                BytesStreamOutput stream = new BytesStreamOutput();
// Header: the number of entries, followed by the field name that all of them share.
                stream.writeVInt(values.size());
                String fieldName = values.getFirst().name;
                stream.writeString(fieldName);
                for (var value : values) {
                    // The name is written only once, so every entry has to carry the same one.
                    assert fieldName.equals(value.name);
                    stream.writeVInt(value.parentOffset);
                    stream.writeBytesRef(value.value);
                }
                return stream.bytes().toBytesRef();
            } catch (IOException e) {
                throw new ElasticsearchException("Failed to encode _ignored_source", e);
            }
        }

        /**
         * Decodes a value written by {@code encode}: reads the entry count (asserted to be at least 1), the shared
         * field name, and then one parent offset and one value per entry.
         *
         * @param value the encoded bytes
         * @return the decoded entries, all with the same name and a {@code null} doc
         * @throws ElasticsearchException if reading from the stream fails
         */
        public static List decode(BytesRef value) {
            try {
                StreamInput stream = new BytesArray(value).streamInput();
                var count = stream.readVInt();
                assert count >= 1;
                String fieldName = stream.readString();
                List values = new ArrayList<>(count);
                for (int i = 0; i < count; i++) {
                    int parentOffset = stream.readVInt();
                    BytesRef valueBytes = stream.readBytesRef();
                    values.add(new NameValue(fieldName, parentOffset, valueBytes, null));
                }
                return values;
            } catch (IOException e) {
                throw new ElasticsearchException("Failed to decode _ignored_source", e);
            }
        }

        /**
         * Converts each mapped entry with {@code mappedToNameValue} and encodes the resulting list.
         *
         * @param filteredValues the mapped entries to encode
         * @return the encoded bytes
         */
        public static BytesRef encodeFromMap(List filteredValues) throws IOException {
            List filteredNameValues = new ArrayList<>(filteredValues.size());
            for (var filteredValue : filteredValues) {
                filteredNameValues.add(mappedToNameValue(filteredValue));
            }
            return encode(filteredNameValues);
        }

        /**
         * Decodes the value and converts each decoded entry with {@code nameValueToMapped}.
         *
         * @param value the encoded bytes
         * @return the mapped entries, in the order they were decoded
         */
        public static List decodeAsMap(BytesRef value) throws IOException {
            List nameValues = decode(value);
            List mappedValues = new ArrayList<>(nameValues.size());
            for (var nameValue : nameValues) {
                mappedValues.add(nameValueToMapped(nameValue));
            }
            return mappedValues;
        }
    }

    // The formats in which ignored source can be stored; each constant supplies its own load, write and filter logic.
    // NOTE(review): the generic type arguments of the signatures below look like they got lost (e.g. `Map>`);
    // confirm against the canonical file.
    public enum IgnoredSourceFormat {
        // Format without ignored source: loading yields empty maps, writing trips an assertion.
        NO_IGNORED_SOURCE {
            @Override
            public Map> loadAllIgnoredFields(SourceFilter filter, Map> storedFields) {
                return Map.of();
            }

            @Override
            public Map> loadSingleIgnoredField(Set fieldPaths, Map> storedFields) {
                return Map.of();
            }

            @Override
            public void writeIgnoredFields(Collection ignoredFieldValues) {
                assert false : "cannot write " + ignoredFieldValues.size() + " values with format NO_IGNORED_SOURCE";
            }

            @Override
            public BytesRef filterValue(BytesRef value, Function, Map> filter) {
assert false : "cannot filter ignored source with format NO_IGNORED_SOURCE"; return null; } }, LEGACY_SINGLE_IGNORED_SOURCE { @Override public Map> loadAllIgnoredFields(SourceFilter filter, Map> storedFields) { Map> objectsWithIgnoredFields = null; List