// TODO: write the table schema & type
//  - samplesForSchema
// TODO: cache chunks and memoize the schema, ...
//  - handle concurrent calls (e.g. to the schema, ...)

// common table schema : size, chunkSize, chunkCacheSize, offset, samplesForSchema, schema, depth, flatten, transformDefaults, debug?, profile?
// common transform schema : debug, profile, 
// Maximum number of chunk promises DataTable.getChunkAt keeps in its cache;
// once exceeded, the least recently accessed entries are evicted.
const MAX_CACHED_CHUNKS = 3
/**
 * Per-sample clean-up hook.
 *
 * Currently a pass-through: the sample is returned untouched (same
 * reference). The code that used to follow the first `return` -- deleting
 * every array/object property of the sample -- was unreachable and has been
 * removed; reinstate it here if nested properties must be stripped again.
 *
 * @param s the sample to clean up
 * @returns `s` itself, unmodified
 */
function cleanupSample(s) {
    return s
}

/**
 * Serialises a value like `JSON.stringify`, but with the keys of every object
 * emitted in sorted order so the result does not depend on insertion order.
 */
function canonicalJson(value) {
    return JSON.stringify(value, (_key, v) => {
        if (v === null || typeof v !== 'object' || Array.isArray(v))
            return v
        // Boxed primitives are unwrapped by JSON.stringify itself; leave them alone.
        if (v instanceof Number || v instanceof String || v instanceof Boolean)
            return v
        // Null-prototype target so that a literal "__proto__" key stays a plain property.
        const sorted = Object.create(null)
        for (const k of Object.keys(v).sort())
            sorted[k] = v[k]
        return sorted
    })
}

/**
 * Structural equality based on the JSON representation of both values.
 *
 * Keeps the semantics of the JSON comparison (properties holding `undefined`
 * are ignored, `toJSON` is honoured, array order matters) but no longer
 * depends on the order in which object keys were inserted:
 * `{ a: 1, b: 2 }` equals `{ b: 2, a: 1 }`.
 *
 * @returns true when both values serialise to the same canonical JSON
 */
function deepEqual(o1, o2) {
    return canonicalJson(o1) === canonicalJson(o2)
}

// Default `sliceSize` of getSchema: how many leading samples are inspected
// when inferring a schema.
const DEFAULT_SAMPLES_COUNT_FOR_SCHEMA = 1000

/**
 * Reconciles the schema already recorded for a field with a newly inferred one.
 *
 * - If either side has no `type`, the other schema is returned as-is.
 * - If both sides have the same `type`, their attributes are merged (the new
 *   schema wins on conflicts). Previously this case threw, e.g. for
 *   `{ type: 'UTF8' }` vs `{ type: 'UTF8', optional: true }`.
 * - `INT64` and `DOUBLE` widen to `{ type: 'DOUBLE' }`.
 *
 * @param previousSchema schema recorded so far
 * @param newSchema schema inferred from the current value
 * @param hint unused; kept so existing callers keep working
 * @returns the reconciled schema
 * @throws a string (not an Error) when the two types cannot be reconciled
 */
function extendSchema(previousSchema, newSchema, hint?: string) {
    const previousType = previousSchema.type
    const newType = newSchema.type
    if (!previousType)
        return newSchema
    if (!newType)
        return previousSchema
    if (previousType === newType)
        return { ...previousSchema, ...newSchema }
    const extension = [previousType, newType].join("-")
    switch (extension) {
        case 'INT64-DOUBLE':
        case 'DOUBLE-INT64':
            return { type: 'DOUBLE' }
        default:
            // Callers log and rethrow this value; it is deliberately kept a string.
            throw `unable to extend schema for : ${extension}`
    }
}
/**
 * Reads a nested value by following `path` from `obj`.
 *
 * @param obj root value to read from
 * @param path iterable of property names, outermost first
 * @param dft value returned when the path cannot be followed or ends on `undefined`
 * @returns the value found at `path`, or `dft`
 */
function get(obj, path, dft: any = undefined) {
    let current = obj
    for (const part of path) {
        // `typeof null` is 'object', so null needs its own check; without it a
        // null anywhere on the path (or a null root) raised a TypeError.
        if (current === null || typeof current !== 'object')
            return dft
        current = current[part]
    }
    return current === undefined ? dft : current
}

// Separator joining the path segments of a nested field into one schema key
// (e.g. "outer.inner").
const SEPARATOR_CHARACTER = '.'
/**
 * Projects a sample onto a schema: the result has exactly one entry per
 * schema key, `null` when the sample has no value for it.
 *
 * A schema key is looked up first as a literal property of the sample (this
 * covers samples whose own keys contain the separator) and then as a nested
 * path split on `SEPARATOR_CHARACTER`.
 *
 * Numbers found under a `UTF8` field are rendered as ISO-8601 dates; numbers
 * that are not a valid timestamp (NaN, Infinity, out of range) are rendered
 * with `String()` instead of letting `toISOString()` throw a RangeError.
 *
 * @param sample the source record
 * @param schema map of schema key to field schema (only `type` is read)
 * @returns a new flat object keyed by schema key
 */
export function adaptSample(sample, schema) {
    const result: { [key: string]: any } = {}
    for (const key in schema) {
        const path = key.split(SEPARATOR_CHARACTER)
        // Literal "a.b" property first, nested a -> b second.
        let value = path.length > 1 ? get(sample, [key], undefined) : undefined
        if (value === undefined || value === null)
            value = get(sample, path, null)
        if (schema[key].type === 'UTF8' && typeof value === 'number') {
            const asDate = new Date(value)
            value = Number.isNaN(asDate.getTime()) ? String(value) : asDate.toISOString()
        }
        result[key] = value
    }
    return result
}

/**
 * Normalises the result of `getSchemaFromValue` into a list of
 * `{ key, localSchema }` entries.
 *
 * - falsy schema: empty list
 * - single schema object: one entry under `key`
 * - list of entries: returned as-is when `key` is `null`, otherwise each
 *   entry's key is prefixed with `key` and the separator
 */
function normalizedSchemas(schema, key) {
    if (Array.isArray(schema)) {
        if (key === null)
            return schema
        return schema.map(entry => {
            const prefixedKey = [key, entry.key].join(SEPARATOR_CHARACTER)
            return { ...entry, key: prefixedKey }
        })
    }
    return schema ? [{ key, localSchema: schema }] : []
}

/**
 * Infers a schema from a single value.
 *
 * - number: `INT64` when integral, `DOUBLE` otherwise
 * - string: `UTF8`
 * - boolean: `BOOLEAN`
 * - non-null object: a flat list of `{ key, localSchema }` entries, one per
 *   leaf, with nested keys joined by the separator. Arrays take this branch
 *   too (their indices become keys); the dedicated `repeated` handling was
 *   disabled in the original code and is not reinstated here.
 * - anything else (null, undefined, function, ...): `undefined`
 */
function getSchemaFromValue(value) {
    switch (typeof value) {
        case 'number':
            return { type: Number.isInteger(value) ? 'INT64' : 'DOUBLE' }
        case 'string':
            return { type: 'UTF8' }
        case 'boolean':
            return { type: 'BOOLEAN' }
        case 'object': {
            if (value === null)
                return undefined
            const schemas: any[] = []
            for (const innerKey in value) {
                const innerSchemas = normalizedSchemas(getSchemaFromValue(value[innerKey]), innerKey)
                // Append in place rather than re-spreading the accumulator for every key.
                for (const entry of innerSchemas)
                    schemas.push(entry)
            }
            return schemas
        }
        default:
            return undefined
    }
}

/**
 * Infers a flat schema (schema key -> `{ type, optional }`) from samples.
 *
 * @param samples array of records to inspect
 * @param config optional settings; anything that is not an object (including
 *   `null`) is treated as "no settings":
 *   - `schema`: initial schema, copied and used as the starting point
 *   - `strict`: throw instead of widening when a field has conflicting types
 *   - `only`: list of schema keys to keep in the result
 * @param sliceSize number of leading samples to inspect; a falsy value means all
 * @returns the inferred schema
 * @throws a string when types conflict in strict mode, or whatever
 *   `extendSchema` throws when two types cannot be reconciled
 */
export function getSchema(samples, config: any = {}, sliceSize = DEFAULT_SAMPLES_COUNT_FOR_SCHEMA) {
    // A null config used to crash on `config.schema`.
    const options: any = config !== null && typeof config === 'object' ? config : {}
    if (sliceSize)
        samples = samples.slice(0, sliceSize)
    let schema: any = { ...(options.schema || {}) }
    for (const sample of samples) {
        for (const sampleKey in sample) {
            // NOTE(review): this skips every top-level key that already has an
            // entry, including entries inferred from earlier samples, so the type
            // of a top-level field is fixed by the first sample that carries it;
            // only nested fields reach the widening below. Confirm this is intended.
            if (schema[sampleKey])
                continue
            const inferred = getSchemaFromValue(sample[sampleKey])
            if (!inferred)
                continue
            for (let { key, localSchema } of normalizedSchemas(inferred, sampleKey)) {
                localSchema.optional = true
                const previousSchema = schema[key]
                if (previousSchema && !deepEqual(localSchema, previousSchema)) {
                    if (options.strict)
                        throw `ambigous type for strict schema (${key}) : ${JSON.stringify(localSchema)} vs ${JSON.stringify(previousSchema)}`
                    try {
                        localSchema = extendSchema(previousSchema, localSchema, key)
                        localSchema.optional = true
                    } catch (err) {
                        // Log the offending value; the entries carry no `value`
                        // field, so the previous log always printed `undefined`.
                        console.log(key, sample[sampleKey], localSchema)
                        console.log(String(err))
                        throw err
                    }
                }
                schema[key] = { ...localSchema }
            }
        }
    }
    if (options.only) {
        const filtered: any = {}
        for (const key of options.only) {
            if (schema[key])
                filtered[key] = schema[key]
        }
        schema = filtered
    }
    return schema
}

import BASE_TABLE_TYPES, { BaseTableType } from './base-tables'
import { DataChunk, DEFAULT_CHUNK_SIZE, getChunkSamples } from './chunk'
import { applyTransform } from './transforms'
import { flattenSample } from '../util'

export {
    BASE_TABLE_TYPES
}

// TODO : store stats per field 
/**
 * A table described by a spec: exactly one base table type (a key of
 * `BASE_TABLE_TYPES` that is set in the spec) plus an optional list of
 * transforms applied to every chunk read from it.
 *
 * Errors are thrown as plain strings throughout this class; that is kept
 * as-is so existing callers are not affected.
 */
export default class DataTable {
    spec: any
    dataset: any
    // Holds the resolved size once getSize() has run (see getSize).
    size: Promise<number>
    baseTableType: BaseTableType
    // Passed as `this` to every base table type function.
    baseTableState: any
    transforms: { spec: any, state: any }[] = []
    transformStates: any[]
    // Chunk cache, keyed by "offset+size+final"; see getChunkAt.
    chunks: { [key: string]: { chunk: Promise<DataChunk>, accessTime: number } } = {}
    schema?: any

    samples?: any[]

    /**
     * @param spec table spec; must set exactly one key of `BASE_TABLE_TYPES`
     * @param tables handed to the base table factory and to transforms
     * @throws a string when the spec names no base type or more than one
     */
    constructor(spec, tables) {
        this.spec = spec
        this.dataset = tables

        let baseTableType: BaseTableType | null = null, typeName: any = null

        for (let key in BASE_TABLE_TYPES) {
            if (this.spec[key]) {
                if (baseTableType)
                    throw `ambiguous base type for ${typeName} and ${key}`
                typeName = key
                baseTableType = BASE_TABLE_TYPES[key](spec, tables)
            }
        }
        if (!baseTableType)
            throw `no base table found in ${JSON.stringify(this.spec)}`
        baseTableType.name = typeName
        this.baseTableType = baseTableType
        this.baseTableState = {}

        // JSON round-trip: each table works on its own deep copy of the transform specs.
        this.transforms = JSON.parse(JSON.stringify(spec.transforms || [])).map(spec => ({ spec, state: {} }))
    }

    /**
     * Returns a preview made of the first and the last `chunkSize` of the
     * table, or every sample when the table is smaller than three chunks.
     *
     * @returns `samples`, the `size` they were read from, and `ratio`, the
     *   fraction of the table that `size` represents
     */
    async previewSamples(chunkSize = DEFAULT_CHUNK_SIZE) {
        const size = await this.getSize()
        if (3 * chunkSize > size)
            // `size` was missing from this branch, which left getStats().size undefined.
            return { ratio: 1.0, samples: await this.getSamples(), size }
        const chunk1 = await this.getChunkAt(0, chunkSize)
        const chunk2 = await this.getChunkAt(size - chunkSize, chunkSize)
        const samples1 = chunk1.samples()
        const samples2 = chunk2.samples()
        const allSamples = [...samples1, ...samples2]

        const previewSize = 2 * chunkSize
        return { ratio: previewSize / size, samples: allSamples, size: previewSize }
    }

    /**
     * Computes min / max / mean / population standard deviation of the finite
     * numeric values of one property.
     *
     * @param property key of the property, either a literal key of the
     *   samples or a separator-joined path into nested objects
     * @param exhaustive use getSamples() instead of previewSamples()
     * @returns the statistics, or `{}` when no finite value was found
     */
    async getStats(property, exhaustive = false) {
        let min = Number.MAX_VALUE, max = -Number.MAX_VALUE, mean = 0, count = 0, stddev = 0
        const totalSize = await this.getSize()
        const { size, ratio, samples } = exhaustive ? await this.getSamples().then((samples) => ({ samples, ratio: 1, size: totalSize })) : await this.previewSamples()
        const path = property.split(SEPARATOR_CHARACTER)
        const values = samples.map(s => {
            // Only fall back to the nested path when the literal key is absent:
            // `||` also discarded a legitimate 0 stored under a dotted key.
            const direct = s[property]
            return direct !== undefined && direct !== null ? direct : get(s, path)
        })
        for (let value of values) {
            if (Number.isFinite(value)) {
                count++
                min = Math.min(min, value)
                max = Math.max(max, value)
                mean += value
            }
        }
        if (!count)
            return {}
        mean /= count
        for (let value of values) {
            if (Number.isFinite(value)) {
                const dx = (value - mean)
                stddev += dx * dx
            }
        }
        stddev = Math.sqrt(stddev / count)
        // Histogram is not computed yet; always empty.
        const histogram: any[] = []
        return {
            min,
            max,
            count,
            mean,
            stddev,
            size,
            ratio,
            histogram
        }
    }

    /**
     * Returns the samples of the table, memoised in `this.samples`.
     *
     * Tables smaller than the internal chunk size are read in one chunk;
     * larger ones are read chunk by chunk, up to `maxChunks` chunks, so very
     * large tables are truncated.
     */
    async getSamples() {
        if (this.samples)
            return this.samples
        const size = await this.getSize()
        const chunkSize = 1024 * 1024 * 250
        if (size < chunkSize) {
            const chunk = await this.getChunkAt(0, -1)
            this.samples = chunk.samples()
        } else {
            let allSamples: any[] = []
            let currentOffset = 0, chunkCount = 0
            const maxChunks = 5
            while (currentOffset < size && chunkCount < maxChunks) {
                const currentSize = Math.min(size - currentOffset, chunkSize)
                const currentChunk: DataChunk = await this.getChunkAt(currentOffset, currentSize)
                chunkCount++
                allSamples = [...allSamples, ...getChunkSamples(currentChunk)]
                currentOffset = currentOffset + currentChunk.size
            }
            this.samples = allSamples
        }
        return this.samples
    }

    /**
     * Returns the size reported by the base table type, capped by `spec.size`
     * when that is set. The result is memoised.
     */
    async getSize() {
        // Explicit "not computed yet" test: a falsy test re-queried the base
        // table on every call for an empty table (size 0).
        if (this.size === undefined || this.size === null) {
            let size = await this.baseTableType.getSize.bind(this.baseTableState)()
            if (this.spec.size)
                size = Math.min(size, this.spec.size)
            this.size = size
        }
        return this.size
    }

    /**
     * Returns the chunk for `(offset, size, final)`, building it on first use.
     * The promise is cached; when more than `MAX_CACHED_CHUNKS` entries are
     * cached, the least recently accessed ones are dropped.
     */
    async getChunkAt(offset, size, final = false): Promise<DataChunk> {
        const now = Date.now()
        const key = [offset, size, final].map(n => Number(n || 0)).join('+')
        let entry = this.chunks[key]
        if (!entry) {
            // console.log('building chunk', key)
            entry = { chunk: this.buildChunkAt(offset, size, final), accessTime: 0 }
            this.chunks[key] = entry
        }
        entry.accessTime = now
        let cacheKeys = Object.keys(this.chunks)
        if (cacheKeys.length > MAX_CACHED_CHUNKS) {
            console.log('too many cached chunks for ', this.spec.name)
            cacheKeys.sort((k1, k2) => this.chunks[k1].accessTime - this.chunks[k2].accessTime)
            for (let i = 0; i < cacheKeys.length - MAX_CACHED_CHUNKS; i++)
                delete this.chunks[cacheKeys[i]]
        }
        // Return through the local reference so the result does not depend on
        // the entry having survived the eviction above.
        return entry.chunk

    }

    /**
     * Builds a chunk: reads the base chunk, applies the transforms in order,
     * optionally flattens the samples and adapts them to an inferred schema.
     *
     * @param offset start of the chunk within the table
     * @param size size of the chunk; `<= 0` means "up to the end of the table"
     * @param final when true, the output of transforms marked `final` is kept
     *   even if the chunk does not reach the end of the table
     * @throws a string when the chunk is too large or `spec.depth` is set
     */
    async buildChunkAt(offset, size, final): Promise<DataChunk> {
        const tableSize = await this.getSize()
        if (size <= 0) {
            size = tableSize - offset
        }
        size = Math.min(size, tableSize - offset)
        if (size <= 0)
            return { samples: () => [], series: null, offset, size: 0, nrows: 0 }

        const MAX_CHUNK_SIZE = 1024 * 1024 * 500
        let baseChunk
        if (size < MAX_CHUNK_SIZE) {
            // console.log('getBaseChunk', this.spec.name, Math.round(size / (1024 * 1024)) + 'MB')
            baseChunk = await this.getBaseChunk(offset, size)
        } else {
            throw `unable to build chunk of size ${size} (max ${MAX_CHUNK_SIZE})`
        }
        const { transforms } = this
        if (!(transforms || []).length) {
            return { ...baseChunk, tEnd: Date.now() }

        }
        let samples = getChunkSamples(baseChunk)
        for (let { spec, state } of transforms || []) {
            samples = await applyTransform.bind(state)(samples, spec, this.dataset)
            if (spec.debug)
                console.log('after', spec, samples)
            if (spec.final) {
                // A `final` transform only yields output for the last chunk of
                // the table, or when the caller explicitly asks for it.
                if (final || (size + offset >= tableSize)) {
                    // console.log('FINAL ONLY !')
                    // console.log(samples)
                } else {
                    // console.log('NOT FINAL')
                    samples = []
                }
            }

        }
        const { depth, flatten } = this.spec
        if (depth) {
            throw `depth not implemented`
        }
        if (flatten) {
            samples = samples.map(s => {
                const r = flattenSample(s, {})
                return r
            })
        }


        samples = samples.map(cleanupSample)
        if (!samples.length)
            return { ...baseChunk, samples: () => [], series: null, nrows: 0 }
        if (this.spec.schema) {
            // NOTE(review): `spec.schema` only acts as a switch here; its content
            // is not passed to getSchema. Confirm whether it should seed the schema.
            const schema = getSchema(samples, {}, 100)
            samples = samples.map(s => adaptSample(s, schema))
        }
        return { ...baseChunk, samples: () => samples, nrows: samples.length, series: null, tEnd: Date.now() }
        // return Arrow.tableFromJSON(samples)
    }

    /**
     * Reads a chunk from the base table type, shifting `offset` by
     * `spec.offset` when set, and adds the timestamps `t` (before the read)
     * and `tParsed` (after the read).
     */
    async getBaseChunk(offset, size) {
        if (this.spec.offset)
            offset += this.spec.offset
        const t = Date.now()
        const baseChunk = await this.baseTableType.getChunkAt.bind(this.baseTableState)(offset, size)
        return {
            ...baseChunk,
            t,
            tParsed: Date.now()
        }
    }

    /**
     * Returns the schema of the table, memoised in `this.schema`.
     *
     * Uses the schema of the base table type when it provides one and there
     * are no transforms; otherwise infers it from the first non-empty chunk,
     * doubling the chunk size until samples are found or the table size is
     * reached.
     */
    async getSchema() {
        if (!this.schema) {
            if (this.baseTableType.getSchema && !(this.transforms || []).length) {
                const schema = this.baseTableType.getSchema.bind(this.baseTableState)()
                this.schema = await schema
            }

            if (!this.schema) {
                let samples = [], chunkSize = DEFAULT_CHUNK_SIZE
                const size = await this.getSize()
                do {
                    const chunk = await this.getChunkAt(0, chunkSize, true)
                    samples = getChunkSamples(chunk)
                    chunkSize *= 2
                } while (!samples.length && chunkSize < size)
                // `samplesForSchema` used to be passed in the `config` slot, where a
                // numeric sample count was silently ignored. A positive number is
                // now used as the slice size; any other value is passed as before.
                const { samplesForSchema } = this.spec
                this.schema = typeof samplesForSchema === 'number' && samplesForSchema > 0
                    ? getSchema(samples, {}, samplesForSchema)
                    : getSchema(samples, samplesForSchema)
            }

        }


        return this.schema
    }
}