2021-05-29：最常使用的K个单词II。在实时数据流中找到最常使用的k个单词，实现TopK类中的三个方法：TopK(k)、add(word)、topk()。

2021-05-29：最常使用的K个单词II。在实时数据流中找到最常使用的k个单词，实现TopK类中的三个方法: TopK(k)，构造方法。add(word)，增加一个新单词。topk()，得到当前最常使用的k个单词。如果两个单词有相同的使用频率，按字典序排名。

福大大答案2021-05-30：

方法一：
redis的sorted set。hash+跳表实现计数和查找。无代码。
方法二：
节点结构体：有字符串和词频。
词频表：key是字符串，value是节点。
堆：节点数组。刚开始，我以为要用大根堆；实际上应采用容量为k的小根堆：堆顶是当前前k名里排名最低的单词，新单词如果比堆顶还小，是进不了小根堆的。
反向表：key是节点，value是在堆中的索引。
有代码。

代码用golang编写。代码如下：

package main

import (

    "fmt"

    "sort"

)

// main replays the sample word stream through a TopK of size 2 and prints the
// words returned by topk, one per line.
func main() {
	tracker := NewTopK(2)
	for _, w := range []string{"fdd", "moon", "moonfdd", "moonfdd"} {
		tracker.add(w)
	}

	for _, word := range tracker.topk() {
		fmt.Println(word)
	}
}

// TopK maintains the k most frequently added words of a stream.
//
// Every word that has been added keeps an exact count in wordNodeMap, but only
// the k best-ranked words are held in heap. Rank is defined by compare: a
// higher count ranks higher, and on equal counts the lexicographically smaller
// word ranks higher.
type TopK struct {
	// heap is a fixed-capacity binary min-heap ordered by compare, so heap[0]
	// is the lowest-ranked word currently kept. Only heap[:heapSize] is live.
	heap     []*Node
	heapSize int

	// wordNodeMap maps each word seen so far to its counting node.
	wordNodeMap map[string]*Node

	// nodeIndexMap is the reverse index: a node's position in heap, or -1 when
	// the node is not in the heap.
	nodeIndexMap map[*Node]int
}

// NewTopK returns a TopK that reports at most k words. A k of zero or less
// yields a tracker that ignores every add and always reports no words.
func NewTopK(k int) *TopK {
	if k < 0 {
		// make panics on a negative length; treat it like an empty tracker.
		k = 0
	}
	return &TopK{
		heap:         make([]*Node, k),
		wordNodeMap:  make(map[string]*Node),
		nodeIndexMap: make(map[*Node]int),
	}
}

// add records one more occurrence of word and updates the kept top-k set.
func (t *TopK) add(word string) {
	if len(t.heap) == 0 {
		return
	}

	// Update the frequency table; a first-time word starts outside the heap.
	node, seen := t.wordNodeMap[word]
	if seen {
		node.Times++
	} else {
		node = &Node{Str: word, Times: 1}
		t.wordNodeMap[word] = node
		t.nodeIndexMap[node] = -1
	}

	idx := t.nodeIndexMap[node]
	switch {
	case idx >= 0:
		// Already kept: its count only grew, so in a min-heap it can only
		// need to move towards the leaves.
		t.HeapDown(idx)
	case t.heapSize < len(t.heap):
		t.Push(node)
	case t.compare(node, t.heap[0]):
		// Ranks below the weakest kept word: it stays out of the heap.
	default:
		// Replace the weakest kept word with this one and restore heap order.
		evicted := t.heap[0]
		t.heap[0] = node
		t.nodeIndexMap[evicted] = -1
		t.nodeIndexMap[node] = 0
		t.HeapDown(0)
	}
}

// topk returns the currently kept words ordered from highest to lowest rank.
// The result has min(k, number of distinct words added) entries and is a fresh
// slice on every call.
func (t *TopK) topk() []string {
	ranked := make([]*Node, t.heapSize)
	copy(ranked, t.heap)
	// i sorts before j exactly when j ranks strictly below i; this is a strict
	// ordering because two distinct nodes never hold the same word.
	sort.Slice(ranked, func(i, j int) bool {
		return t.compare(ranked[j], ranked[i])
	})

	ans := make([]string, len(ranked))
	for i, n := range ranked {
		ans[i] = n.Str
	}
	return ans
}

// Node pairs a word with the number of times it has been added.
type Node struct {
	Str   string
	Times int
}

// HeapUp moves the node at index towards the root of the min-heap until its
// parent ranks below it. An index of zero or less is a no-op.
func (t *TopK) HeapUp(index int) {
	for index > 0 {
		parent := (index - 1) / 2
		if t.compare(t.heap[parent], t.heap[index]) {
			return
		}
		t.swap(parent, index)
		index = parent
	}
}

// HeapDown moves the node at index towards the leaves of the min-heap until it
// ranks below both of its children. An index outside the live heap is a no-op.
func (t *TopK) HeapDown(index int) {
	if index < 0 {
		return
	}
	for {
		child := 2*index + 1
		if child >= t.heapSize {
			return
		}
		// Pick the lower-ranked of the two children.
		if right := child + 1; right < t.heapSize && t.compare(t.heap[right], t.heap[child]) {
			child = right
		}
		if t.compare(t.heap[index], t.heap[child]) {
			return
		}
		t.swap(index, child)
		index = child
	}
}

// swap exchanges heap slots i and j and records each node's new position in
// the reverse index.
func (t *TopK) swap(i, j int) {
	t.heap[i], t.heap[j] = t.heap[j], t.heap[i]
	t.nodeIndexMap[t.heap[i]] = i
	t.nodeIndexMap[t.heap[j]] = j
}

// Push appends node as a new leaf and sifts it up. The heap must not be full
// (heapSize < len(heap)); otherwise the slot write indexes out of range.
func (t *TopK) Push(node *Node) {
	t.heap[t.heapSize] = node
	t.nodeIndexMap[node] = t.heapSize
	t.HeapUp(t.heapSize)
	t.heapSize++
}

// Pop removes and returns the lowest-ranked kept node, or nil when the heap is
// empty. The removed node stays in wordNodeMap with reverse index -1.
func (t *TopK) Pop() *Node {
	if t.heapSize == 0 {
		return nil
	}
	top := t.heap[0]
	last := t.heapSize - 1
	t.swap(0, last)
	t.nodeIndexMap[top] = -1
	// Clear the vacated slot so the heap holds no reference past heapSize.
	t.heap[last] = nil
	t.heapSize = last
	t.HeapDown(0)
	return top
}

// compare reports whether node1 ranks below node2: a lower count ranks lower,
// and on equal counts the lexicographically greater word ranks lower.
func (t *TopK) compare(node1 *Node, node2 *Node) bool {
	if node1.Times == node2.Times {
		return node1.Str > node2.Str
	}
	return node1.Times < node2.Times
}

执行结果如下：

福大大答案2021-05-29：

方法一：
redis的sorted set。hash+跳表实现计数和查找。无代码。
方法二：
节点结构体：有字符串和词频。
词频表：key是字符串，value是节点。
堆：节点数组。
反向表：key是节点，value是在堆中的索引。
有代码，但不完整，因为时间紧。

代码用golang编写。代码如下：

package main

import "fmt"

// main feeds a short sample stream into a TopK of size 2 and prints the slice
// returned by topk.
func main() {
	tracker := NewTopK(2)
	for _, w := range []string{"lint", "code", "code"} {
		tracker.add(w)
	}

	ranking := tracker.topk()
	fmt.Println(ranking)
}

// TopK keeps the k most frequently added words of a stream.
//
// All words are counted in wordNodeMap; the k best-ranked ones are held in a
// min-heap so that the weakest kept word sits at heap[0] and can be compared
// against, and replaced by, a newcomer. Rank is defined by ranksBelow.
type TopK struct {
	// heap is the fixed-capacity min-heap; only heap[:heapSize] is live.
	heap     []*Node
	heapSize int

	// wordNodeMap maps each word seen so far to its counting node.
	wordNodeMap map[string]*Node

	// nodeIndexMap is the reverse index: a node's position in heap, or -1 when
	// the node is not in the heap.
	nodeIndexMap map[*Node]int
}

// NewTopK returns a TopK that reports at most k words. A k of zero or less
// yields a tracker that ignores every add.
func NewTopK(k int) *TopK {
	if k < 0 {
		// make panics on a negative length.
		k = 0
	}
	return &TopK{
		heap: make([]*Node, k),
		// Both maps are written by add, so they must be allocated up front:
		// writing to a nil map panics.
		wordNodeMap:  make(map[string]*Node),
		nodeIndexMap: make(map[*Node]int),
	}
}

// add records one more occurrence of word and updates the kept top-k set.
func (t *TopK) add(word string) {
	if len(t.heap) == 0 {
		return
	}

	curNode := t.wordNodeMap[word]
	preIndex := -1
	if curNode == nil {
		curNode = &Node{Str: word, Times: 1}
		t.wordNodeMap[word] = curNode
		t.nodeIndexMap[curNode] = -1
	} else {
		curNode.Times++
		preIndex = t.nodeIndexMap[curNode]
	}

	if preIndex != -1 {
		// Already kept: a larger count can only push it away from the root
		// of a min-heap.
		t.HeapDown(preIndex)
		return
	}
	if t.heapSize < len(t.heap) {
		t.Push(curNode)
		return
	}
	if ranksBelow(curNode, t.heap[0]) {
		// Weaker than the weakest kept word: it stays out of the heap.
		return
	}
	// Evict the weakest kept word to make room for this one.
	t.Pop()
	t.Push(curNode)
}

// topk returns the currently kept words ordered from highest to lowest rank.
// The tracker itself is not modified.
func (t *TopK) topk() []string {
	// Drain a private copy of the live heap; Pop yields the weakest word
	// first, so the answer is filled from the back.
	scratch := &TopK{
		heap:         append([]*Node(nil), t.heap[:t.heapSize]...),
		heapSize:     t.heapSize,
		nodeIndexMap: make(map[*Node]int, t.heapSize),
	}
	ans := make([]string, t.heapSize)
	for i := len(ans) - 1; i >= 0; i-- {
		ans[i] = scratch.Pop().Str
	}
	return ans
}

// Node pairs a word with the number of times it has been added.
type Node struct {
	Str   string
	Times int
}

// ranksBelow reports whether a ranks below b: a lower count ranks lower, and
// on equal counts the lexicographically greater word ranks lower.
func ranksBelow(a, b *Node) bool {
	if a.Times != b.Times {
		return a.Times < b.Times
	}
	return a.Str > b.Str
}

// HeapUp moves the node at index towards the root of the min-heap until its
// parent ranks below it. An index of zero or less is a no-op.
func (t *TopK) HeapUp(index int) {
	for index > 0 {
		parent := (index - 1) / 2
		if ranksBelow(t.heap[parent], t.heap[index]) {
			return
		}
		t.swap(parent, index)
		index = parent
	}
}

// HeapDown moves the node at index towards the leaves of the min-heap until it
// ranks below both of its children. An index outside the live heap is a no-op.
func (t *TopK) HeapDown(index int) {
	if index < 0 {
		return
	}
	for {
		child := 2*index + 1
		if child >= t.heapSize {
			return
		}
		// Pick the lower-ranked of the two children.
		if right := child + 1; right < t.heapSize && ranksBelow(t.heap[right], t.heap[child]) {
			child = right
		}
		if ranksBelow(t.heap[index], t.heap[child]) {
			return
		}
		t.swap(index, child)
		index = child
	}
}

// swap exchanges heap slots i and j and records each node's new position in
// the reverse index.
func (t *TopK) swap(i, j int) {
	t.heap[i], t.heap[j] = t.heap[j], t.heap[i]
	t.nodeIndexMap[t.heap[i]] = i
	t.nodeIndexMap[t.heap[j]] = j
}

// Push appends node as a new leaf and sifts it up. The heap must not be full
// (heapSize < len(heap)); otherwise the slot write indexes out of range.
func (t *TopK) Push(node *Node) {
	t.heap[t.heapSize] = node
	t.nodeIndexMap[node] = t.heapSize
	t.heapSize++
	// The new node sits in the last live slot, one below the new size.
	t.HeapUp(t.heapSize - 1)
}

// Pop removes and returns the lowest-ranked kept node, or nil when the heap is
// empty. The removed node's reverse index is set to -1.
func (t *TopK) Pop() *Node {
	if t.heapSize == 0 {
		return nil
	}
	ans := t.heap[0]
	last := t.heapSize - 1
	t.swap(0, last)
	t.nodeIndexMap[ans] = -1
	// Clear the vacated slot so the heap holds no reference past heapSize.
	t.heap[last] = nil
	t.heapSize = last
	t.HeapDown(0)
	return ans
}

执行结果如下：

左神java代码

2021-05-29：最常使用的K个单词II。在实时数据流中找到最常使用的k个单词，实现TopK类中的三个方法: Top

2021-05-29：最常使用的K个单词II。在实时数据流中找到最常使用的k个单词，实现TopK类中的三个方法: Top的相关教程结束。

相关推荐

复刻smartbits的国产网络测试工具minismb-如何添加数据流

2022-01-23：力扣425，单词方块。给定一个单词集合（没有重复），找出其中所有的单词方块。一个单词序列形成了一个有效的单词方块的意思是指从第 k 行和第 k 列 (0 ≤ k ＜ m

2022-01-29：连接词。给你一个不含重复单词的字符串数组 words ，请你找出并返回 words 中的所有连接词。连接词定义为：一个完全由给定数组中的至少两个较短单词组成的字符串

2021-05-22：假设所有字符都是小写字母，大字符串是str，arr是去重的单词表，每个单词都不是空字符串且可以使用任意次。使用arr中的单词有多少种拼接str的方式。返回方法数。

详解java8中的Stream数据流

C++按单词换行的函数的代码

2.Storm集群部署及单词统计案例

2021-05-29：最常使用的K个单词II。在实时数据流中找到最常使用的k个单词，实现TopK类中的三个方法: Top

2021-05-29：最常使用的K个单词II。在实时数据流中找到最常使用的k个单词，实现TopK类中的三个方法: Top的相关教程结束。

相关推荐

复刻smartbits的国产网络测试工具minismb-如何添加数据流

2022-01-23：力扣425，单词方块。 给定一个单词集合 （没有重复），找出其中所有的 单词方块 。 一个单词序列形成了一个有效的单词方块的意思是指从第 k 行和第 k 列 (0 ≤ k ＜ m

2022-01-29：连接词。 给你一个 不含重复 单词的字符串数组 words ，请你找出并返回 words 中的所有 连接词 。 连接词 定义为：一个完全由给定数组中的至少两个较短单词组成的字符串

2021-05-22：假设所有字符都是小写字母， 大字符串是str，arr是去重的单词表， 每个单词都不是空字符串且可以使用任意次。使用arr中的单词有多少种拼接str的方式。 返回方法数。

详解java8中的Stream数据流

C++按单词换行的函数的代码

2.Storm集群部署及单词统计案例

2022-01-23：力扣425，单词方块。给定一个单词集合（没有重复），找出其中所有的单词方块。一个单词序列形成了一个有效的单词方块的意思是指从第 k 行和第 k 列 (0 ≤ k ＜ m

2022-01-29：连接词。给你一个不含重复单词的字符串数组 words ，请你找出并返回 words 中的所有连接词。连接词定义为：一个完全由给定数组中的至少两个较短单词组成的字符串

2021-05-22：假设所有字符都是小写字母，大字符串是str，arr是去重的单词表，每个单词都不是空字符串且可以使用任意次。使用arr中的单词有多少种拼接str的方式。返回方法数。