2019-02-19 22:39:39 +08:00
|
|
|
// Copyright 2018 The Gitea Authors. All rights reserved.
|
2022-11-28 02:20:29 +08:00
|
|
|
// SPDX-License-Identifier: MIT
|
2019-02-19 22:39:39 +08:00
|
|
|
|
|
|
|
package issues
|
|
|
|
|
2019-02-21 08:54:05 +08:00
|
|
|
import (
|
2020-01-07 19:23:09 +08:00
|
|
|
"context"
|
|
|
|
"fmt"
|
|
|
|
"os"
|
2022-04-01 01:01:43 +08:00
|
|
|
"runtime/pprof"
|
2019-10-15 21:39:51 +08:00
|
|
|
"sync"
|
|
|
|
"time"
|
2019-02-21 08:54:05 +08:00
|
|
|
|
2021-09-24 19:32:56 +08:00
|
|
|
"code.gitea.io/gitea/models/db"
|
2022-06-13 17:37:59 +08:00
|
|
|
issues_model "code.gitea.io/gitea/models/issues"
|
2021-12-10 09:27:50 +08:00
|
|
|
repo_model "code.gitea.io/gitea/models/repo"
|
2019-10-15 21:39:51 +08:00
|
|
|
"code.gitea.io/gitea/modules/graceful"
|
2019-02-21 08:54:05 +08:00
|
|
|
"code.gitea.io/gitea/modules/log"
|
2022-04-01 01:01:43 +08:00
|
|
|
"code.gitea.io/gitea/modules/process"
|
2020-01-07 19:23:09 +08:00
|
|
|
"code.gitea.io/gitea/modules/queue"
|
2019-02-21 08:54:05 +08:00
|
|
|
"code.gitea.io/gitea/modules/setting"
|
|
|
|
"code.gitea.io/gitea/modules/util"
|
|
|
|
)
|
|
|
|
|
2019-02-19 22:39:39 +08:00
|
|
|
// IndexerData is the unit of work pushed through the issue indexer queue and
// the document shape handed to an Indexer. An item either describes one issue
// to be indexed, or (when IsDelete is set) a batch of issue IDs to remove.
type IndexerData struct {
	ID       int64    `json:"id"`
	RepoID   int64    `json:"repo_id"`
	Title    string   `json:"title"`
	Content  string   `json:"content"`
	Comments []string `json:"comments"`
	IsDelete bool     `json:"is_delete"` // when true the item is a delete request rather than a document to index
	IDs      []int64  `json:"ids"`       // IDs passed to Indexer.Delete when IsDelete is true
}
|
|
|
|
|
|
|
|
// Match represents one search result: the ID of the hit together with the
// score the indexer assigned to it.
type Match struct {
	ID    int64   `json:"id"`
	Score float64 `json:"score"`
}
|
|
|
|
|
|
|
|
// SearchResult represents search results
|
|
|
|
type SearchResult struct {
|
2019-02-21 13:01:28 +08:00
|
|
|
Total int64
|
|
|
|
Hits []Match
|
2019-02-19 22:39:39 +08:00
|
|
|
}
|
|
|
|
|
2019-12-23 20:31:16 +08:00
|
|
|
// Indexer defines an interface to indexer issues contents
|
2019-02-19 22:39:39 +08:00
|
|
|
type Indexer interface {
|
|
|
|
Init() (bool, error)
|
2022-01-27 16:30:51 +08:00
|
|
|
Ping() bool
|
2019-02-19 22:39:39 +08:00
|
|
|
Index(issue []*IndexerData) error
|
|
|
|
Delete(ids ...int64) error
|
2022-01-27 16:30:51 +08:00
|
|
|
Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*SearchResult, error)
|
2020-01-07 19:23:09 +08:00
|
|
|
Close()
|
2019-02-19 22:39:39 +08:00
|
|
|
}
|
2019-02-21 08:54:05 +08:00
|
|
|
|
2019-10-15 21:39:51 +08:00
|
|
|
type indexerHolder struct {
|
2020-01-07 19:23:09 +08:00
|
|
|
indexer Indexer
|
|
|
|
mutex sync.RWMutex
|
|
|
|
cond *sync.Cond
|
|
|
|
cancelled bool
|
2019-10-15 21:39:51 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func newIndexerHolder() *indexerHolder {
|
|
|
|
h := &indexerHolder{}
|
|
|
|
h.cond = sync.NewCond(h.mutex.RLocker())
|
|
|
|
return h
|
|
|
|
}
|
|
|
|
|
2020-01-07 19:23:09 +08:00
|
|
|
func (h *indexerHolder) cancel() {
|
|
|
|
h.mutex.Lock()
|
|
|
|
defer h.mutex.Unlock()
|
|
|
|
h.cancelled = true
|
|
|
|
h.cond.Broadcast()
|
|
|
|
}
|
|
|
|
|
2019-10-15 21:39:51 +08:00
|
|
|
func (h *indexerHolder) set(indexer Indexer) {
|
|
|
|
h.mutex.Lock()
|
|
|
|
defer h.mutex.Unlock()
|
|
|
|
h.indexer = indexer
|
|
|
|
h.cond.Broadcast()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (h *indexerHolder) get() Indexer {
|
|
|
|
h.mutex.RLock()
|
|
|
|
defer h.mutex.RUnlock()
|
2020-01-07 19:23:09 +08:00
|
|
|
if h.indexer == nil && !h.cancelled {
|
2019-10-15 21:39:51 +08:00
|
|
|
h.cond.Wait()
|
|
|
|
}
|
|
|
|
return h.indexer
|
|
|
|
}
|
|
|
|
|
2019-02-21 08:54:05 +08:00
|
|
|
var (
|
2019-04-08 17:05:15 +08:00
|
|
|
// issueIndexerQueue queue of issue ids to be updated
|
Rewrite queue (#24505)
# ⚠️ Breaking
Many deprecated queue config options are removed (actually, they should
have been removed in 1.18/1.19).
If you see the fatal message when starting Gitea: "Please update your
app.ini to remove deprecated config options", please follow the error
messages to remove these options from your app.ini.
Example:
```
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].ISSUE_INDEXER_QUEUE_TYPE`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].UPDATE_BUFFER_LEN`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [F] Please update your app.ini to remove deprecated config options
```
Many options in `[queue]` are are dropped, including:
`WRAP_IF_NECESSARY`, `MAX_ATTEMPTS`, `TIMEOUT`, `WORKERS`,
`BLOCK_TIMEOUT`, `BOOST_TIMEOUT`, `BOOST_WORKERS`, they can be removed
from app.ini.
# The problem
The old queue package has some legacy problems:
* complexity: I doubt few people could tell how it works.
* maintainability: Too many channels and mutex/cond are mixed together,
too many different structs/interfaces depends each other.
* stability: due to the complexity & maintainability, sometimes there
are strange bugs and difficult to debug, and some code doesn't have test
(indeed some code is difficult to test because a lot of things are mixed
together).
* general applicability: although it is called "queue", its behavior is
not a well-known queue.
* scalability: it doesn't seem easy to make it work with a cluster
without breaking its behaviors.
It came from some very old code to "avoid breaking", however, its
technical debt is too heavy now. It's a good time to introduce a better
"queue" package.
# The new queue package
It keeps using old config and concept as much as possible.
* It only contains two major kinds of concepts:
* The "base queue": channel, levelqueue, redis
* They have the same abstraction, the same interface, and they are
tested by the same testing code.
* The "WokerPoolQueue", it uses the "base queue" to provide "worker
pool" function, calls the "handler" to process the data in the base
queue.
* The new code doesn't do "PushBack"
* Think about a queue with many workers, the "PushBack" can't guarantee
the order for re-queued unhandled items, so in new code it just does
"normal push"
* The new code doesn't do "pause/resume"
* The "pause/resume" was designed to handle some handler's failure: eg:
document indexer (elasticsearch) is down
* If a queue is paused for long time, either the producers blocks or the
new items are dropped.
* The new code doesn't do such "pause/resume" trick, it's not a common
queue's behavior and it doesn't help much.
* If there are unhandled items, the "push" function just blocks for a
few seconds and then re-queue them and retry.
* The new code doesn't do "worker booster"
* Gitea's queue's handlers are light functions, the cost is only the
go-routine, so it doesn't make sense to "boost" them.
* The new code only use "max worker number" to limit the concurrent
workers.
* The new "Push" never blocks forever
* Instead of creating more and more blocking goroutines, return an error
is more friendly to the server and to the end user.
There are more details in code comments: eg: the "Flush" problem, the
strange "code.index" hanging problem, the "immediate" queue problem.
Almost ready for review.
TODO:
* [x] add some necessary comments during review
* [x] add some more tests if necessary
* [x] update documents and config options
* [x] test max worker / active worker
* [x] re-run the CI tasks to see whether any test is flaky
* [x] improve the `handleOldLengthConfiguration` to provide more
friendly messages
* [x] fine tune default config values (eg: length?)
## Code coverage:
![image](https://user-images.githubusercontent.com/2114189/236620635-55576955-f95d-4810-b12f-879026a3afdf.png)
2023-05-08 19:49:59 +08:00
|
|
|
issueIndexerQueue *queue.WorkerPoolQueue[*IndexerData]
|
2019-10-15 21:39:51 +08:00
|
|
|
holder = newIndexerHolder()
|
2019-02-21 08:54:05 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
// InitIssueIndexer initialize issue indexer, syncReindex is true then reindex until
|
|
|
|
// all issue index done.
|
2019-10-15 21:39:51 +08:00
|
|
|
func InitIssueIndexer(syncReindex bool) {
|
2022-04-01 01:01:43 +08:00
|
|
|
ctx, _, finished := process.GetManager().AddTypedContext(context.Background(), "Service: IssueIndexer", process.SystemProcessType, false)
|
|
|
|
|
2022-04-27 07:22:26 +08:00
|
|
|
waitChannel := make(chan time.Duration, 1)
|
2020-01-07 19:23:09 +08:00
|
|
|
|
|
|
|
// Create the Queue
|
|
|
|
switch setting.Indexer.IssueType {
|
2023-03-29 10:23:23 +08:00
|
|
|
case "bleve", "elasticsearch", "meilisearch":
|
Rewrite queue (#24505)
# ⚠️ Breaking
Many deprecated queue config options are removed (actually, they should
have been removed in 1.18/1.19).
If you see the fatal message when starting Gitea: "Please update your
app.ini to remove deprecated config options", please follow the error
messages to remove these options from your app.ini.
Example:
```
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].ISSUE_INDEXER_QUEUE_TYPE`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].UPDATE_BUFFER_LEN`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [F] Please update your app.ini to remove deprecated config options
```
Many options in `[queue]` are dropped, including:
`WRAP_IF_NECESSARY`, `MAX_ATTEMPTS`, `TIMEOUT`, `WORKERS`,
`BLOCK_TIMEOUT`, `BOOST_TIMEOUT`, `BOOST_WORKERS`, they can be removed
from app.ini.
# The problem
The old queue package has some legacy problems:
* complexity: I doubt more than a few people could tell how it works.
* maintainability: Too many channels and mutex/cond are mixed together,
too many different structs/interfaces depends each other.
* stability: due to the complexity & maintainability, sometimes there
are strange bugs and difficult to debug, and some code doesn't have test
(indeed some code is difficult to test because a lot of things are mixed
together).
* general applicability: although it is called "queue", its behavior is
not a well-known queue.
* scalability: it doesn't seem easy to make it work with a cluster
without breaking its behaviors.
It came from some very old code to "avoid breaking", however, its
technical debt is too heavy now. It's a good time to introduce a better
"queue" package.
# The new queue package
It keeps using old config and concept as much as possible.
* It only contains two major kinds of concepts:
* The "base queue": channel, levelqueue, redis
* They have the same abstraction, the same interface, and they are
tested by the same testing code.
* The "WorkerPoolQueue", it uses the "base queue" to provide "worker
pool" function, calls the "handler" to process the data in the base
queue.
* The new code doesn't do "PushBack"
* Think about a queue with many workers, the "PushBack" can't guarantee
the order for re-queued unhandled items, so in new code it just does
"normal push"
* The new code doesn't do "pause/resume"
* The "pause/resume" was designed to handle some handler's failure: eg:
document indexer (elasticsearch) is down
* If a queue is paused for long time, either the producers blocks or the
new items are dropped.
* The new code doesn't do such "pause/resume" trick, it's not a common
queue's behavior and it doesn't help much.
* If there are unhandled items, the "push" function just blocks for a
few seconds and then re-queue them and retry.
* The new code doesn't do "worker booster"
* Gitea's queue's handlers are light functions, the cost is only the
go-routine, so it doesn't make sense to "boost" them.
* The new code only use "max worker number" to limit the concurrent
workers.
* The new "Push" never blocks forever
* Instead of creating more and more blocking goroutines, return an error
is more friendly to the server and to the end user.
There are more details in code comments: eg: the "Flush" problem, the
strange "code.index" hanging problem, the "immediate" queue problem.
Almost ready for review.
TODO:
* [x] add some necessary comments during review
* [x] add some more tests if necessary
* [x] update documents and config options
* [x] test max worker / active worker
* [x] re-run the CI tasks to see whether any test is flaky
* [x] improve the `handleOldLengthConfiguration` to provide more
friendly messages
* [x] fine tune default config values (eg: length?)
## Code coverage:
![image](https://user-images.githubusercontent.com/2114189/236620635-55576955-f95d-4810-b12f-879026a3afdf.png)
2023-05-08 19:49:59 +08:00
|
|
|
handler := func(items ...*IndexerData) (unhandled []*IndexerData) {
|
2020-01-07 19:23:09 +08:00
|
|
|
indexer := holder.get()
|
|
|
|
if indexer == nil {
|
Rewrite queue (#24505)
# ⚠️ Breaking
Many deprecated queue config options are removed (actually, they should
have been removed in 1.18/1.19).
If you see the fatal message when starting Gitea: "Please update your
app.ini to remove deprecated config options", please follow the error
messages to remove these options from your app.ini.
Example:
```
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].ISSUE_INDEXER_QUEUE_TYPE`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].UPDATE_BUFFER_LEN`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [F] Please update your app.ini to remove deprecated config options
```
Many options in `[queue]` are are dropped, including:
`WRAP_IF_NECESSARY`, `MAX_ATTEMPTS`, `TIMEOUT`, `WORKERS`,
`BLOCK_TIMEOUT`, `BOOST_TIMEOUT`, `BOOST_WORKERS`, they can be removed
from app.ini.
# The problem
The old queue package has some legacy problems:
* complexity: I doubt few people could tell how it works.
* maintainability: Too many channels and mutex/cond are mixed together,
too many different structs/interfaces depends each other.
* stability: due to the complexity & maintainability, sometimes there
are strange bugs and difficult to debug, and some code doesn't have test
(indeed some code is difficult to test because a lot of things are mixed
together).
* general applicability: although it is called "queue", its behavior is
not a well-known queue.
* scalability: it doesn't seem easy to make it work with a cluster
without breaking its behaviors.
It came from some very old code to "avoid breaking", however, its
technical debt is too heavy now. It's a good time to introduce a better
"queue" package.
# The new queue package
It keeps using old config and concept as much as possible.
* It only contains two major kinds of concepts:
* The "base queue": channel, levelqueue, redis
* They have the same abstraction, the same interface, and they are
tested by the same testing code.
* The "WokerPoolQueue", it uses the "base queue" to provide "worker
pool" function, calls the "handler" to process the data in the base
queue.
* The new code doesn't do "PushBack"
* Think about a queue with many workers, the "PushBack" can't guarantee
the order for re-queued unhandled items, so in new code it just does
"normal push"
* The new code doesn't do "pause/resume"
* The "pause/resume" was designed to handle some handler's failure: eg:
document indexer (elasticsearch) is down
* If a queue is paused for long time, either the producers blocks or the
new items are dropped.
* The new code doesn't do such "pause/resume" trick, it's not a common
queue's behavior and it doesn't help much.
* If there are unhandled items, the "push" function just blocks for a
few seconds and then re-queue them and retry.
* The new code doesn't do "worker booster"
* Gitea's queue's handlers are light functions, the cost is only the
go-routine, so it doesn't make sense to "boost" them.
* The new code only use "max worker number" to limit the concurrent
workers.
* The new "Push" never blocks forever
* Instead of creating more and more blocking goroutines, return an error
is more friendly to the server and to the end user.
There are more details in code comments: eg: the "Flush" problem, the
strange "code.index" hanging problem, the "immediate" queue problem.
Almost ready for review.
TODO:
* [x] add some necessary comments during review
* [x] add some more tests if necessary
* [x] update documents and config options
* [x] test max worker / active worker
* [x] re-run the CI tasks to see whether any test is flaky
* [x] improve the `handleOldLengthConfiguration` to provide more
friendly messages
* [x] fine tune default config values (eg: length?)
## Code coverage:
![image](https://user-images.githubusercontent.com/2114189/236620635-55576955-f95d-4810-b12f-879026a3afdf.png)
2023-05-08 19:49:59 +08:00
|
|
|
log.Error("Issue indexer handler: unable to get indexer.")
|
|
|
|
return items
|
2020-01-07 19:23:09 +08:00
|
|
|
}
|
Rewrite queue (#24505)
# ⚠️ Breaking
Many deprecated queue config options are removed (actually, they should
have been removed in 1.18/1.19).
If you see the fatal message when starting Gitea: "Please update your
app.ini to remove deprecated config options", please follow the error
messages to remove these options from your app.ini.
Example:
```
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].ISSUE_INDEXER_QUEUE_TYPE`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].UPDATE_BUFFER_LEN`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [F] Please update your app.ini to remove deprecated config options
```
Many options in `[queue]` are are dropped, including:
`WRAP_IF_NECESSARY`, `MAX_ATTEMPTS`, `TIMEOUT`, `WORKERS`,
`BLOCK_TIMEOUT`, `BOOST_TIMEOUT`, `BOOST_WORKERS`, they can be removed
from app.ini.
# The problem
The old queue package has some legacy problems:
* complexity: I doubt few people could tell how it works.
* maintainability: Too many channels and mutex/cond are mixed together,
too many different structs/interfaces depends each other.
* stability: due to the complexity & maintainability, sometimes there
are strange bugs and difficult to debug, and some code doesn't have test
(indeed some code is difficult to test because a lot of things are mixed
together).
* general applicability: although it is called "queue", its behavior is
not a well-known queue.
* scalability: it doesn't seem easy to make it work with a cluster
without breaking its behaviors.
It came from some very old code to "avoid breaking", however, its
technical debt is too heavy now. It's a good time to introduce a better
"queue" package.
# The new queue package
It keeps using old config and concept as much as possible.
* It only contains two major kinds of concepts:
* The "base queue": channel, levelqueue, redis
* They have the same abstraction, the same interface, and they are
tested by the same testing code.
* The "WokerPoolQueue", it uses the "base queue" to provide "worker
pool" function, calls the "handler" to process the data in the base
queue.
* The new code doesn't do "PushBack"
* Think about a queue with many workers, the "PushBack" can't guarantee
the order for re-queued unhandled items, so in new code it just does
"normal push"
* The new code doesn't do "pause/resume"
* The "pause/resume" was designed to handle some handler's failure: eg:
document indexer (elasticsearch) is down
* If a queue is paused for long time, either the producers blocks or the
new items are dropped.
* The new code doesn't do such "pause/resume" trick, it's not a common
queue's behavior and it doesn't help much.
* If there are unhandled items, the "push" function just blocks for a
few seconds and then re-queue them and retry.
* The new code doesn't do "worker booster"
* Gitea's queue's handlers are light functions, the cost is only the
go-routine, so it doesn't make sense to "boost" them.
* The new code only use "max worker number" to limit the concurrent
workers.
* The new "Push" never blocks forever
* Instead of creating more and more blocking goroutines, return an error
is more friendly to the server and to the end user.
There are more details in code comments: eg: the "Flush" problem, the
strange "code.index" hanging problem, the "immediate" queue problem.
Almost ready for review.
TODO:
* [x] add some necessary comments during review
* [x] add some more tests if necessary
* [x] update documents and config options
* [x] test max worker / active worker
* [x] re-run the CI tasks to see whether any test is flaky
* [x] improve the `handleOldLengthConfiguration` to provide more
friendly messages
* [x] fine tune default config values (eg: length?)
## Code coverage:
![image](https://user-images.githubusercontent.com/2114189/236620635-55576955-f95d-4810-b12f-879026a3afdf.png)
2023-05-08 19:49:59 +08:00
|
|
|
toIndex := make([]*IndexerData, 0, len(items))
|
|
|
|
for _, indexerData := range items {
|
2020-01-07 19:23:09 +08:00
|
|
|
log.Trace("IndexerData Process: %d %v %t", indexerData.ID, indexerData.IDs, indexerData.IsDelete)
|
|
|
|
if indexerData.IsDelete {
|
2022-01-27 16:30:51 +08:00
|
|
|
if err := indexer.Delete(indexerData.IDs...); err != nil {
|
Rewrite queue (#24505)
# ⚠️ Breaking
Many deprecated queue config options are removed (actually, they should
have been removed in 1.18/1.19).
If you see the fatal message when starting Gitea: "Please update your
app.ini to remove deprecated config options", please follow the error
messages to remove these options from your app.ini.
Example:
```
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].ISSUE_INDEXER_QUEUE_TYPE`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].UPDATE_BUFFER_LEN`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [F] Please update your app.ini to remove deprecated config options
```
Many options in `[queue]` are are dropped, including:
`WRAP_IF_NECESSARY`, `MAX_ATTEMPTS`, `TIMEOUT`, `WORKERS`,
`BLOCK_TIMEOUT`, `BOOST_TIMEOUT`, `BOOST_WORKERS`, they can be removed
from app.ini.
# The problem
The old queue package has some legacy problems:
* complexity: I doubt few people could tell how it works.
* maintainability: Too many channels and mutex/cond are mixed together,
too many different structs/interfaces depends each other.
* stability: due to the complexity & maintainability, sometimes there
are strange bugs and difficult to debug, and some code doesn't have test
(indeed some code is difficult to test because a lot of things are mixed
together).
* general applicability: although it is called "queue", its behavior is
not a well-known queue.
* scalability: it doesn't seem easy to make it work with a cluster
without breaking its behaviors.
It came from some very old code to "avoid breaking", however, its
technical debt is too heavy now. It's a good time to introduce a better
"queue" package.
# The new queue package
It keeps using old config and concept as much as possible.
* It only contains two major kinds of concepts:
* The "base queue": channel, levelqueue, redis
* They have the same abstraction, the same interface, and they are
tested by the same testing code.
* The "WokerPoolQueue", it uses the "base queue" to provide "worker
pool" function, calls the "handler" to process the data in the base
queue.
* The new code doesn't do "PushBack"
* Think about a queue with many workers, the "PushBack" can't guarantee
the order for re-queued unhandled items, so in new code it just does
"normal push"
* The new code doesn't do "pause/resume"
* The "pause/resume" was designed to handle some handler's failure: eg:
document indexer (elasticsearch) is down
* If a queue is paused for long time, either the producers blocks or the
new items are dropped.
* The new code doesn't do such "pause/resume" trick, it's not a common
queue's behavior and it doesn't help much.
* If there are unhandled items, the "push" function just blocks for a
few seconds and then re-queue them and retry.
* The new code doesn't do "worker booster"
* Gitea's queue's handlers are light functions, the cost is only the
go-routine, so it doesn't make sense to "boost" them.
* The new code only use "max worker number" to limit the concurrent
workers.
* The new "Push" never blocks forever
* Instead of creating more and more blocking goroutines, return an error
is more friendly to the server and to the end user.
There are more details in code comments: eg: the "Flush" problem, the
strange "code.index" hanging problem, the "immediate" queue problem.
Almost ready for review.
TODO:
* [x] add some necessary comments during review
* [x] add some more tests if necessary
* [x] update documents and config options
* [x] test max worker / active worker
* [x] re-run the CI tasks to see whether any test is flaky
* [x] improve the `handleOldLengthConfiguration` to provide more
friendly messages
* [x] fine tune default config values (eg: length?)
## Code coverage:
![image](https://user-images.githubusercontent.com/2114189/236620635-55576955-f95d-4810-b12f-879026a3afdf.png)
2023-05-08 19:49:59 +08:00
|
|
|
log.Error("Issue indexer handler: failed to from index: %v Error: %v", indexerData.IDs, err)
|
|
|
|
if !indexer.Ping() {
|
|
|
|
log.Error("Issue indexer handler: indexer is unavailable when deleting")
|
|
|
|
unhandled = append(unhandled, indexerData)
|
2022-01-27 16:30:51 +08:00
|
|
|
}
|
|
|
|
}
|
2020-01-07 19:23:09 +08:00
|
|
|
continue
|
|
|
|
}
|
Rewrite queue (#24505)
# ⚠️ Breaking
Many deprecated queue config options are removed (actually, they should
have been removed in 1.18/1.19).
If you see the fatal message when starting Gitea: "Please update your
app.ini to remove deprecated config options", please follow the error
messages to remove these options from your app.ini.
Example:
```
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].ISSUE_INDEXER_QUEUE_TYPE`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].UPDATE_BUFFER_LEN`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [F] Please update your app.ini to remove deprecated config options
```
Many options in `[queue]` are are dropped, including:
`WRAP_IF_NECESSARY`, `MAX_ATTEMPTS`, `TIMEOUT`, `WORKERS`,
`BLOCK_TIMEOUT`, `BOOST_TIMEOUT`, `BOOST_WORKERS`, they can be removed
from app.ini.
# The problem
The old queue package has some legacy problems:
* complexity: I doubt few people could tell how it works.
* maintainability: Too many channels and mutex/cond are mixed together,
too many different structs/interfaces depends each other.
* stability: due to the complexity & maintainability, sometimes there
are strange bugs and difficult to debug, and some code doesn't have test
(indeed some code is difficult to test because a lot of things are mixed
together).
* general applicability: although it is called "queue", its behavior is
not a well-known queue.
* scalability: it doesn't seem easy to make it work with a cluster
without breaking its behaviors.
It came from some very old code to "avoid breaking", however, its
technical debt is too heavy now. It's a good time to introduce a better
"queue" package.
# The new queue package
It keeps using old config and concept as much as possible.
* It only contains two major kinds of concepts:
* The "base queue": channel, levelqueue, redis
* They have the same abstraction, the same interface, and they are
tested by the same testing code.
* The "WokerPoolQueue", it uses the "base queue" to provide "worker
pool" function, calls the "handler" to process the data in the base
queue.
* The new code doesn't do "PushBack"
* Think about a queue with many workers, the "PushBack" can't guarantee
the order for re-queued unhandled items, so in new code it just does
"normal push"
* The new code doesn't do "pause/resume"
* The "pause/resume" was designed to handle some handler's failure: eg:
document indexer (elasticsearch) is down
* If a queue is paused for long time, either the producers blocks or the
new items are dropped.
* The new code doesn't do such "pause/resume" trick, it's not a common
queue's behavior and it doesn't help much.
* If there are unhandled items, the "push" function just blocks for a
few seconds and then re-queue them and retry.
* The new code doesn't do "worker booster"
* Gitea's queue's handlers are light functions, the cost is only the
go-routine, so it doesn't make sense to "boost" them.
* The new code only use "max worker number" to limit the concurrent
workers.
* The new "Push" never blocks forever
* Instead of creating more and more blocking goroutines, return an error
is more friendly to the server and to the end user.
There are more details in code comments: eg: the "Flush" problem, the
strange "code.index" hanging problem, the "immediate" queue problem.
Almost ready for review.
TODO:
* [x] add some necessary comments during review
* [x] add some more tests if necessary
* [x] update documents and config options
* [x] test max worker / active worker
* [x] re-run the CI tasks to see whether any test is flaky
* [x] improve the `handleOldLengthConfiguration` to provide more
friendly messages
* [x] fine tune default config values (eg: length?)
## Code coverage:
![image](https://user-images.githubusercontent.com/2114189/236620635-55576955-f95d-4810-b12f-879026a3afdf.png)
2023-05-08 19:49:59 +08:00
|
|
|
toIndex = append(toIndex, indexerData)
|
2020-01-07 19:23:09 +08:00
|
|
|
}
|
Rewrite queue (#24505)
# ⚠️ Breaking
Many deprecated queue config options are removed (actually, they should
have been removed in 1.18/1.19).
If you see the fatal message when starting Gitea: "Please update your
app.ini to remove deprecated config options", please follow the error
messages to remove these options from your app.ini.
Example:
```
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].ISSUE_INDEXER_QUEUE_TYPE`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].UPDATE_BUFFER_LEN`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [F] Please update your app.ini to remove deprecated config options
```
Many options in `[queue]` are are dropped, including:
`WRAP_IF_NECESSARY`, `MAX_ATTEMPTS`, `TIMEOUT`, `WORKERS`,
`BLOCK_TIMEOUT`, `BOOST_TIMEOUT`, `BOOST_WORKERS`, they can be removed
from app.ini.
# The problem
The old queue package has some legacy problems:
* complexity: I doubt few people could tell how it works.
* maintainability: Too many channels and mutex/cond are mixed together,
too many different structs/interfaces depends each other.
* stability: due to the complexity & maintainability, sometimes there
are strange bugs and difficult to debug, and some code doesn't have test
(indeed some code is difficult to test because a lot of things are mixed
together).
* general applicability: although it is called "queue", its behavior is
not a well-known queue.
* scalability: it doesn't seem easy to make it work with a cluster
without breaking its behaviors.
It came from some very old code to "avoid breaking", however, its
technical debt is too heavy now. It's a good time to introduce a better
"queue" package.
# The new queue package
It keeps using old config and concept as much as possible.
* It only contains two major kinds of concepts:
* The "base queue": channel, levelqueue, redis
* They have the same abstraction, the same interface, and they are
tested by the same testing code.
* The "WorkerPoolQueue", it uses the "base queue" to provide "worker
pool" function, calls the "handler" to process the data in the base
queue.
* The new code doesn't do "PushBack"
* Think about a queue with many workers, the "PushBack" can't guarantee
the order for re-queued unhandled items, so in new code it just does
"normal push"
* The new code doesn't do "pause/resume"
* The "pause/resume" was designed to handle some handler's failure: eg:
document indexer (elasticsearch) is down
* If a queue is paused for long time, either the producers blocks or the
new items are dropped.
* The new code doesn't do such "pause/resume" trick, it's not a common
queue's behavior and it doesn't help much.
* If there are unhandled items, the "push" function just blocks for a
few seconds and then re-queue them and retry.
* The new code doesn't do "worker booster"
* Gitea's queue's handlers are light functions, the cost is only the
go-routine, so it doesn't make sense to "boost" them.
* The new code only use "max worker number" to limit the concurrent
workers.
* The new "Push" never blocks forever
* Instead of creating more and more blocking goroutines, return an error
is more friendly to the server and to the end user.
There are more details in code comments: eg: the "Flush" problem, the
strange "code.index" hanging problem, the "immediate" queue problem.
Almost ready for review.
TODO:
* [x] add some necessary comments during review
* [x] add some more tests if necessary
* [x] update documents and config options
* [x] test max worker / active worker
* [x] re-run the CI tasks to see whether any test is flaky
* [x] improve the `handleOldLengthConfiguration` to provide more
friendly messages
* [x] fine tune default config values (eg: length?)
## Code coverage:
![image](https://user-images.githubusercontent.com/2114189/236620635-55576955-f95d-4810-b12f-879026a3afdf.png)
2023-05-08 19:49:59 +08:00
|
|
|
if err := indexer.Index(toIndex); err != nil {
|
|
|
|
log.Error("Error whilst indexing: %v Error: %v", toIndex, err)
|
|
|
|
if !indexer.Ping() {
|
|
|
|
log.Error("Issue indexer handler: indexer is unavailable when indexing")
|
|
|
|
unhandled = append(unhandled, toIndex...)
|
2022-01-27 16:30:51 +08:00
|
|
|
}
|
2020-01-07 19:23:09 +08:00
|
|
|
}
|
Rewrite queue (#24505)
# ⚠️ Breaking
Many deprecated queue config options are removed (actually, they should
have been removed in 1.18/1.19).
If you see the fatal message when starting Gitea: "Please update your
app.ini to remove deprecated config options", please follow the error
messages to remove these options from your app.ini.
Example:
```
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].ISSUE_INDEXER_QUEUE_TYPE`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].UPDATE_BUFFER_LEN`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [F] Please update your app.ini to remove deprecated config options
```
Many options in `[queue]` are dropped, including:
`WRAP_IF_NECESSARY`, `MAX_ATTEMPTS`, `TIMEOUT`, `WORKERS`,
`BLOCK_TIMEOUT`, `BOOST_TIMEOUT`, `BOOST_WORKERS`, they can be removed
from app.ini.
# The problem
The old queue package has some legacy problems:
* complexity: I doubt few people could tell how it works.
* maintainability: Too many channels and mutex/cond are mixed together,
too many different structs/interfaces depends each other.
* stability: due to the complexity & maintainability, sometimes there
are strange bugs and difficult to debug, and some code doesn't have test
(indeed some code is difficult to test because a lot of things are mixed
together).
* general applicability: although it is called "queue", its behavior is
not a well-known queue.
* scalability: it doesn't seem easy to make it work with a cluster
without breaking its behaviors.
It came from some very old code to "avoid breaking", however, its
technical debt is too heavy now. It's a good time to introduce a better
"queue" package.
# The new queue package
It keeps using old config and concept as much as possible.
* It only contains two major kinds of concepts:
* The "base queue": channel, levelqueue, redis
* They have the same abstraction, the same interface, and they are
tested by the same testing code.
* The "WorkerPoolQueue", it uses the "base queue" to provide "worker
pool" function, calls the "handler" to process the data in the base
queue.
* The new code doesn't do "PushBack"
* Think about a queue with many workers, the "PushBack" can't guarantee
the order for re-queued unhandled items, so in new code it just does
"normal push"
* The new code doesn't do "pause/resume"
* The "pause/resume" was designed to handle some handler's failure: eg:
document indexer (elasticsearch) is down
* If a queue is paused for long time, either the producers blocks or the
new items are dropped.
* The new code doesn't do such "pause/resume" trick, it's not a common
queue's behavior and it doesn't help much.
* If there are unhandled items, the "push" function just blocks for a
few seconds and then re-queue them and retry.
* The new code doesn't do "worker booster"
* Gitea's queue's handlers are light functions, the cost is only the
go-routine, so it doesn't make sense to "boost" them.
* The new code only use "max worker number" to limit the concurrent
workers.
* The new "Push" never blocks forever
* Instead of creating more and more blocking goroutines, return an error
is more friendly to the server and to the end user.
There are more details in code comments: eg: the "Flush" problem, the
strange "code.index" hanging problem, the "immediate" queue problem.
Almost ready for review.
TODO:
* [x] add some necessary comments during review
* [x] add some more tests if necessary
* [x] update documents and config options
* [x] test max worker / active worker
* [x] re-run the CI tasks to see whether any test is flaky
* [x] improve the `handleOldLengthConfiguration` to provide more
friendly messages
* [x] fine tune default config values (eg: length?)
## Code coverage:
![image](https://user-images.githubusercontent.com/2114189/236620635-55576955-f95d-4810-b12f-879026a3afdf.png)
2023-05-08 19:49:59 +08:00
|
|
|
return unhandled
|
2020-01-07 19:23:09 +08:00
|
|
|
}
|
|
|
|
|
Rewrite queue (#24505)
# ⚠️ Breaking
Many deprecated queue config options are removed (actually, they should
have been removed in 1.18/1.19).
If you see the fatal message when starting Gitea: "Please update your
app.ini to remove deprecated config options", please follow the error
messages to remove these options from your app.ini.
Example:
```
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].ISSUE_INDEXER_QUEUE_TYPE`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].UPDATE_BUFFER_LEN`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [F] Please update your app.ini to remove deprecated config options
```
Many options in `[queue]` are dropped, including:
`WRAP_IF_NECESSARY`, `MAX_ATTEMPTS`, `TIMEOUT`, `WORKERS`,
`BLOCK_TIMEOUT`, `BOOST_TIMEOUT`, `BOOST_WORKERS`, they can be removed
from app.ini.
# The problem
The old queue package has some legacy problems:
* complexity: I doubt few people could tell how it works.
* maintainability: Too many channels and mutex/cond are mixed together,
too many different structs/interfaces depends each other.
* stability: due to the complexity & maintainability, sometimes there
are strange bugs and difficult to debug, and some code doesn't have test
(indeed some code is difficult to test because a lot of things are mixed
together).
* general applicability: although it is called "queue", its behavior is
not a well-known queue.
* scalability: it doesn't seem easy to make it work with a cluster
without breaking its behaviors.
It came from some very old code to "avoid breaking", however, its
technical debt is too heavy now. It's a good time to introduce a better
"queue" package.
# The new queue package
It keeps using old config and concept as much as possible.
* It only contains two major kinds of concepts:
* The "base queue": channel, levelqueue, redis
* They have the same abstraction, the same interface, and they are
tested by the same testing code.
* The "WorkerPoolQueue", it uses the "base queue" to provide "worker
pool" function, calls the "handler" to process the data in the base
queue.
* The new code doesn't do "PushBack"
* Think about a queue with many workers, the "PushBack" can't guarantee
the order for re-queued unhandled items, so in new code it just does
"normal push"
* The new code doesn't do "pause/resume"
* The "pause/resume" was designed to handle some handler's failure: eg:
document indexer (elasticsearch) is down
* If a queue is paused for long time, either the producers blocks or the
new items are dropped.
* The new code doesn't do such "pause/resume" trick, it's not a common
queue's behavior and it doesn't help much.
* If there are unhandled items, the "push" function just blocks for a
few seconds and then re-queue them and retry.
* The new code doesn't do "worker booster"
* Gitea's queue's handlers are light functions, the cost is only the
go-routine, so it doesn't make sense to "boost" them.
* The new code only use "max worker number" to limit the concurrent
workers.
* The new "Push" never blocks forever
* Instead of creating more and more blocking goroutines, return an error
is more friendly to the server and to the end user.
There are more details in code comments: eg: the "Flush" problem, the
strange "code.index" hanging problem, the "immediate" queue problem.
Almost ready for review.
TODO:
* [x] add some necessary comments during review
* [x] add some more tests if necessary
* [x] update documents and config options
* [x] test max worker / active worker
* [x] re-run the CI tasks to see whether any test is flaky
* [x] improve the `handleOldLengthConfiguration` to provide more
friendly messages
* [x] fine tune default config values (eg: length?)
## Code coverage:
![image](https://user-images.githubusercontent.com/2114189/236620635-55576955-f95d-4810-b12f-879026a3afdf.png)
2023-05-08 19:49:59 +08:00
|
|
|
issueIndexerQueue = queue.CreateSimpleQueue("issue_indexer", handler)
|
2020-01-07 19:23:09 +08:00
|
|
|
|
|
|
|
if issueIndexerQueue == nil {
|
|
|
|
log.Fatal("Unable to create issue indexer queue")
|
|
|
|
}
|
|
|
|
default:
|
Rewrite queue (#24505)
# ⚠️ Breaking
Many deprecated queue config options are removed (actually, they should
have been removed in 1.18/1.19).
If you see the fatal message when starting Gitea: "Please update your
app.ini to remove deprecated config options", please follow the error
messages to remove these options from your app.ini.
Example:
```
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].ISSUE_INDEXER_QUEUE_TYPE`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].UPDATE_BUFFER_LEN`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [F] Please update your app.ini to remove deprecated config options
```
Many options in `[queue]` are dropped, including:
`WRAP_IF_NECESSARY`, `MAX_ATTEMPTS`, `TIMEOUT`, `WORKERS`,
`BLOCK_TIMEOUT`, `BOOST_TIMEOUT`, `BOOST_WORKERS`, they can be removed
from app.ini.
# The problem
The old queue package has some legacy problems:
* complexity: I doubt few people could tell how it works.
* maintainability: Too many channels and mutex/cond are mixed together,
too many different structs/interfaces depends each other.
* stability: due to the complexity & maintainability, sometimes there
are strange bugs and difficult to debug, and some code doesn't have test
(indeed some code is difficult to test because a lot of things are mixed
together).
* general applicability: although it is called "queue", its behavior is
not a well-known queue.
* scalability: it doesn't seem easy to make it work with a cluster
without breaking its behaviors.
It came from some very old code to "avoid breaking", however, its
technical debt is too heavy now. It's a good time to introduce a better
"queue" package.
# The new queue package
It keeps using old config and concept as much as possible.
* It only contains two major kinds of concepts:
* The "base queue": channel, levelqueue, redis
* They have the same abstraction, the same interface, and they are
tested by the same testing code.
* The "WorkerPoolQueue", it uses the "base queue" to provide "worker
pool" function, calls the "handler" to process the data in the base
queue.
* The new code doesn't do "PushBack"
* Think about a queue with many workers, the "PushBack" can't guarantee
the order for re-queued unhandled items, so in new code it just does
"normal push"
* The new code doesn't do "pause/resume"
* The "pause/resume" was designed to handle some handler's failure: eg:
document indexer (elasticsearch) is down
* If a queue is paused for long time, either the producers blocks or the
new items are dropped.
* The new code doesn't do such "pause/resume" trick, it's not a common
queue's behavior and it doesn't help much.
* If there are unhandled items, the "push" function just blocks for a
few seconds and then re-queue them and retry.
* The new code doesn't do "worker booster"
* Gitea's queue's handlers are light functions, the cost is only the
go-routine, so it doesn't make sense to "boost" them.
* The new code only use "max worker number" to limit the concurrent
workers.
* The new "Push" never blocks forever
* Instead of creating more and more blocking goroutines, return an error
is more friendly to the server and to the end user.
There are more details in code comments: eg: the "Flush" problem, the
strange "code.index" hanging problem, the "immediate" queue problem.
Almost ready for review.
TODO:
* [x] add some necessary comments during review
* [x] add some more tests if necessary
* [x] update documents and config options
* [x] test max worker / active worker
* [x] re-run the CI tasks to see whether any test is flaky
* [x] improve the `handleOldLengthConfiguration` to provide more
friendly messages
* [x] fine tune default config values (eg: length?)
## Code coverage:
![image](https://user-images.githubusercontent.com/2114189/236620635-55576955-f95d-4810-b12f-879026a3afdf.png)
2023-05-08 19:49:59 +08:00
|
|
|
issueIndexerQueue = queue.CreateSimpleQueue[*IndexerData]("issue_indexer", nil)
|
2020-01-07 19:23:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// Create the Indexer
|
2019-10-15 21:39:51 +08:00
|
|
|
go func() {
|
2022-04-01 01:01:43 +08:00
|
|
|
pprof.SetGoroutineLabels(ctx)
|
2019-10-15 21:39:51 +08:00
|
|
|
start := time.Now()
|
2020-01-07 19:23:09 +08:00
|
|
|
log.Info("PID %d: Initializing Issue Indexer: %s", os.Getpid(), setting.Indexer.IssueType)
|
2019-10-15 21:39:51 +08:00
|
|
|
var populate bool
|
|
|
|
switch setting.Indexer.IssueType {
|
|
|
|
case "bleve":
|
2020-02-29 06:00:09 +08:00
|
|
|
defer func() {
|
|
|
|
if err := recover(); err != nil {
|
|
|
|
log.Error("PANIC whilst initializing issue indexer: %v\nStacktrace: %s", err, log.Stack(2))
|
|
|
|
log.Error("The indexer files are likely corrupted and may need to be deleted")
|
2020-04-23 04:16:58 +08:00
|
|
|
log.Error("You can completely remove the %q directory to make Gitea recreate the indexes", setting.Indexer.IssuePath)
|
2020-02-29 06:00:09 +08:00
|
|
|
holder.cancel()
|
|
|
|
log.Fatal("PID: %d Unable to initialize the Bleve Issue Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.IssuePath, err)
|
|
|
|
}
|
|
|
|
}()
|
2020-02-12 07:21:20 +08:00
|
|
|
issueIndexer := NewBleveIndexer(setting.Indexer.IssuePath)
|
|
|
|
exist, err := issueIndexer.Init()
|
|
|
|
if err != nil {
|
|
|
|
holder.cancel()
|
2020-04-23 04:16:58 +08:00
|
|
|
log.Fatal("Unable to initialize Bleve Issue Indexer at path: %s Error: %v", setting.Indexer.IssuePath, err)
|
2020-02-12 07:21:20 +08:00
|
|
|
}
|
|
|
|
populate = !exist
|
|
|
|
holder.set(issueIndexer)
|
2021-05-15 22:22:26 +08:00
|
|
|
graceful.GetManager().RunAtTerminate(func() {
|
2020-02-12 07:21:20 +08:00
|
|
|
log.Debug("Closing issue indexer")
|
|
|
|
issueIndexer := holder.get()
|
|
|
|
if issueIndexer != nil {
|
|
|
|
issueIndexer.Close()
|
2020-01-07 19:23:09 +08:00
|
|
|
}
|
2022-04-01 01:01:43 +08:00
|
|
|
finished()
|
2020-02-12 07:21:20 +08:00
|
|
|
log.Info("PID: %d Issue Indexer closed", os.Getpid())
|
2020-01-07 19:23:09 +08:00
|
|
|
})
|
2020-02-12 07:21:20 +08:00
|
|
|
log.Debug("Created Bleve Indexer")
|
2020-02-13 14:06:17 +08:00
|
|
|
case "elasticsearch":
|
2021-05-15 22:22:26 +08:00
|
|
|
graceful.GetManager().RunWithShutdownFns(func(_, atTerminate func(func())) {
|
2022-04-01 01:01:43 +08:00
|
|
|
pprof.SetGoroutineLabels(ctx)
|
2020-08-19 00:08:51 +08:00
|
|
|
issueIndexer, err := NewElasticSearchIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName)
|
2020-02-13 14:06:17 +08:00
|
|
|
if err != nil {
|
2020-04-23 04:16:58 +08:00
|
|
|
log.Fatal("Unable to initialize Elastic Search Issue Indexer at connection: %s Error: %v", setting.Indexer.IssueConnStr, err)
|
2020-02-13 14:06:17 +08:00
|
|
|
}
|
|
|
|
exist, err := issueIndexer.Init()
|
|
|
|
if err != nil {
|
2020-04-23 04:16:58 +08:00
|
|
|
log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err)
|
2020-02-13 14:06:17 +08:00
|
|
|
}
|
|
|
|
populate = !exist
|
|
|
|
holder.set(issueIndexer)
|
2022-04-01 01:01:43 +08:00
|
|
|
atTerminate(finished)
|
2020-02-13 14:06:17 +08:00
|
|
|
})
|
2019-10-15 21:39:51 +08:00
|
|
|
case "db":
|
|
|
|
issueIndexer := &DBIndexer{}
|
|
|
|
holder.set(issueIndexer)
|
2022-04-01 01:01:43 +08:00
|
|
|
graceful.GetManager().RunAtTerminate(finished)
|
2023-03-29 10:23:23 +08:00
|
|
|
case "meilisearch":
|
|
|
|
graceful.GetManager().RunWithShutdownFns(func(_, atTerminate func(func())) {
|
|
|
|
pprof.SetGoroutineLabels(ctx)
|
|
|
|
issueIndexer, err := NewMeilisearchIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName)
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal("Unable to initialize Meilisearch Issue Indexer at connection: %s Error: %v", setting.Indexer.IssueConnStr, err)
|
|
|
|
}
|
|
|
|
exist, err := issueIndexer.Init()
|
|
|
|
if err != nil {
|
|
|
|
log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err)
|
|
|
|
}
|
|
|
|
populate = !exist
|
|
|
|
holder.set(issueIndexer)
|
|
|
|
atTerminate(finished)
|
|
|
|
})
|
2019-10-15 21:39:51 +08:00
|
|
|
default:
|
2020-01-07 19:23:09 +08:00
|
|
|
holder.cancel()
|
2019-10-15 21:39:51 +08:00
|
|
|
log.Fatal("Unknown issue indexer type: %s", setting.Indexer.IssueType)
|
2019-02-21 08:54:05 +08:00
|
|
|
}
|
|
|
|
|
2020-01-07 19:23:09 +08:00
|
|
|
// Start processing the queue
|
|
|
|
go graceful.GetManager().RunWithShutdownFns(issueIndexerQueue.Run)
|
2019-02-21 08:54:05 +08:00
|
|
|
|
2020-01-07 19:23:09 +08:00
|
|
|
// Populate the index
|
2019-10-15 21:39:51 +08:00
|
|
|
if populate {
|
|
|
|
if syncReindex {
|
2020-01-07 19:23:09 +08:00
|
|
|
graceful.GetManager().RunWithShutdownContext(populateIssueIndexer)
|
2019-10-15 21:39:51 +08:00
|
|
|
} else {
|
2020-01-07 19:23:09 +08:00
|
|
|
go graceful.GetManager().RunWithShutdownContext(populateIssueIndexer)
|
2019-10-15 21:39:51 +08:00
|
|
|
}
|
2019-02-21 08:54:05 +08:00
|
|
|
}
|
2019-10-15 21:39:51 +08:00
|
|
|
waitChannel <- time.Since(start)
|
2020-01-07 19:23:09 +08:00
|
|
|
close(waitChannel)
|
2019-10-15 21:39:51 +08:00
|
|
|
}()
|
2020-01-07 19:23:09 +08:00
|
|
|
|
2019-10-15 21:39:51 +08:00
|
|
|
if syncReindex {
|
2020-01-07 19:23:09 +08:00
|
|
|
select {
|
|
|
|
case <-waitChannel:
|
|
|
|
case <-graceful.GetManager().IsShutdown():
|
|
|
|
}
|
2019-10-15 21:39:51 +08:00
|
|
|
} else if setting.Indexer.StartupTimeout > 0 {
|
|
|
|
go func() {
|
2022-04-01 01:01:43 +08:00
|
|
|
pprof.SetGoroutineLabels(ctx)
|
2019-10-15 21:39:51 +08:00
|
|
|
timeout := setting.Indexer.StartupTimeout
|
2019-12-15 17:51:28 +08:00
|
|
|
if graceful.GetManager().IsChild() && setting.GracefulHammerTime > 0 {
|
2019-10-15 21:39:51 +08:00
|
|
|
timeout += setting.GracefulHammerTime
|
|
|
|
}
|
|
|
|
select {
|
|
|
|
case duration := <-waitChannel:
|
|
|
|
log.Info("Issue Indexer Initialization took %v", duration)
|
2020-01-07 19:23:09 +08:00
|
|
|
case <-graceful.GetManager().IsShutdown():
|
|
|
|
log.Warn("Shutdown occurred before issue index initialisation was complete")
|
2019-10-15 21:39:51 +08:00
|
|
|
case <-time.After(timeout):
|
Rewrite queue (#24505)
# ⚠️ Breaking
Many deprecated queue config options are removed (actually, they should
have been removed in 1.18/1.19).
If you see the fatal message when starting Gitea: "Please update your
app.ini to remove deprecated config options", please follow the error
messages to remove these options from your app.ini.
Example:
```
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].ISSUE_INDEXER_QUEUE_TYPE`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [E] Removed queue option: `[indexer].UPDATE_BUFFER_LEN`. Use new options in `[queue.issue_indexer]`
2023/05/06 19:39:22 [F] Please update your app.ini to remove deprecated config options
```
Many options in `[queue]` are dropped, including:
`WRAP_IF_NECESSARY`, `MAX_ATTEMPTS`, `TIMEOUT`, `WORKERS`,
`BLOCK_TIMEOUT`, `BOOST_TIMEOUT`, `BOOST_WORKERS`, they can be removed
from app.ini.
# The problem
The old queue package has some legacy problems:
* complexity: I doubt few people could tell how it works.
* maintainability: Too many channels and mutex/cond are mixed together,
too many different structs/interfaces depends each other.
* stability: due to the complexity & maintainability, sometimes there
are strange bugs and difficult to debug, and some code doesn't have test
(indeed some code is difficult to test because a lot of things are mixed
together).
* general applicability: although it is called "queue", its behavior is
not a well-known queue.
* scalability: it doesn't seem easy to make it work with a cluster
without breaking its behaviors.
It came from some very old code to "avoid breaking", however, its
technical debt is too heavy now. It's a good time to introduce a better
"queue" package.
# The new queue package
It keeps using old config and concept as much as possible.
* It only contains two major kinds of concepts:
* The "base queue": channel, levelqueue, redis
* They have the same abstraction, the same interface, and they are
tested by the same testing code.
* The "WorkerPoolQueue", it uses the "base queue" to provide "worker
pool" function, calls the "handler" to process the data in the base
queue.
* The new code doesn't do "PushBack"
* Think about a queue with many workers, the "PushBack" can't guarantee
the order for re-queued unhandled items, so in new code it just does
"normal push"
* The new code doesn't do "pause/resume"
* The "pause/resume" was designed to handle some handler's failure: eg:
document indexer (elasticsearch) is down
* If a queue is paused for long time, either the producers blocks or the
new items are dropped.
* The new code doesn't do such "pause/resume" trick, it's not a common
queue's behavior and it doesn't help much.
* If there are unhandled items, the "push" function just blocks for a
few seconds and then re-queue them and retry.
* The new code doesn't do "worker booster"
* Gitea's queue's handlers are light functions, the cost is only the
go-routine, so it doesn't make sense to "boost" them.
* The new code only use "max worker number" to limit the concurrent
workers.
* The new "Push" never blocks forever
* Instead of creating more and more blocking goroutines, return an error
is more friendly to the server and to the end user.
There are more details in code comments: eg: the "Flush" problem, the
strange "code.index" hanging problem, the "immediate" queue problem.
Almost ready for review.
TODO:
* [x] add some necessary comments during review
* [x] add some more tests if necessary
* [x] update documents and config options
* [x] test max worker / active worker
* [x] re-run the CI tasks to see whether any test is flaky
* [x] improve the `handleOldLengthConfiguration` to provide more
friendly messages
* [x] fine tune default config values (eg: length?)
## Code coverage:
![image](https://user-images.githubusercontent.com/2114189/236620635-55576955-f95d-4810-b12f-879026a3afdf.png)
2023-05-08 19:49:59 +08:00
|
|
|
issueIndexerQueue.ShutdownWait(5 * time.Second)
|
2019-10-15 21:39:51 +08:00
|
|
|
log.Fatal("Issue Indexer Initialization timed-out after: %v", timeout)
|
|
|
|
}
|
|
|
|
}()
|
2019-02-21 08:54:05 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// populateIssueIndexer populate the issue indexer with issue data
|
2020-01-07 19:23:09 +08:00
|
|
|
func populateIssueIndexer(ctx context.Context) {
|
2022-04-01 01:01:43 +08:00
|
|
|
ctx, _, finished := process.GetManager().AddTypedContext(ctx, "Service: PopulateIssueIndexer", process.SystemProcessType, true)
|
|
|
|
defer finished()
|
2019-02-21 08:54:05 +08:00
|
|
|
for page := 1; ; page++ {
|
2020-01-07 19:23:09 +08:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
log.Warn("Issue Indexer population shutdown before completion")
|
|
|
|
return
|
|
|
|
default:
|
|
|
|
}
|
2022-11-19 16:12:33 +08:00
|
|
|
repos, _, err := repo_model.SearchRepositoryByName(ctx, &repo_model.SearchRepoOptions{
|
2022-06-06 16:01:49 +08:00
|
|
|
ListOptions: db.ListOptions{Page: page, PageSize: repo_model.RepositoryListDefaultPageSize},
|
2021-11-24 17:49:20 +08:00
|
|
|
OrderBy: db.SearchOrderByID,
|
2019-02-21 08:54:05 +08:00
|
|
|
Private: true,
|
|
|
|
Collaborate: util.OptionalBoolFalse,
|
|
|
|
})
|
|
|
|
if err != nil {
|
2019-04-02 15:48:31 +08:00
|
|
|
log.Error("SearchRepositoryByName: %v", err)
|
2019-02-21 08:54:05 +08:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
if len(repos) == 0 {
|
2020-01-07 19:23:09 +08:00
|
|
|
log.Debug("Issue Indexer population complete")
|
2019-02-21 08:54:05 +08:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, repo := range repos {
|
2020-01-07 19:23:09 +08:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
log.Info("Issue Indexer population shutdown before completion")
|
|
|
|
return
|
|
|
|
default:
|
|
|
|
}
|
2022-11-19 16:12:33 +08:00
|
|
|
UpdateRepoIndexer(ctx, repo)
|
2019-02-21 08:54:05 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-13 05:46:43 +08:00
|
|
|
// UpdateRepoIndexer add/update all issues of the repositories
|
2022-11-19 16:12:33 +08:00
|
|
|
func UpdateRepoIndexer(ctx context.Context, repo *repo_model.Repository) {
|
|
|
|
is, err := issues_model.Issues(ctx, &issues_model.IssuesOptions{
|
2022-04-25 22:06:24 +08:00
|
|
|
RepoID: repo.ID,
|
2019-12-13 05:46:43 +08:00
|
|
|
IsClosed: util.OptionalBoolNone,
|
|
|
|
IsPull: util.OptionalBoolNone,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
log.Error("Issues: %v", err)
|
|
|
|
return
|
|
|
|
}
|
2022-11-19 16:12:33 +08:00
|
|
|
if err = issues_model.IssueList(is).LoadDiscussComments(ctx); err != nil {
|
|
|
|
log.Error("LoadDiscussComments: %v", err)
|
2019-12-13 05:46:43 +08:00
|
|
|
return
|
|
|
|
}
|
|
|
|
for _, issue := range is {
|
|
|
|
UpdateIssueIndexer(issue)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-02-21 08:54:05 +08:00
|
|
|
// UpdateIssueIndexer add/update an issue to the issue indexer
|
2022-06-13 17:37:59 +08:00
|
|
|
func UpdateIssueIndexer(issue *issues_model.Issue) {
|
2019-02-21 08:54:05 +08:00
|
|
|
var comments []string
|
|
|
|
for _, comment := range issue.Comments {
|
2022-06-13 17:37:59 +08:00
|
|
|
if comment.Type == issues_model.CommentTypeComment {
|
2019-02-21 08:54:05 +08:00
|
|
|
comments = append(comments, comment.Content)
|
|
|
|
}
|
|
|
|
}
|
2020-01-07 19:23:09 +08:00
|
|
|
indexerData := &IndexerData{
|
2019-02-21 08:54:05 +08:00
|
|
|
ID: issue.ID,
|
|
|
|
RepoID: issue.RepoID,
|
|
|
|
Title: issue.Title,
|
|
|
|
Content: issue.Content,
|
|
|
|
Comments: comments,
|
2019-10-15 21:39:51 +08:00
|
|
|
}
|
2020-01-07 19:23:09 +08:00
|
|
|
log.Debug("Adding to channel: %v", indexerData)
|
|
|
|
if err := issueIndexerQueue.Push(indexerData); err != nil {
|
|
|
|
log.Error("Unable to push to issue indexer: %v: Error: %v", indexerData, err)
|
|
|
|
}
|
2019-02-21 08:54:05 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// DeleteRepoIssueIndexer deletes repo's all issues indexes
|
2022-11-19 16:12:33 +08:00
|
|
|
func DeleteRepoIssueIndexer(ctx context.Context, repo *repo_model.Repository) {
|
2019-02-21 08:54:05 +08:00
|
|
|
var ids []int64
|
2022-11-19 16:12:33 +08:00
|
|
|
ids, err := issues_model.GetIssueIDsByRepoID(ctx, repo.ID)
|
2019-02-21 08:54:05 +08:00
|
|
|
if err != nil {
|
2022-11-19 16:12:33 +08:00
|
|
|
log.Error("GetIssueIDsByRepoID failed: %v", err)
|
2019-02-21 08:54:05 +08:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2019-06-13 03:41:28 +08:00
|
|
|
if len(ids) == 0 {
|
2019-02-21 08:54:05 +08:00
|
|
|
return
|
|
|
|
}
|
2020-01-07 19:23:09 +08:00
|
|
|
indexerData := &IndexerData{
|
2019-02-21 08:54:05 +08:00
|
|
|
IDs: ids,
|
|
|
|
IsDelete: true,
|
2019-10-15 21:39:51 +08:00
|
|
|
}
|
2020-01-07 19:23:09 +08:00
|
|
|
if err := issueIndexerQueue.Push(indexerData); err != nil {
|
|
|
|
log.Error("Unable to push to issue indexer: %v: Error: %v", indexerData, err)
|
|
|
|
}
|
2019-02-21 08:54:05 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// SearchIssuesByKeyword search issue ids by keywords and repo id
|
2020-02-13 14:06:17 +08:00
|
|
|
// WARNNING: You have to ensure user have permission to visit repoIDs' issues
|
2022-01-27 16:30:51 +08:00
|
|
|
func SearchIssuesByKeyword(ctx context.Context, repoIDs []int64, keyword string) ([]int64, error) {
|
2019-02-21 08:54:05 +08:00
|
|
|
var issueIDs []int64
|
2020-01-07 19:23:09 +08:00
|
|
|
indexer := holder.get()
|
|
|
|
|
|
|
|
if indexer == nil {
|
|
|
|
log.Error("SearchIssuesByKeyword(): unable to get indexer!")
|
|
|
|
return nil, fmt.Errorf("unable to get issue indexer")
|
|
|
|
}
|
2022-01-27 16:30:51 +08:00
|
|
|
res, err := indexer.Search(ctx, keyword, repoIDs, 50, 0)
|
2019-02-21 08:54:05 +08:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
for _, r := range res.Hits {
|
|
|
|
issueIDs = append(issueIDs, r.ID)
|
|
|
|
}
|
|
|
|
return issueIDs, nil
|
|
|
|
}
|
2022-01-27 16:30:51 +08:00
|
|
|
|
|
|
|
// IsAvailable checks if issue indexer is available
|
|
|
|
func IsAvailable() bool {
|
|
|
|
indexer := holder.get()
|
|
|
|
if indexer == nil {
|
|
|
|
log.Error("IsAvailable(): unable to get indexer!")
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
|
|
|
return indexer.Ping()
|
|
|
|
}
|